# Code to read orbits from IAU circular PDF with uncertainties and convert to ORB6 format
#
# Formatted lines => cir###.txt
# Read orbital elements from IAU Circular PDF (with uncertainties)
# Print lines in ORB6 format to cir###.txt
# Lines unable to be formatted => cir###_badlines.txt
#
# R. Matson - 6/2023 (dsplit + split_text from S. Stepanoff)
#           - 7/2024 updated version to send lines unable to be split to new file
#					 and pull coords, mags from WDS
#
#
# Change references (the "# References" block and the author-to-reference
# mapping further down) before running!
#

###########################################

import os
import re
from itertools import groupby

import numpy as np
import pdfplumber

from utils_WDS import read_WDS


############################################

# Get first index containing a substring (from StackOverflow)
def first_substring(strings, substring):
    """Return the index of the first item in *strings* containing *substring*.

    Args:
        strings: Iterable of strings, scanned in order.
        substring: Text looked for inside each item (``substring in item``).

    Returns:
        Zero-based position of the first matching item.

    Raises:
        ValueError: If no item contains *substring*.
    """
    # The first hit is by definition the smallest index, so stop there
    # instead of scanning the remaining items.
    for idx, item in enumerate(strings):
        if substring in item:
            return idx
    raise ValueError('no item contains {!r}'.format(substring))

############################################

# Check if float
def is_float(element: object) -> bool:
    """Report whether ``float(element)`` succeeds.

    Args:
        element: Value to test; typically a text token.

    Returns:
        True when ``float(element)`` can be evaluated, False otherwise.
        Anything ``float()`` accepts counts, including text such as
        ``'nan'`` and ``'inf'``.
    """
    try:
        float(element)
    except (TypeError, ValueError):
        # ValueError: text that is not a number.
        # TypeError: objects float() cannot convert at all (None, lists, ...).
        return False
    return True

############################################

# Function to split strings at decimal and combine with fixed widths
def dsplit(strelem, lnum, rnum):
    """Pad a decimal string so its decimal point sits at a fixed position.

    Args:
        strelem: Text to format, e.g. ``'12.5'``.
        lnum: Minimum width of the part before the point (right-justified).
        rnum: Minimum width of the part after the point (left-justified).

    Returns:
        ``<integer part>.<fraction part>`` padded with spaces. Only input
        with exactly one ``'.'`` keeps its fraction; with no point, or with
        more than one, the text before the first point is kept and the
        fraction is blank. Parts longer than the requested width are not
        truncated.
    """
    pieces = strelem.split('.')
    # Anything other than exactly two pieces gets an all-blank fraction.
    fraction = pieces[1] if len(pieces) == 2 else ''
    return pieces[0].rjust(lnum) + '.' + fraction.ljust(rnum)

############################################

def split_text(s):
    """Yield consecutive runs of *s* that are all letters or all non-letters.

    Characters are grouped by ``str.isalpha``; each run is yielded as one
    string, in order, so ``'123AB'`` produces ``'123'`` then ``'AB'``.
    """
    yield from (''.join(run) for _is_alpha, run in groupby(s, key=str.isalpha))

############################################

# Use DD to match with WDS and pull precise coords and mags, returns strings

def get_WDS(name):
    """Look up a discoverer designation in the module-level ``wds`` table.

    The stripped *name* is first compared with the ``DD`` column as a whole.
    If nothing matches, it is split after seven characters and the two parts
    are compared with the ``DD`` and ``comp`` columns respectively.

    Args:
        name: Designation text; surrounding whitespace is ignored.

    Returns:
        Tuple ``(coords, mag1, mag2, notes)`` of strings taken from the
        ``coord``, ``mag1``, ``mag2`` and ``notes`` columns of the first
        matching row. When no row matches, a message is printed and blank
        placeholders are returned instead.
    """
    name = name.strip()

    hits = np.where(wds['DD'] == name)[0]
    if np.size(hits) == 0:
        # Retry with the trailing characters treated as a component code.
        stem, comp = name[:7], name[7:]
        hits = np.where((wds['DD'] == stem) & (wds['comp'] == comp))[0]

    if np.size(hits) == 0:
        print('No WDS match found for ',name)
        return '                  ', '  .  ', '  .  ', ' '

    row = hits[0]
    return tuple(str(wds[column][row]) for column in ('coord', 'mag1', 'mag2', 'notes'))

############################################

# References
# Reference codes written into each formatted output line; update them for
# every new circular.
ref1 = 'Alz2023'
ref2 = 'Doc2023a'
ref3 = 'Sca2023a'
ref4 = 'Tok2023a'
ref5 = 'Lin2023'

print('')
print('')

# Example answers to the prompts below, handy when testing:
#   filename = 'cir209.pdf', ephemeris dates = '2023-24', pages = 13,
#   end text = '2022.845'

# Open pdf
file = input("Enter filename: ")
pdf = pdfplumber.open(file)

# Output files share the PDF's name without its extension.  splitext() only
# removes a real extension, so a name without ".pdf" is left intact instead
# of losing its last four characters.
outfile = os.path.splitext(file)[0]

# Create both output files, or empty them if they already exist
for suffix in (".txt", "_badlines.txt"):
    with open(outfile + suffix, "w"):
        pass

# Put in placeholder for HD number
HD = '.     '

# Ephemeris dates
ephem = input('Enter ephemeris dates (YYYY-YY): ')

# Number of pages to read in, counted from the first page of the PDF
numpages = int(input("Enter number of pages to be read: "))
# Asking for more pages than the PDF holds would otherwise fail part-way
# through the run with an IndexError, so cap the request at the real count.
if numpages > len(pdf.pages):
    print('PDF has only',len(pdf.pages),'pages; reading all of them')
    numpages = len(pdf.pages)

# Text that follows the last orbit on the final page
core_en_pat = input("Enter exact words following final orbit (e.g., TOK = TOKOVININ): ")

# Read in WDS summ file to get precise coordinates, magnitudes, notes, etc.
wds = read_WDS('/data/wds/wds/wds/wds.summ')

# Main conversion loop.
#
# For each requested page: extract the text, keep what lies between the
# column header and the end of the table, pair the lines two by two (each
# orbit spans two rows), and turn every pair into one formatted output line.
# Rows that cannot be interpreted go to the "_badlines" file instead.

# Column header printed above the orbit rows; the rows start right after it.
core_st_pat = re.compile(r"P T e a i Ω ω")

# Author abbreviation as printed in the circular -> reference code.
# Update together with ref1..ref5 for every new circular.
refs_by_author = {
    'ALZ': ref1,
    'D et al.': ref2,
    'S et al.': ref3,
    'TOK': ref4,
    'TOK+RV': ref4,
    'TOK+Gaia': ref4,
    'L et al.': ref5,
}

# Both output files were emptied during start-up; open them once in append
# mode for the whole run rather than once per orbit.
with open(outfile + ".txt", "a") as ofile, open(outfile + "_badlines.txt", "a") as bfile:
    for n in range(numpages):

        # Extract text (treat a page without any text as empty)
        text = pdf.pages[n].extract_text(x_tolerance=2) or ''
        print(n+1,'of',numpages,'pages')

        core_st = core_st_pat.search(text)
        if core_st is None:
            print('No orbit table header found on page',n+1,'- page skipped')
            continue
        start = core_st.end() + 1

        if n == numpages - 1:
            # Last page: the table stops where the user-supplied text begins.
            # The text is matched literally, so characters such as '.', '+'
            # or '(' in it are not interpreted as regex syntax.
            core_end = re.search(re.escape(core_en_pat), text)
            if core_end is None:
                raise ValueError(
                    'end text {!r} not found on page {}'.format(core_en_pat, n+1))
            end = core_end.start()
        else:
            end = len(text) - 1

        # NOTE(review): the extra "-1" trims one more character before `end`
        # (two characters in total on non-final pages). Kept as in the
        # original script; confirm this never cuts into the last data row.
        core = text[start:end-1]

        # Each orbit spans two rows, so walk the lines in pairs
        lines = core.split("\n")
        for first_row, second_row in zip(lines[::2], lines[1::2]):

            # Tokenise both rows; 'fixed' and '...' become '.', '±' is dropped
            elements = [
                tok.replace('fixed', '.').replace('...', '.').replace('±', '')
                for tok in first_row.split() + second_row.split()
            ]
            if not elements:
                continue

            WDSno = elements[0].ljust(10)

            # First token containing a decimal point
            try:
                Tidx = first_substring(elements, '.')
            except ValueError:
                # Nothing numeric at all: not an orbit row
                print(' '.join(elements), file=bfile)
                continue

            # Find T0 index: step past numbers below 1600 until reaching the
            # first value that is not, which is taken to be the epoch T0.
            # The scan stops at the end of the row or at a non-number
            # instead of raising.
            if is_float(elements[Tidx]):
                while (Tidx < len(elements) and is_float(elements[Tidx])
                       and float(elements[Tidx]) < 1600):
                    Tidx += 1
            else:
                print(elements[Tidx],'not float')

            # Combine strings to make proper names/DDs.  The number of
            # tokens between the WDS number and the period tells how the
            # name was split.
            if Tidx == 5:
                Name = elements[1].ljust(3) + elements[2].rjust(4) + elements[3].ljust(7)
            elif Tidx == 4:
                if elements[2][-1:].isalpha():
                    # Trailing letters are glued to the number, e.g. '123AB'
                    temp = list(split_text(elements[2]))
                    Name = elements[1].ljust(3) + temp[0].rjust(4) + ''.join(temp[1:])
                else:
                    Name = elements[1].ljust(3) + elements[2].rjust(4)
            elif Tidx == 3:
                Name = elements[1].ljust(14)
            else:
                # Unknown layout: do not reuse the previous orbit's name
                print(' '.join(elements), file=bfile)
                continue

            # Get WDS info based on DD Name
            coords, mag1, mag2, notes = get_WDS(Name)
            mag1 = dsplit(mag1,3,3)
            mag2 = dsplit(mag2,2,3)
            note = 'n' if 'N' in notes else ' '

            # Author tokens start at Tidx+8, after the two ephemeris values
            # at Tidx+6 and Tidx+7.  They are the tokens with a letter among
            # their first two characters; the next token is the HIP column.
            Hidx = Tidx + 8
            while Hidx < len(elements) and any(ch.isalpha() for ch in elements[Hidx][:2]):
                Hidx += 1

            # A complete row reaches at least index Hidx+10 (the second pair
            # of ephemeris values); anything shorter cannot be formatted.
            if len(elements) < Hidx + 11:
                print(coords, WDSno.ljust(11) + Name.ljust(14) + ' '.join(elements[Tidx-1:]), file=bfile)
                continue

            # Elements in header order (P T e a i Ω ω); N holds Ω, O holds ω
            P, T, e, a, inc, N, O = elements[Tidx-1:Tidx+6]

            # Always rebuilt from this row, so a row with no (or an unusual)
            # author never inherits the previous orbit's author.
            Auth = ' '.join(elements[Tidx+8:Hidx])

            HIP, ADS, P_e, T_e, e_e, a_e, i_e, N_e, O_e = elements[Hidx:Hidx+9]

            # Optional trailing value, written as an integer when present
            try:
                last = int(float(elements[Hidx+11]))
            except (IndexError, ValueError, OverflowError):
                last = ''

            ADS = ADS.rjust(5) if ADS.isnumeric() else '.    '
            HIP = HIP.rjust(6) if HIP.isnumeric() else '.     '

            P = dsplit(P,5,6)
            P_e = dsplit(P_e,4,6)
            a = dsplit(a,3,5)
            a_e = dsplit(a_e,2,5)
            inc = dsplit(inc,3,4)
            i_e = dsplit(i_e,3,4)
            N = dsplit(N,3,5)
            N_e = dsplit(N_e,3,4)
            T = dsplit(T,5,6)
            T_e = dsplit(T_e,4,6)
            e = dsplit(e,1,6)
            e_e = dsplit(e_e,1,6)
            O = dsplit(O,3,4)
            O_e = dsplit(O_e,3,4)

            # Unknown authors are flagged rather than guessed
            ref = refs_by_author.get(Auth, '???')

            png = 'wds'+WDSno.lower()+'a.png'

            print(coords,WDSno,'{:14}'.format(Name),'{:5}'.format(ADS),HD,'{:6}'.format(HIP),mag1,mag2,
                  P+'y'+P_e,a+'a',a_e,inc,i_e,N,N_e,T+'y'+T_e,e,e_e,O,O_e,
                  '2000','{:4}'.format(last),ephem,' ','0.0',note,'n','{:8}'.format(ref),png,file=ofile)

# All pages have been processed: release the PDF file handle.
pdf.close()

print('')
# Concatenate stem and suffix so the reported names match the files on disk
# (print() inserts a space between separate arguments).
print('Formatted orbits written to', outfile + '.txt')
print('Unformatted orbits written to', outfile + '_badlines.txt')
print('')