# Code to read orbits from IAU circular PDF with uncertainties and convert to ORB6 format
#
# Formatted lines => cir###.txt
#   Read orbital elements from IAU Circular PDF (with uncertainties)
#   Print lines in ORB6 format to cir###.txt
# Lines unable to be formatted => cir###_badlines.txt
#
# R. Matson - 6/2023 (dsplit + split_text from S. Stepanoff)
#           - 7/2024 updated version to send lines unable to be split to new file
#             and pull coords, mags from WDS
#
#
# Change references (ref1..ref5 and AUTHOR_REFS below) before running!
#
###########################################
import os
import re
from itertools import groupby

import numpy as np

# pdfplumber and utils_WDS.read_WDS are imported inside main() so that the
# helper functions below can be imported without those packages installed.

############################################
# References
ref1 = 'Alz2023'
ref2 = 'Doc2023a'
ref3 = 'Sca2023a'
ref4 = 'Tok2023a'
ref5 = 'Lin2023'

# Author string as printed in the circular -> reference code written to the
# output line.  Authors missing from this table are written as '???'.
AUTHOR_REFS = {
    'ALZ': ref1,
    'D et al.': ref2,
    'S et al.': ref3,
    'TOK': ref4,
    'TOK+RV': ref4,
    'TOK+Gaia': ref4,
    'L et al.': ref5,
    # 'Z & T': ref5,
}

# NOTE(review): the whitespace inside the placeholder literals below is
# reproduced as found; confirm the widths against the ORB6 column layout.
HD = '. '              # Placeholder for HD number
NO_ADS = '. '          # Written when the ADS token is not numeric
NO_HIP = '. '          # Written when the HIP token is not numeric
BLANK_COORDS = ' '     # Coordinates when there is no WDS match
BLANK_MAG = ' . '      # Magnitude when there is no WDS match
BLANK_NOTES = ' '      # Notes when there is no WDS match

# WDS summ file giving precise coordinates, magnitudes, notes, etc.
WDS_SUMM_FILE = '/data/wds/wds/wds/wds.summ'

# Column header that immediately precedes the orbit rows on every page
TABLE_HEADER = re.compile(r"P T e a i Ω ω")

# Values below this are skipped when scanning a row for the epoch T
MIN_EPOCH = 1600

# WDS catalog used by get_WDS(); loaded by main()
wds = None


############################################
# Get first index containing a substring (from StackOverflow)
def first_substring(strings, substring):
    """Return the index of the first item of *strings* containing *substring*.

    Raises:
        ValueError: if no item contains *substring*.
    """
    for idx, string in enumerate(strings):
        if substring in string:
            return idx
    raise ValueError('no element contains {!r}'.format(substring))


############################################
# Check if float
def is_float(element: object) -> bool:
    """Return True if ``float(element)`` succeeds, otherwise False."""
    try:
        float(element)
    except (TypeError, ValueError):
        return False
    return True


############################################
# Function to split strings at decimal and combine with fixed widths
def dsplit(strelem, lnum, rnum):
    """Align *strelem* on its decimal point in a fixed-width field.

    The text before the '.' is right-justified to *lnum* characters and the
    text after it is left-justified to *rnum* characters.  When *strelem*
    does not contain exactly one '.', only the text before the first '.' is
    kept and the fractional part is *rnum* blanks.
    """
    parts = strelem.split('.')
    if len(parts) == 2:
        return str.rjust(parts[0], lnum) + '.' + str.ljust(parts[1], rnum)
    return str.rjust(parts[0], lnum) + '.' + ' ' * rnum


############################################
def split_text(s):
    """Yield the consecutive alphabetic / non-alphabetic runs of *s* in order."""
    for _, run in groupby(s, str.isalpha):
        yield ''.join(run)


############################################
# Use DD to match with WDS and pull precise coords and mags, returns strings
def get_WDS(name, catalog=None):
    """Look up *name* in the WDS catalog.

    The stripped name is first compared with the 'DD' column.  If that finds
    nothing, it is split after seven characters and compared against the
    'DD' and 'comp' columns together.

    Args:
        name: discoverer designation, optionally followed by components.
        catalog: table indexable by the column names 'DD', 'comp', 'coord',
            'mag1', 'mag2' and 'notes'.  Defaults to the module-level ``wds``.

    Returns:
        Tuple of strings (coords, mag1, mag2, notes) from the first matching
        row, or blank placeholders when nothing matches (a message is
        printed in that case).

    Raises:
        RuntimeError: if no catalog was passed and ``wds`` is not loaded.
    """
    if catalog is None:
        catalog = wds
    if catalog is None:
        raise RuntimeError('WDS catalog has not been loaded')

    name = name.strip()
    match = np.where(catalog['DD'] == name)[0]
    if np.size(match) == 0:
        newname = name[:7]
        comp = name[7:]
        match = np.where((catalog['DD'] == newname) & (catalog['comp'] == comp))[0]
    if np.size(match) == 0:
        print('No WDS match found for ',name)
        return BLANK_COORDS, BLANK_MAG, BLANK_MAG, BLANK_NOTES

    row = match[0]
    return (str(catalog['coord'][row]), str(catalog['mag1'][row]),
            str(catalog['mag2'][row]), str(catalog['notes'][row]))


############################################
def _extract_core(text, end_marker=None, page_no=None):
    """Return the part of a page's text holding the orbit rows.

    The rows start one character after the table header.  They end just
    before *end_marker* (searched as literal text) when one is given, and
    near the end of the page otherwise.

    Raises:
        ValueError: if the header, or a requested *end_marker*, is missing.
    """
    header = TABLE_HEADER.search(text)
    if header is None:
        raise ValueError('Table header not found on page {}'.format(page_no))
    start = header.end() + 1
    if end_marker is None:
        end = len(text) - 1
    else:
        # The user types the exact words, so match literally, not as a regex
        end = text.find(end_marker)
        if end < 0:
            raise ValueError('End marker {!r} not found on page {}'.format(end_marker, page_no))
    return text[start:end - 1]


def _tokenize(row1, row2):
    """Split the two rows of one orbit into a single cleaned token list."""
    # Replace '...' and 'fixed' with '.', and drop the '±' signs
    return [tok.replace('fixed', '.').replace('...', '.').replace('±', '')
            for tok in row1.split() + row2.split()]


def _find_epoch_index(elements):
    """Return the index of the epoch T in *elements*.

    Starts at the first token containing '.' and moves right past numeric
    values below MIN_EPOCH.  If that first token is not numeric, a message
    is printed and its index is returned unchanged.

    Raises:
        ValueError: if no token contains '.', or the scan reaches a
            non-numeric token or the end of the row.
    """
    idx = first_substring(elements, '.')
    if not is_float(elements[idx]):
        print(elements[idx],'not float')
        return idx
    while float(elements[idx]) < MIN_EPOCH:
        idx += 1
        if idx >= len(elements) or not is_float(elements[idx]):
            raise ValueError('no epoch value found in row')
    return idx


def _build_name(elements, Tidx):
    """Combine the tokens before the period into the 14-column name/DD.

    Returns None when *Tidx* does not correspond to a supported layout.
    """
    if Tidx == 5:
        return str.ljust(elements[1],3) + str.rjust(elements[2],4) + str.ljust(elements[3],7)
    if Tidx == 4:
        if elements[2][-1].isalpha():
            # Number and components run together: split where letters start
            parts = list(split_text(elements[2]))
            return str.ljust(elements[1],3) + str.rjust(parts[0],4) + ''.join(parts[1:])
        return str.ljust(elements[1],3) + str.rjust(elements[2],4)
    if Tidx == 3:
        return str.ljust(elements[1],14)
    return None


def _format_orbit(elements, Tidx, WDSno, Name, coords, mag1, mag2, note, ephem):
    """Build the output line for one orbit.

    Raises:
        IndexError: if the row has too few tokens to be formatted.
    """
    P = elements[Tidx-1]
    T = elements[Tidx]
    e = elements[Tidx+1]
    a = elements[Tidx+2]
    inc = elements[Tidx+3]
    N = elements[Tidx+4]
    O = elements[Tidx+5]
    # Tidx+6 and Tidx+7 hold the first-row ephemeris, which is not written

    # Author tokens start at Tidx+8 and run while one of the first two
    # characters of a token is a letter; Hidx ends on the token after them.
    Hidx = Tidx+8
    try:
        while elements[Hidx][0].isalpha() or elements[Hidx][1].isalpha():
            Hidx += 1
    except IndexError:
        # Token shorter than two characters, or end of row: author ends here
        pass
    author_tokens = elements[Tidx+8:Hidx]
    # Reset for every row so an odd layout never inherits the previous author
    Auth = ' '.join(author_tokens) if 1 <= len(author_tokens) <= 4 else ''

    HIP = elements[Hidx]
    ADS = elements[Hidx+1]
    P_e = elements[Hidx+2]
    T_e = elements[Hidx+3]
    e_e = elements[Hidx+4]
    a_e = elements[Hidx+5]
    i_e = elements[Hidx+6]
    N_e = elements[Hidx+7]
    O_e = elements[Hidx+8]
    # The second-row ephemeris (Hidx+9, Hidx+10) is not written, but a row
    # lacking it is incomplete: index it so that IndexError is raised.
    elements[Hidx+10]

    try:
        last = int(float(elements[Hidx+11]))
    except (IndexError, ValueError, OverflowError):
        last = ''

    ADS = str.rjust(ADS,5) if ADS.isnumeric() else NO_ADS
    HIP = str.rjust(HIP,6) if HIP.isnumeric() else NO_HIP

    P = dsplit(P,5,6)
    P_e = dsplit(P_e,4,6)
    a = dsplit(a,3,5)
    a_e = dsplit(a_e,2,5)
    inc = dsplit(inc,3,4)
    i_e = dsplit(i_e,3,4)
    N = dsplit(N,3,5)
    N_e = dsplit(N_e,3,4)
    T = dsplit(T,5,6)
    T_e = dsplit(T_e,4,6)
    e = dsplit(e,1,6)
    e_e = dsplit(e_e,1,6)
    O = dsplit(O,3,4)
    O_e = dsplit(O_e,3,4)

    ref = AUTHOR_REFS.get(Auth, '???')
    png = 'wds'+WDSno.lower()+'a.png'

    fields = [coords, WDSno, '{:14}'.format(Name), '{:5}'.format(ADS), HD,
              '{:6}'.format(HIP), mag1, mag2,
              P+'y'+P_e, a+'a', a_e, inc, i_e, N, N_e, T+'y'+T_e, e, e_e, O, O_e,
              '2000', '{:4}'.format(last), ephem, ' ', '0.0', note, 'n',
              '{:8}'.format(ref), png]
    return ' '.join(str(field) for field in fields)


def _convert_orbit(row1, row2, ephem, ofile, bfile):
    """Format one two-row orbit into *ofile*, or record it in *bfile*."""
    elements = _tokenize(row1, row2)
    if not elements:
        return  # blank pair of rows: nothing to record
    WDSno = str.ljust(elements[0],10)

    try:
        Tidx = _find_epoch_index(elements)
        Name = _build_name(elements, Tidx)
    except (ValueError, IndexError):
        Name = None
    if Name is None:
        # Layout not understood: keep the raw tokens for manual editing
        print(BLANK_COORDS, str.ljust(WDSno,11) + ' '.join(elements[1:]), file=bfile)
        return

    # Get WDS info based on DD Name
    coords, mag1, mag2, notes = get_WDS(Name)
    mag1 = dsplit(mag1,3,3)
    mag2 = dsplit(mag2,2,3)
    note = 'n' if 'N' in notes else ' '

    try:
        line = _format_orbit(elements, Tidx, WDSno, Name, coords, mag1, mag2, note, ephem)
    except IndexError:
        print(coords, str.ljust(WDSno,11) + str.ljust(Name,14) + ' '.join(elements[Tidx-1:]), file=bfile)
    else:
        print(line, file=ofile)


############################################
def main():
    """Prompt for the run parameters and convert every orbit in the PDF."""
    import pdfplumber
    from utils_WDS import read_WDS

    global wds

    print('')
    #print('References =',ref1,',',ref2,',',ref3,',',ref4,',',ref5)
    print('')

    # For testing: file = 'cir209.pdf', ephem = '2023-24', numpages = 13,
    #              core_en_pat = '2022.845'

    # Open pdf
    file = input("Enter filename: ")
    outfile = os.path.splitext(file)[0]

    # Create output files (or erase if they exist) and keep them open for the run
    with pdfplumber.open(file) as pdf, \
            open(outfile+".txt","w") as ofile, \
            open(outfile+"_badlines.txt","w") as bfile:
        # Ephemeris dates
        ephem = input('Enter ephemeris dates (YYYY-YY): ')

        # Number of pages to read in
        numpages = int(input("Enter number of pages to be read: "))
        if numpages > len(pdf.pages):
            raise ValueError('{} pages requested but the PDF has only {}'.format(numpages, len(pdf.pages)))

        # Pattern after last orbit
        core_en_pat = input("Enter exact words following final orbit (e.g., TOK = TOKOVININ): ")

        # Read in WDS summ file to get precise coordinates, magnitudes, notes, etc.
        wds = read_WDS(WDS_SUMM_FILE)

        for n in range(numpages):
            # Extract text
            text = pdf.pages[n].extract_text(x_tolerance=2) or ''
            print(n+1,'of',numpages,'pages')

            # Select data from the table header through the end of the page,
            # or up to the user-supplied words on the last page
            end_marker = core_en_pat if n == numpages-1 else None
            core = _extract_core(text, end_marker, n+1)

            # Each orbit spans two rows, so split into two-line groups
            lines = core.split("\n")
            for row1, row2 in zip(lines[::2], lines[1::2]):
                _convert_orbit(row1, row2, ephem, ofile, bfile)

    print('')
    print('Formatted orbits written to', outfile+'.txt')
    print('Unformatted orbits written to', outfile+'_badlines.txt')
    print('')


if __name__ == "__main__":
    main()