progName = "GedExplore_v04.py"
# by David@ColeCanada.com
# on 2023GJul10
#
# Purpose: to explore a GEDCOM file by
#   1. loading the keys into a Python dictionary
#   2. creating a ged family database,
#      putting all info into a Python dictionary using unique keys
#   3. displaying the metrics
#
# v03 will use "try" when reading the ged file
#
# TODO
#   0. validate FileName
#      ignore records preceding "1 SCHEMA"
#   1. get key between @ and @
#   2. parse the NAME correctly, allowing for mistakes
#      given/surname/title
#      both given & surname must not be null
#   3. find refs to missing INDI
#   4. find FAMs with no parent(s)
#   5. during second pass
#      ensure FAMS match and FAMC match
#      count INDIs and FAMs
#      gather BIRT, DEAT, MARR, DATE, PLAC, SEX
#      ensure that each record has been used
#
# Goal: see GEDExplore_plan.txt
#
# Issue 1: encountered a utf-8 error in "ColesOfDevonDeLeg15L10.ged"
#   after record 674, so I used "lastGood=674"
#   (when lastGood==10 it will list all recN).
#   But when I copied the bad file to "ColesOfDevo.ged", it worked fine,
#   so this must be a data record in a file stored on microsd. Doh!
#   I believe that these 2 files "should be" identical. Doh!
print(progName)

#### Control Parameters
displayOnly = 10  # normally 10
FILEname = "not stated in header"
prefixList = []

################################################################
# Input file selection: exactly one gedFile assignment is active.
# The commented-out alternatives are kept as a record of which sample files
# produced utf-8 read errors, with the lastGood setting noted beside each.
#gedFile=input("gedFile name")
#gedFile="COLEDES4.GED"  #no utf=8 error  #lastGood=10
#gedFile="SmithEg.ged"  #no utf=8 error  #lastGood=10
#gedFile="davidcole3.ged"  #utf-8 error after nRec 10924  #lastGood=10925
#gedFile="ColesOfDevon.ged"  #no utf=8 error  #lastGood=10
#gedFile="ColesOfDevonDeLeg15L10.ged"  #error after nRec 674  #lastGood=674
#gedFile="JohnsonSampleTree_asof_2022BFeb08.ged"  #no utf=8 error  #lastGood=10
#gedFile="Cole21CMar22_asof_22DApr06.ged"  #error after 15983  #lastGood=15983
#gedFile="Fam1.GED"  #no utf=8 error  #lastGood=10
gedFile = "Cole21CMar22_asof_22DApr06_X15984.ged"  #error after 15983  #lastGood=15983
#gedFile="Fam1.GED"
#gedFile="ColeDavid2021LDec14_deFS.ged"
#gedFile="ColeDavid2021LDec14_deFS_viaGeany.ged"
#gedFile="ColeDavid2021LDec14_deFS_202viaGeany.ged"
#gedFile="ColeDavid2021LDec14_deFS_998viaGeany.ged"

# Opened in text mode with the platform default encoding; the read loop below
# consumes it one record (line) at a time.
# NOTE(review): g is not closed anywhere in the visible code -- confirm that
# later code closes or reuses it.
g = open(gedFile)

# Read-loop state.
nRec = 0            # 1-based number of the record currently being read
gedDict = {}
gedHeadDict = {}
errList = []        # one message per record that could not be decoded
isHead = False
isPrevKey = False
famList = {"1 HUSB", "1 WIFE", "1 CHIL"}  # a set, despite the name
#print(famList)
#for line in g:
errCnt = 0          # number of decode errors seen so far
firstUTF_recN = 0   # nRec at which the first decode error occurred (0 = none)
prevRec = ""        # text of the previously read record, for error messages
# isHdrProcessed is True only after the header has been fully processed
isHdrProcessed = False
#print("at 69 isHdrProcessed:", isHdrProcessed)

while True:
    nRec = nRec + 1

    # Progress indicator: print nRec whenever its last four digits are
    # "0000", i.e. on every 10,000th record.  For record numbers shorter
    # than four digits the slice can never equal "0000".
    strNRec = str(nRec)
    #print("strNRec[3:-1]:",strNRec)
    lenNRec = len(strNRec)
    suffixN = lenNRec - 4
    #print("strNRec[suffixN:lenNRec]:",strNRec[suffixN:lenNRec])
    rhDigits = strNRec[suffixN:lenNRec]
    if rhDigits == "0000":
        print(nRec)
        #input("?")
    #if strNRec[3:-1]=="
    #print("at 70")

    try:
        line = g.readline()
    except UnicodeDecodeError:
        # Only decoding failures are treated as recoverable.  Catching
        # everything (as a bare "except:" would) also swallows Ctrl-C and
        # turns a persistent I/O error into an endless loop, because the
        # placeholder below is never the empty string that signals EOF.
        # NOTE(review): after a decode error the text layer may already have
        # consumed more input than the one bad record -- confirm whether nRec
        # still matches physical line numbers afterwards.
        #print("at 73")
        errCnt += 1
        if errCnt == 1:
            firstUTF_recN = nRec
        #if end
        # Substitute a placeholder record so processing can carry on.
        line = "bad+utf-8 error" + "\n"
        #print("Error reading nRec:",nRec," errCnt:",errCnt)
        #print("utf-8 err after:","nRec:",(nRec-1),"prevRec:",prevRec, end="")
        utf8Line = "utf-8 err after:" + "nRec:" + str((nRec - 1)) + ",prevRec:" + prevRec.strip()
        errList.append(utf8Line)
        #print("utf8Line:",utf8Line)

    # Remember this record (or the placeholder) for the next error message.
    prevRec = line

    # at eof, break out of the while loop
    if line == '':
        break
    #line=g.read
    #if lastGood==10 : print(nRec)
    # (the next commented-out line is cut off in the source; kept verbatim)
    #if nRec>=(lastGood-5) and nRec