"""Convert the person list in ``Personen.txt`` into ``PersonenNeu.json``.

The input starts with the header line ``Name,Adresse,GebDatum,Telefon``.
Every following line is split on ``,`` and is expected to yield five pieces:

    0: name            optional title, first name, optional middle name(s),
                       last name, separated by whitespace
    1: street part     street name and house number (may carry a ``"``)
    2: city part       five-digit postal code and city (may carry a ``"``)
    3: birthday        ``YYYY-MM-DD``
    4: phone number    kept verbatim, only the line ending is removed

Each line becomes one JSON object; the objects are written as a JSON array.
"""
import json
import re

INPUT_PATH = "Personen.txt"
OUTPUT_PATH = "PersonenNeu.json"

# Raw strings keep the backslashes for the regex engine (non-raw "\d" / "\s"
# are invalid string escapes and warn on recent Python versions).
# House number: first run of digits that is preceded by a non-digit.
_HOUSE_NUMBER_RE = re.compile(r"(?<=\D)\d+")
# Postal code: first run of five digits.
_POSTAL_CODE_RE = re.compile(r"\d{5}")
_QUOTES_AND_DIGITS_RE = re.compile(r'["\d]')


def _clean(text):
    """Return *text* without double quotes and with whitespace collapsed."""
    return " ".join(text.replace('"', " ").split())


def _split_name(raw_name):
    """Split a name into ``(title, first, middle, last)``.

    The first token counts as a title only if it ends with ``.`` AND more
    than two tokens are present, so a two-part name such as
    ``Klaus-D. Baum`` keeps ``Klaus-D.`` as the first name.  ``title`` and
    ``middle`` are ``None`` when absent; several middle names are joined
    with a single space.

    Raises:
        ValueError: if fewer than two name tokens are present.
    """
    # str.split() ignores repeated/leading/trailing whitespace, so a double
    # space cannot produce empty name parts.
    tokens = raw_name.split()
    if len(tokens) < 2:
        raise ValueError("cannot split name {!r}".format(raw_name))

    title = None
    if len(tokens) > 2 and tokens[0].endswith("."):
        title, tokens = tokens[0], tokens[1:]

    first, *middle, last = tokens
    return title, first, " ".join(middle) or None, last


def _split_street(field):
    """Split the street part into ``(street, house_number)``.

    The street is everything in front of the house number, so multi-word
    street names are kept whole.  If nothing precedes the number, the text
    after it is used instead.  The house number is returned as a string.

    Raises:
        ValueError: if no house number is found.
    """
    match = _HOUSE_NUMBER_RE.search(field)
    if match is None:
        raise ValueError("no house number in {!r}".format(field))
    street = _clean(field[:match.start()]) or _clean(field[match.end():])
    return street, match.group(0)


def _split_city(field):
    """Split the city part into ``(postal_code, city)``.

    The postal code stays a string so that leading zeros survive.  The city
    is the field with all digits and quotes removed, which keeps multi-word
    city names whole; it may be empty if the field holds only a postal code.

    Raises:
        ValueError: if no five-digit postal code is found.
    """
    match = _POSTAL_CODE_RE.search(field)
    if match is None:
        raise ValueError("no postal code in {!r}".format(field))
    city = " ".join(_QUOTES_AND_DIGITS_RE.sub(" ", field).split())
    return match.group(0), city


def _format_birthday(field):
    """Turn ``YYYY-MM-DD`` into ``DD.MM.YYYY``.

    Raises:
        ValueError: if the field does not consist of three ``-`` separated
            parts.
    """
    parts = field.strip().split("-")
    if len(parts) != 3:
        raise ValueError("cannot parse birthday {!r}".format(field))
    year, month, day = parts
    return ".".join((day, month, year))


def parse_person(line, index):
    """Parse one data line into the dictionary that is written to JSON.

    Args:
        line: one raw line of the input file (a trailing newline is fine).
        index: value stored under the ``Index`` key.

    Returns:
        dict with the keys ``Index``, ``Titel``, ``Vorname``, ``Zweitname``,
        ``Nachname``, ``Geburtstag``, ``Strasse``, ``Hausnummer``, ``PLZ``,
        ``Wohnort`` and ``Telefonnummer``.  ``Titel`` and ``Zweitname`` are
        ``None`` when absent; all other values except ``Index`` are strings.

    Raises:
        ValueError: if the line has fewer than five comma-separated pieces
            or one of the pieces cannot be parsed.
    """
    fields = line.split(",")
    if len(fields) < 5:
        raise ValueError(
            "expected 5 comma-separated fields, got {} in {!r}".format(
                len(fields), line
            )
        )

    title, first_name, middle_name, last_name = _split_name(fields[0])
    street, house_number = _split_street(fields[1])
    postal_code, city = _split_city(fields[2])

    return {
        'Index': index,
        'Titel': title,
        'Vorname': first_name,
        'Zweitname': middle_name,
        'Nachname': last_name,
        'Geburtstag': _format_birthday(fields[3]),
        'Strasse': street,
        'Hausnummer': house_number,
        'PLZ': postal_code,
        'Wohnort': city,
        # Only the line ending is removed; the number's format is kept.
        'Telefonnummer': fields[4].rstrip("\r\n"),
    }


def parse_persons(lines):
    """Parse all lines of the input file, header included.

    The first line (the column header) is dropped and blank lines are
    skipped.  Records are numbered from 0 in the order they appear.

    Args:
        lines: list of raw lines as returned by ``readlines()``; may be
            empty.

    Returns:
        list of person dictionaries, see :func:`parse_person`.
    """
    records = (line for line in lines[1:] if line.strip())
    return [parse_person(line, index) for index, line in enumerate(records)]


def main():
    """Read ``Personen.txt`` and write ``PersonenNeu.json``."""
    # Both files use the platform default encoding, as the script did before.
    with open(INPUT_PATH, "r") as source:
        lines = source.readlines()

    persons = parse_persons(lines)

    with open(OUTPUT_PATH, "w") as target:
        json.dump(persons, target, indent=1, ensure_ascii=False)


if __name__ == "__main__":
    main()