import re
import json
import traceback

# Number of parse_zeile() calls made so far; the current value is stored as
# 'Index' in every record the function returns.
index = 0

# Optional leading word, two mandatory words, optional trailing word; words
# are separated by single spaces. Example inputs seen in the data:
#   Maurizio Nette
#   Ing. Charlotte Sölzer
#   Dipl.-Ing. Friedericke Hecker
#   Univ.Prof. Siglinde Herrmann
_NAME_PATTERN = re.compile(r'(?:([^ ]+) )?([^ ]+) ([^ ]+)(?: ([^ ]+))?')

# "<street> <house number>,<5-10 digit postal code> <town>". Example inputs:
#   Bärbel-Heydrich-Straße 45,76368 Freital
#   Augustin-Weinhold-Weg 14,05392 Sankt Goarshausen
#   Lorenz-Lindner-Allee 46,61054 Nürtingen
#   Eckard-Sölzer-Gasse 13,68137 Uelzen
_ADDRESS_PATTERN = re.compile(r'([^0-9]+) ([0-9]+),([0-9]{5,10}) (.+)')

# Three digit groups separated by a space, dot or hyphen.
_BIRTH_PATTERN = re.compile(r'([0-9]{2,4})[ .-]([0-9]{2})[ .-]([0-9]{2,4})')


def _split_fields(zeile):
    """Split a line on commas that lie outside double quotes.

    Double-quote characters only toggle the "inside quotes" state and are
    never copied into the resulting fields.
    """
    fields = []
    current = []
    in_quotes = False
    for char in zeile:
        if char == '"':
            in_quotes = not in_quotes
        elif char == "," and not in_quotes:
            fields.append("".join(current))
            current = []
        else:
            current.append(char)
    fields.append("".join(current))
    return fields


def parse_zeile(zeile):
    """Parse one comma-separated person line into a dictionary.

    The first three fields are read as name, address and date of birth; any
    further fields are ignored. Every value except 'Index' is wrapped in a
    single-element list.

    Args:
        zeile: One input line without its trailing newline.

    Returns:
        A dict with the keys 'Index', 'Titel', 'Vorname', 'Zweitname',
        'Nachname', 'Straße', 'Hausnummer', 'PLZ', 'Wohnort' and
        'Geburtsdatum' (in that insertion order).

    Raises:
        ValueError: If the line has fewer than three fields or one of the
            fields does not match its expected pattern.

    Side effects:
        Increments the module-level ``index`` on every call, including calls
        that end up raising.
    """
    global index
    index += 1

    fields = _split_fields(zeile)
    if len(fields) < 3:
        raise ValueError(
            "expected at least 3 fields, got %d: %r" % (len(fields), zeile)
        )
    name, adresse, geburtsdatum = fields[:3]

    person = {'Index': index}

    name_match = _NAME_PATTERN.search(name)
    if name_match is None:
        raise ValueError("unrecognised name: %r" % (name,))
    person['Titel'] = [name_match.group(1)]
    person['Vorname'] = [name_match.group(2)]
    if name_match.group(4) is None:
        # No trailing word: the word after the first name is the surname.
        person['Zweitname'] = [None]
        person['Nachname'] = [name_match.group(3)]
    else:
        # Trailing word present: it is the surname and the word before it
        # is the middle name.
        person['Zweitname'] = [name_match.group(3)]
        person['Nachname'] = [name_match.group(4)]

    address_match = _ADDRESS_PATTERN.search(adresse)
    if address_match is None:
        raise ValueError("unrecognised address: %r" % (adresse,))
    person['Straße'] = [address_match.group(1)]
    person['Hausnummer'] = [address_match.group(2)]
    person['PLZ'] = [address_match.group(3)]
    person['Wohnort'] = [address_match.group(4)]

    birth_match = _BIRTH_PATTERN.search(geburtsdatum)
    if birth_match is None:
        raise ValueError("unrecognised date of birth: %r" % (geburtsdatum,))
    first, middle, last = birth_match.groups()
    if len(first) == 4:
        # A four-digit leading group is taken as the year, so the outer
        # groups are swapped to emit day.month.year.
        first, last = last, first
    person['Geburtsdatum'] = [".".join((first, middle, last))]

    return person


# Driver: parse every data line of Personen.txt and write the collected
# records to PersonenNeu.json. A line that fails to parse is reported on
# stdout together with its traceback and then skipped.
personen = []

# NOTE(review): no encoding is passed to open(), so the platform default is
# used; confirm the encoding of Personen.txt and pass it explicitly if needed.
with open("Personen.txt", 'r') as file:
    # Skip the first line (presumably a column header - confirm against the
    # data). The default argument keeps an empty file from raising
    # StopIteration.
    next(file, None)
    for zeile in file:
        zeile = zeile.replace("\n", "")
        try:
            zeile_parsed = parse_zeile(zeile)
        except Exception:
            # Catch Exception rather than everything so that Ctrl-C and
            # SystemExit still terminate the run.
            print("Fehler in Zeile:", zeile)
            print(traceback.format_exc())
        else:
            personen.append(zeile_parsed)

with open("PersonenNeu.json", 'w') as file:
    json.dump(personen, file)