"""Convert the comma-separated person list ``Personen.txt`` to ``PersonenNeu.json``.

Each data line is expected to look like::

    [Title.] First [Second] Last,"Street 12, 12345 City",YYYY-MM-DD,Phone

The quoted address contains a comma of its own, so a plain split on ``,``
yields five fields: name, street part, postal/city part, birthday, phone.
"""
import json
import re

# Patterns are compiled once at import time and written as raw strings so that
# ``\s``, ``\d`` and ``\D`` reach the regex engine instead of being treated as
# (invalid) string escape sequences.
_WHITESPACE = re.compile(r"\s")
_FIRST_WORD = re.compile(r'[^"\s\d]+')    # first run without quotes, blanks, digits
_HOUSE_NUMBER = re.compile(r"(?<=\D)\d+")  # first digit run preceded by a non-digit
_POSTAL_CODE = re.compile(r"\d{5}")        # first five consecutive digits
_UNTIL_NEWLINE = re.compile(r"[^\n]+")     # everything up to the line break


def _parse_name(full_name):
    """Split a name field into ``(title, first, second, last)``.

    The first token counts as a title only if it ends with a dot AND more than
    two tokens exist; this keeps names such as "Klaus-D. Baum" from losing
    their first name to the title slot. ``title`` and ``second`` are ``None``
    when absent.

    Raises:
        IndexError: if the field holds fewer than two tokens.
    """
    tokens = _WHITESPACE.split(full_name)

    title = None
    if tokens[0].endswith(".") and len(tokens) > 2:
        title, tokens = tokens[0], tokens[1:]

    if len(tokens) == 3:
        first, second, last = tokens
    else:
        # Two tokens is the regular case; any surplus tokens are ignored.
        first, second, last = tokens[0], None, tokens[1]
    return title, first, second, last


def _parse_person(line, index):
    """Build the JSON record (a dict) for one data line.

    ``index`` is stored unchanged under the ``'Index'`` key. House number and
    postal code are kept as the matched strings, so leading zeros survive.
    Street and city are the first word of their field only.

    Raises:
        IndexError: if the line has fewer than five comma-separated fields.
        AttributeError: if one of the address/phone patterns finds no match.
    """
    fields = line.split(",")
    title, first, second, last = _parse_name(fields[0])

    # Input is YYYY-MM-DD, output is DD.MM.YYYY.
    date_parts = fields[3].split("-")
    birthday = f"{date_parts[2]}.{date_parts[1]}.{date_parts[0]}"

    return {
        'Index': index,
        'Titel': title,
        'Vorname': first,
        'Zweitname': second,
        'Nachname': last,
        'Geburtstag': birthday,
        'Strasse': _FIRST_WORD.search(fields[1]).group(0),
        'Hausnummer': _HOUSE_NUMBER.search(fields[1]).group(0),
        'PLZ': _POSTAL_CODE.search(fields[2]).group(0),
        'Wohnort': _FIRST_WORD.search(fields[2]).group(0),
        'Telefonnummer': _UNTIL_NEWLINE.search(fields[4]).group(0),
    }


def main(source_path=r"Personen.txt", target_path=r'PersonenNeu.json'):
    """Read ``source_path``, parse every data line and dump the records as JSON.

    The first line (column header) is dropped; blank lines are skipped. An
    empty input file results in an empty JSON list. The output file is only
    opened after all lines have been parsed successfully.
    """
    with open(source_path, 'r') as source:
        rows = source.readlines()[1:]  # [1:] drops the header, safe on empty files

    data_rows = (row for row in rows if row.strip())
    persons = [_parse_person(row, index) for index, row in enumerate(data_rows)]

    with open(target_path, 'w') as target:
        json.dump(persons, target, indent=1, ensure_ascii=False)


if __name__ == "__main__":
    main()