a3 überarbeitet, Zeile 290 einlesen funktioniert, ohne try:, mehr regex

Main
kevin 2023-01-18 19:21:09 +01:00
parent fc34a84e3b
commit dbb2f8044b
1 changed files with 81 additions and 81 deletions

View File

@ -1,82 +1,82 @@
"""Convert ``Personen.txt`` into ``PersonenNeu.json``.

The input is a text file whose first line is the header
``Name,Adresse,GebDatum,Telefon``.  Every following line is split on commas
into five fields: name, street part of the address, postal-code/city part of
the address, birthday (``YYYY-MM-DD``) and phone number.  Each line becomes
one JSON object; the objects are written as a JSON array.
"""
import re
import json
import string

# Patterns are compiled once and written as raw strings so that regex escapes
# such as \s, \d and \D are not treated as (invalid) Python string escapes.
FIELD_SEPARATOR = re.compile(r",")
NAME_SEPARATOR = re.compile(r"\s")
TITLE = re.compile(r".*\.$")  # any token ending in a dot, e.g. "Dr."
# Text run that starts at the first character that is not a quote, whitespace
# or digit, and continues (spaces included) until the next quote or digit.
# This keeps multi-word streets/cities such as "Am Markt" or "Bad Homburg".
ADDRESS_TEXT = re.compile(r'[^"\s\d][^"\d]*')
HOUSE_NUMBER = re.compile(r"(?<=\D)\d+")  # first digit run after a non-digit
POSTAL_CODE = re.compile(r"\d{5}")  # first five consecutive digits
DATE_SEPARATOR = re.compile(r"-")
PHONE = re.compile(r"[^\n]+")  # drops the trailing newline, keeps the format


def _first_match(pattern, text, what):
    """Return the first match of *pattern* in *text* or raise ValueError."""
    found = pattern.search(text)
    if found is None:
        raise ValueError("no {} found in {!r}".format(what, text))
    return found.group(0)


def split_name(name_field):
    """Split the name field into ``(title, first, second, last)``.

    The field is split on single whitespace characters.  The first token is
    taken as a title only if it ends in a dot AND more than two tokens exist;
    this keeps names such as "Klaus-D. Baum" (dot inside the first name) from
    being read as a title.  ``title`` and ``second`` are ``None`` when absent.

    Raises:
        ValueError: if fewer than two name tokens remain.
    """
    tokens = NAME_SEPARATOR.split(name_field)
    title = None
    if len(tokens) > 2 and TITLE.fullmatch(tokens[0]):
        title = tokens[0]
        tokens = tokens[1:]
    if len(tokens) < 2:
        raise ValueError(
            "expected first and last name in {!r}".format(name_field))
    if len(tokens) == 3:  # first, second and last name
        first, second, last = tokens
    else:
        first, second, last = tokens[0], None, tokens[1]
    return title, first, second, last


def parse_person(line, index):
    """Turn one data line into the dictionary that is written to JSON.

    Args:
        line: one raw line of the input file (may end with a newline).
        index: value stored under the ``'Index'`` key.

    Returns:
        A dict with the keys Index, Titel, Vorname, Zweitname, Nachname,
        Geburtstag, Strasse, Hausnummer, PLZ, Wohnort and Telefonnummer.
        House number and postal code stay strings, so leading zeros of a
        postal code are preserved.

    Raises:
        ValueError: if the line does not contain the expected parts.
    """
    fields = FIELD_SEPARATOR.split(line)
    if len(fields) < 5:
        raise ValueError(
            "expected 5 comma-separated fields, got {}: {!r}".format(
                len(fields), line))
    name_field, street_field, city_field, birthday_field, phone_field = (
        fields[:5])

    title, first_name, second_name, last_name = split_name(name_field)

    # The stored date is YYYY-MM-DD; the output uses DD.MM.YYYY.
    date_parts = DATE_SEPARATOR.split(birthday_field)
    if len(date_parts) < 3:
        raise ValueError(
            "expected YYYY-MM-DD birthday, got {!r}".format(birthday_field))
    birthday = ".".join((date_parts[2], date_parts[1], date_parts[0]))

    return {
        'Index': index,
        'Titel': title,
        'Vorname': first_name,
        'Zweitname': second_name,
        'Nachname': last_name,
        'Geburtstag': birthday,
        'Strasse': _first_match(
            ADDRESS_TEXT, street_field, "street name").rstrip(),
        'Hausnummer': _first_match(
            HOUSE_NUMBER, street_field, "house number"),
        'PLZ': _first_match(POSTAL_CODE, city_field, "postal code"),
        'Wohnort': _first_match(ADDRESS_TEXT, city_field, "city").rstrip(),
        'Telefonnummer': _first_match(PHONE, phone_field, "phone number"),
    }


def main(source=r"Personen.txt", target=r"PersonenNeu.json"):
    """Read *source*, convert every data line and write *target* as JSON.

    The first line (the column header) is skipped, as are blank lines.
    ``'Index'`` counts the converted persons starting at 0.

    Raises:
        ValueError: for a malformed data line, prefixed with its line number.
    """
    # NOTE(review): files are opened with the platform default encoding, as
    # before; pass an explicit encoding once the encoding of the input file
    # is confirmed.
    with open(source, 'r') as handle:
        lines = handle.readlines()

    persons = []
    # Data starts on line 2 of the file; line 1 is the header.
    for line_number, line in enumerate(lines[1:], start=2):
        if not line.strip():
            continue  # e.g. an empty line at the end of the file
        try:
            persons.append(parse_person(line, len(persons)))
        except ValueError as error:
            raise ValueError(
                "{} line {}: {}".format(source, line_number, error)
            ) from error

    with open(target, 'w') as f:
        json.dump(persons, f, indent=1, ensure_ascii=False)


if __name__ == "__main__":
    main()