a3 überarbeitet, Zeile 290 einlesen funktioniert, ohne try:, mehr regex
parent
fc34a84e3b
commit
dbb2f8044b
124
SL2/s2_a3.py
124
SL2/s2_a3.py
|
@ -1,82 +1,82 @@
|
|||
import re
|
||||
import json
|
||||
import string
|
||||
|
||||
text_file = open(r"Personen.txt", "r")
|
||||
data = text_file.readlines()
|
||||
file = open(r"Personen.txt", 'r')
|
||||
lines = file.readlines()
|
||||
del lines[0] #delete line "Name,Adresse,GebDatum,Telefon"
|
||||
|
||||
arr = [None] * len(data)
|
||||
counter = 1
|
||||
text_file.close()
|
||||
personsData = [None] * len(lines)
|
||||
|
||||
for line in data:
|
||||
try:
|
||||
data_array = re.split(",", line)
|
||||
name = data_array[0]
|
||||
name_array = re.split("\s",name)
|
||||
adresse1 = data_array[1]
|
||||
adresse2 = data_array[2]
|
||||
geburtsdatum = data_array[3]
|
||||
telefon = data_array[4]
|
||||
telefon = telefon.replace("\n", "")
|
||||
file.close()
|
||||
|
||||
titel = re.search("^.*\.$",name_array[0])
|
||||
index = 0 #set index equals line
|
||||
|
||||
for line in lines:
|
||||
lineData = re.split(",", line) #splits whole line into name, adresse, birthday, number
|
||||
nameData = re.split("\s", lineData[0]) #splits name area
|
||||
|
||||
if titel != None:
|
||||
titel = titel.string
|
||||
if len(name_array) == 4:
|
||||
vorname = name_array[1]
|
||||
zweitname = name_array[2]
|
||||
nachname = name_array[3]
|
||||
#pre-initializing data
|
||||
title = None #could be null
|
||||
vname = ""
|
||||
secName = None #could be null
|
||||
name = ""
|
||||
bday = ""
|
||||
street = ""
|
||||
streetNumber = 0 #integer-format
|
||||
postal = 0 #integer-format
|
||||
city = ""
|
||||
phone = ""
|
||||
|
||||
#Name-Extract
|
||||
if(re.fullmatch(".*\.$", nameData[0]) and len(nameData) > 2): #Ausnahmebehandlung für "Klaus-D. Baum" Zeile 290 bzw. Personen . im Vornamen | . = beliebiger char \.$ = Ende auf Punkt
|
||||
title = nameData[0]
|
||||
if(len(nameData) == 4): #Check für Zweitnamen
|
||||
vname = nameData[1]
|
||||
secName = nameData[2]
|
||||
name = nameData[3]
|
||||
else:
|
||||
vorname = name_array[1]
|
||||
nachname = name_array[2]
|
||||
zweitname = None
|
||||
vname = nameData[1]
|
||||
name = nameData[2]
|
||||
else:
|
||||
if len(name_array) == 3:
|
||||
vorname = name_array[0]
|
||||
zweitname = name_array[1]
|
||||
nachname = name_array[2]
|
||||
if(len(nameData) == 3): #Check für Zweitnamen
|
||||
vname = nameData[0]
|
||||
secName = nameData[1]
|
||||
name = nameData[2]
|
||||
else:
|
||||
vorname = name_array[0]
|
||||
nachname = name_array[1]
|
||||
zweitname = None
|
||||
vname = nameData[0]
|
||||
name = nameData[1]
|
||||
|
||||
# adresse_array = re.split("\s", adresse)
|
||||
plz = re.findall("\d{5}", adresse2)
|
||||
hausnummer = re.findall(r"\b\d{1,4}\b", adresse1)
|
||||
strasse_chars = re.findall("\D",adresse1)
|
||||
strasse = ''.join(strasse_chars)
|
||||
strasse = strasse.replace("\"", "")
|
||||
ort_chars = re.findall("\D",adresse2)
|
||||
ort = ''.join(ort_chars)
|
||||
ort = ort.replace("\"", "")
|
||||
#Adresse-Extract
|
||||
street = re.search('[^\"\s\d]+', lineData[1]).group(0) # ^ = ignoriert Leerzeichen " und Zahlen
|
||||
streetNumber = re.search('(?<=\D)\d+', lineData[1]).group(0) #beginnt bei ersten Zahl
|
||||
postal = re.search('\d{5}', lineData[2]).group(0) #extrahiert die ersten fünf Zahlen
|
||||
city = re.search('[^\"\s\d]+', lineData[2]).group(0) #ignoriert " Leerzeichen und Zahlen
|
||||
|
||||
#Birthday
|
||||
bdayData = re.split("-", lineData[3]) #split birthday data
|
||||
bday = bdayData[2]+"."+bdayData[1]+"."+bdayData[0] #compose data in right order
|
||||
|
||||
geburtsdatum = re.findall("[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]", line)
|
||||
# telefon = re.findall("\d{6,20}", line)
|
||||
#Telefon
|
||||
phone = re.search('[^\n]+', lineData[4]).group(0) #just extract \n and keep format
|
||||
|
||||
#arrange the extracted data in JSON format
|
||||
person = {
|
||||
"Index" : counter,
|
||||
"Titel" : titel,
|
||||
"Vorname" : vorname,
|
||||
"Zweitname" : zweitname,
|
||||
"Nachname" : nachname,
|
||||
"Geburtsdatum" : geburtsdatum,
|
||||
"Strasse" : strasse,
|
||||
"Hausnummer" : hausnummer,
|
||||
"PLZ" : plz,
|
||||
"Wohnort" : ort,
|
||||
"Rufnummer" : telefon
|
||||
}
|
||||
'Index' : index,
|
||||
'Titel' : title,
|
||||
'Vorname' : vname,
|
||||
'Zweitname' : secName,
|
||||
'Nachname' : name,
|
||||
'Geburtstag' : bday,
|
||||
'Strasse' : street,
|
||||
'Hausnummer' : streetNumber,
|
||||
'PLZ' : postal,
|
||||
'Wohnort' : city,
|
||||
'Telefonnummer' : phone}
|
||||
personsData[index] = person
|
||||
|
||||
arr[counter - 1] = person
|
||||
counter = counter + 1
|
||||
except:
|
||||
continue
|
||||
index += 1
|
||||
|
||||
with open(r"PersonenNeu.json", "w") as f:
|
||||
json.dump(arr,f, indent=1, ensure_ascii=False)
|
||||
with open(r'PersonenNeu.json', 'w') as f:
|
||||
json.dump(personsData, f, indent=1, ensure_ascii=False)
|
||||
|
||||
f.close()
|
Loading…
Reference in New Issue