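"""Train a blank German spaCy NER pipeline for the ``KENNZAHL`` label.

Annotations are read from ``spacy_training/annotation_data.json`` and the
trained pipeline is written to ``output/model-last``.
"""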
import spacy
|
||
from spacy.training.example import Example
|
||
import json
|
||
import os
|
||
import shutil
|
||
import sys
|
||
|
||
|
||
def load_data(file_path):
    """Read annotated examples from a JSON file.

    The decoded JSON is iterated entry by entry; each entry is read through
    its ``"text"`` and ``"entities"`` keys, and every entity must unpack into
    exactly three values ``(start, end, label)``.

    Args:
        file_path: Path of the JSON annotation file (opened as UTF-8).

    Returns:
        A list of ``(text, {"entities": [(start, end, label), ...]})``
        tuples, one per entry in the file, in file order.
    """
    with open(file_path, "r", encoding="utf8") as handle:
        records = json.load(handle)

    samples = []
    for record in records:
        text = record["text"]
        # Unpacking enforces three fields per span and turns the JSON lists
        # into tuples.
        spans = [(begin, stop, tag) for begin, stop, tag in record["entities"]]
        samples.append((text, {"entities": spans}))
    return samples
|
||
|
||
|
||
def _set_training_status(running):
    """Write ``{"running": running}`` to the training status file."""
    with open("spacy_training/training_running.json", "w") as f:
        json.dump({"running": running}, f)


def main():
    """Train the NER model, store it on disk and exit with a status code.

    Steps:
      1. Load the annotations from ``spacy_training/annotation_data.json``.
      2. Train a blank German pipeline with a single ``KENNZAHL`` NER label
         for 20 passes over the data (one example per update, dropout 0.2).
      3. Save the pipeline to ``output/temp-model``, copy an existing
         ``output/model-last`` to ``output/model-backup``, then move the
         temporary directory to ``output/model-last``.
      4. Save a second copy to ``spacy_training/output/model-last``.

    The whole run is guarded: on any error the message is printed and the
    temporary model directory is removed. The status file
    ``spacy_training/training_running.json`` is reset to
    ``{"running": false}`` on success and on failure alike, so the flag
    cannot stay stuck after a crashed run.

    Raises:
        SystemExit: Always; code 0 on success, 1 on any failure.
    """
    temp_model_dir = "output/temp-model"
    final_model_dir = "output/model-last"
    backup_dir = "output/model-backup"

    exit_code = 1
    try:
        # Make sure the "output" folder exists.
        os.makedirs("output", exist_ok=True)

        train_data = load_data(
            os.path.join("spacy_training", "annotation_data.json")
        )

        nlp = spacy.blank("de")
        ner = nlp.add_pipe("ner")
        ner.add_label("KENNZAHL")

        # initialize() is the spaCy v3 name; begin_training() is only a
        # deprecated alias that forwards to it.
        optimizer = nlp.initialize()
        for _ in range(20):
            for text, annotations in train_data:
                example = Example.from_dict(nlp.make_doc(text), annotations)
                nlp.update([example], drop=0.2, sgd=optimizer)

        # Remove a leftover temporary directory from an earlier run.
        if os.path.exists(temp_model_dir):
            shutil.rmtree(temp_model_dir)

        # Save to the temporary location first so that a failed write never
        # touches the current model.
        nlp.to_disk(temp_model_dir)

        # Back up the previous version (if any) before replacing it.
        if os.path.exists(final_model_dir):
            if os.path.exists(backup_dir):
                shutil.rmtree(backup_dir)
            shutil.copytree(final_model_dir, backup_dir)
            shutil.rmtree(final_model_dir)

        # Move the freshly written model into place.
        shutil.move(temp_model_dir, final_model_dir)
        print("[INFO] Training abgeschlossen und Modell gespeichert.")

        nlp.to_disk("spacy_training/output/model-last")
        exit_code = 0
    except Exception as e:
        print(f"[FEHLER] Während des Trainings ist ein Fehler aufgetreten: {e}")
        # Best-effort cleanup; it must not mask the original error.
        shutil.rmtree(temp_model_dir, ignore_errors=True)
    finally:
        # Training is over either way, so always clear the running flag.
        try:
            _set_training_status(False)
        except OSError as e:
            print(f"[FEHLER] Status konnte nicht zurückgesetzt werden: {e}")
            exit_code = 1

    sys.exit(exit_code)
|
||
|
||
|
||
# Run the training only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|