pse2_ff/project/backend/spacy-service/spacy_training/ner_trainer.py

82 lines
2.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import spacy
from spacy.training.example import Example
import json
import os
import shutil
import sys
def load_data(file_path):
    """Read annotated training samples from a JSON file.

    The file must contain a list of objects, each providing a ``"text"``
    entry and an ``"entities"`` entry made of ``[start, end, label]``
    triples.

    Args:
        file_path: Path of the UTF-8 encoded JSON annotation file.

    Returns:
        A list of ``(text, {"entities": [(start, end, label), ...]})``
        tuples; every entity triple is converted to a tuple.
    """
    with open(file_path, "r", encoding="utf8") as handle:
        records = json.load(handle)

    samples = []
    for record in records:
        text = record["text"]
        spans = []
        # Unpack explicitly so that malformed triples fail loudly here.
        for start, end, label in record["entities"]:
            spans.append((start, end, label))
        samples.append((text, {"entities": spans}))
    return samples
def _train_ner(train_data, iterations=20, dropout=0.2):
    """Train a blank German pipeline with one NER label and return it.

    Args:
        train_data: Iterable of ``(text, annotations)`` pairs as produced
            by ``load_data``.
        iterations: Number of passes over ``train_data``.
        dropout: Dropout rate handed to ``nlp.update``.

    Returns:
        The trained ``nlp`` pipeline object.
    """
    nlp = spacy.blank("de")
    ner = nlp.add_pipe("ner")
    ner.add_label("KENNZAHL")
    # initialize() is the spaCy v3 name for the deprecated begin_training();
    # it returns the optimizer just like the old call did.
    optimizer = nlp.initialize()
    for _ in range(iterations):
        for text, annotations in train_data:
            example = Example.from_dict(nlp.make_doc(text), annotations)
            nlp.update([example], drop=dropout, sgd=optimizer)
    return nlp


def _publish_model(nlp, temp_model_dir, final_model_dir, backup_dir):
    """Save ``nlp`` to a temp directory, back up the old model, then swap."""
    # Remove leftovers of a previous, interrupted run.
    if os.path.exists(temp_model_dir):
        shutil.rmtree(temp_model_dir)
    # Write to the temp location first so a failed save never touches
    # the model that is currently in place.
    nlp.to_disk(temp_model_dir)
    # Keep exactly one backup of the previous model (if there is one).
    if os.path.exists(final_model_dir):
        if os.path.exists(backup_dir):
            shutil.rmtree(backup_dir)
        shutil.copytree(final_model_dir, backup_dir)
        shutil.rmtree(final_model_dir)
    # Move the freshly written model into its final place.
    shutil.move(temp_model_dir, final_model_dir)


def _mark_training_finished():
    """Write ``{"running": false}`` to the training status file."""
    with open("spacy_training/training_running.json", "w", encoding="utf-8") as f:
        json.dump({"running": False}, f)


def main():
    """Train the NER model, store it on disk and reset the status file.

    Loads the annotations from ``spacy_training/annotation_data.json``,
    trains the model, saves it to ``output/model-last`` (keeping the
    previous version in ``output/model-backup``) and additionally to
    ``spacy_training/output/model-last``.

    The status file ``spacy_training/training_running.json`` is set to
    ``{"running": false}`` whether training succeeded or failed, so a
    crashed run does not leave the flag unreset.

    Exits the process with status 0 on success and 1 on any error.
    """
    temp_model_dir = "output/temp-model"
    final_model_dir = "output/model-last"
    backup_dir = "output/model-backup"

    exit_code = 0
    try:
        # Make sure the "output" folder exists.
        os.makedirs("output", exist_ok=True)
        train_data = load_data(os.path.join("spacy_training", "annotation_data.json"))
        nlp = _train_ner(train_data)
        _publish_model(nlp, temp_model_dir, final_model_dir, backup_dir)
        print("[INFO] Training abgeschlossen und Modell gespeichert.")
        # NOTE(review): to_disk appears to create only the last path
        # component, so the parent directory is created explicitly - confirm.
        os.makedirs("spacy_training/output", exist_ok=True)
        nlp.to_disk("spacy_training/output/model-last")
    except Exception as e:
        print(f"[FEHLER] Während des Trainings ist ein Fehler aufgetreten: {e}")
        # Best-effort cleanup; a failing cleanup must not mask the error.
        shutil.rmtree(temp_model_dir, ignore_errors=True)
        exit_code = 1
    finally:
        # Training is over either way: always reset the status flag.
        try:
            _mark_training_finished()
        except OSError as e:
            print(f"[FEHLER] Status-Datei konnte nicht aktualisiert werden: {e}")
            exit_code = 1
    sys.exit(exit_code)
# Run the training only when executed as a script, not when imported.
if __name__ == "__main__":
    main()