"""Train a blank German spaCy NER model and write it to disk.

The script reads annotated examples from
``spacy_training/annotation_data.json``, trains an ``ner`` pipe with the single
label ``KENNZAHL``, stores the result under ``output/model-last`` (keeping the
previous version in ``output/model-backup``) and under
``spacy_training/output/model-last``, and finally records in
``spacy_training/training_running.json`` that training is no longer running.
"""
import json
import os
import shutil
import sys

import spacy
from spacy.training.example import Example

_TRAIN_DATA_PATH = os.path.join("spacy_training", "annotation_data.json")
_STATUS_FILE = "spacy_training/training_running.json"
_TEMP_MODEL_DIR = "output/temp-model"
_FINAL_MODEL_DIR = "output/model-last"
_BACKUP_MODEL_DIR = "output/model-backup"
_MIRROR_MODEL_DIR = "spacy_training/output/model-last"
_N_EPOCHS = 20
_DROPOUT = 0.2


def load_data(file_path: str) -> list:
    """Load annotated training examples from a JSON file.

    The file must contain a list of objects, each with a ``"text"`` string and
    an ``"entities"`` list of ``[start, end, label]`` triples.

    Args:
        file_path: Path of the UTF-8 encoded JSON file.

    Returns:
        A list of ``(text, {"entities": [(start, end, label), ...]})`` tuples.
    """
    with open(file_path, "r", encoding="utf8") as f:
        raw = json.load(f)
    return [
        (
            entry["text"],
            {
                # JSON yields lists; unpacking rebuilds each span as a 3-tuple.
                "entities": [
                    (start, end, label)
                    for start, end, label in entry["entities"]
                ]
            },
        )
        for entry in raw
    ]


def _train(train_data):
    """Train a blank German pipeline with one NER label and return it."""
    nlp = spacy.blank("de")
    ner = nlp.add_pipe("ner")
    ner.add_label("KENNZAHL")
    # initialize() is the spaCy v3 name for the deprecated begin_training().
    optimizer = nlp.initialize()
    for _ in range(_N_EPOCHS):
        for text, annotations in train_data:
            example = Example.from_dict(nlp.make_doc(text), annotations)
            nlp.update([example], drop=_DROPOUT, sgd=optimizer)
    return nlp


def _save_model(nlp):
    """Write *nlp* to the model directories; return the process exit code.

    The model is first written to a temporary directory and only then moved
    over the final one, so a failed write never replaces the last good model.

    Returns:
        0 on success, 1 if any step raised (the error is printed and the
        temporary directory is removed).
    """
    try:
        # Remove a leftover temporary directory from an earlier run.
        if os.path.exists(_TEMP_MODEL_DIR):
            shutil.rmtree(_TEMP_MODEL_DIR)

        # Save the model to the temporary location first.
        nlp.to_disk(_TEMP_MODEL_DIR)

        # Keep the previous model (if any) as the backup.
        if os.path.exists(_FINAL_MODEL_DIR):
            if os.path.exists(_BACKUP_MODEL_DIR):
                shutil.rmtree(_BACKUP_MODEL_DIR)
            shutil.move(_FINAL_MODEL_DIR, _BACKUP_MODEL_DIR)

        # Move the freshly written model into place.
        shutil.move(_TEMP_MODEL_DIR, _FINAL_MODEL_DIR)
        print("[INFO] Training abgeschlossen und Modell gespeichert.")

        # Second copy of the model; make sure its parent directory exists,
        # as main() does for "output".
        os.makedirs(os.path.dirname(_MIRROR_MODEL_DIR), exist_ok=True)
        nlp.to_disk(_MIRROR_MODEL_DIR)
    except Exception as e:
        print(f"[FEHLER] Während des Trainings ist ein Fehler aufgetreten: {e}")
        if os.path.exists(_TEMP_MODEL_DIR):
            shutil.rmtree(_TEMP_MODEL_DIR)
        return 1
    return 0


def _mark_training_finished():
    """Record in the status file that training is no longer running."""
    with open(_STATUS_FILE, "w", encoding="utf8") as f:
        json.dump({"running": False}, f)


def main():
    """Load the data, train the model, save it, and exit with a status code.

    Exits with code 0 when the model was saved and 1 when saving failed.
    Errors raised while loading data or training are not caught here and
    propagate. The status file is reset to ``{"running": false}`` in every
    case, so a failed run does not leave the flag stuck.
    """
    # Make sure the "output" directory exists.
    os.makedirs("output", exist_ok=True)

    exit_code = 1
    try:
        train_data = load_data(_TRAIN_DATA_PATH)
        nlp = _train(train_data)
        exit_code = _save_model(nlp)
    finally:
        # Training has ended, successfully or not: set the status to False.
        _mark_training_finished()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()