import json
import random

import spacy
from spacy.training.example import Example


def load_data(file_path):
    """Load annotated examples from a JSON file into spaCy's training format."""
    with open(file_path, "r", encoding="utf8") as f:
        raw = json.load(f)
    train_data = []
    for entry in raw:
        text = entry["text"]
        # Each entity is stored as [start, end, label]; convert to tuples.
        entities = [(start, end, label) for start, end, label in entry["entities"]]
        train_data.append((text, {"entities": entities}))
    return train_data


def main():
    train_data = load_data("annotation_data.json")

    # Start from a blank German pipeline and add an NER component with our label.
    nlp = spacy.blank("de")
    ner = nlp.add_pipe("ner")
    ner.add_label("KENNZAHL")

    # initialize() sets up the pipeline weights and returns the optimizer
    # (it replaces the deprecated begin_training() in spaCy v3).
    optimizer = nlp.initialize()

    for i in range(20):
        # Shuffle each epoch so the update order does not bias the model.
        random.shuffle(train_data)
        for text, annotations in train_data:
            example = Example.from_dict(nlp.make_doc(text), annotations)
            nlp.update([example], drop=0.2, sgd=optimizer)

    nlp.to_disk("output/model-last")
    # nlp.to_disk("model/")  # Saves the model


if __name__ == "__main__":
    main()
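

# --- Usage sketch ----------------------------------------------------------
# A minimal, optional helper (not called by default) showing how the model
# saved above could be loaded and inspected. The directory matches the
# to_disk() call in main(); the sample sentence is an illustrative assumption,
# not part of the training data.
def demo_saved_model(model_dir="output/model-last"):
    trained = spacy.load(model_dir)
    doc = trained("Der Umsatz stieg im dritten Quartal um 12,5 Prozent.")
    # Print the predicted entity spans and their labels.
    print([(ent.text, ent.label_) for ent in doc.ents])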