pse2_ff/project/backend/spacy-service/spacy_training/ner_trainer.py

36 lines
934 B
Python

import spacy
from spacy.training.example import Example
import json
def load_data(file_path):
    """Load NER training data from a JSON annotation file.

    The file must contain a JSON array of objects, each with a ``"text"``
    string and an ``"entities"`` list whose items are
    ``[start, end, label]`` triples.

    Args:
        file_path: Path to the UTF-8 encoded JSON annotation file.

    Returns:
        A list of ``(text, {"entities": [(start, end, label), ...]})``
        tuples, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an entry lacks ``"text"`` or ``"entities"``.
        ValueError: If an entity does not have exactly three items.
    """
    with open(file_path, "r", encoding="utf8") as f:
        raw = json.load(f)

    # JSON has no tuple type, so each entity arrives as a list; unpacking
    # into a 3-tuple also rejects malformed entities early.
    return [
        (
            entry["text"],
            {
                "entities": [
                    (start, end, label)
                    for start, end, label in entry["entities"]
                ]
            },
        )
        for entry in raw
    ]
def main(
    data_path="annotation_data.json",
    output_dir="output/model-last",
    n_iter=20,
    dropout=0.2,
):
    """Train a blank German NER pipeline and save it to disk.

    Args:
        data_path: JSON annotation file, read with ``load_data``.
        output_dir: Directory the trained pipeline is written to; it is
            created (including parents) if it does not exist.
        n_iter: Number of passes over the training data.
        dropout: Dropout rate passed to ``nlp.update``.
    """
    # Local imports keep this function self-contained without touching the
    # module-level import block.
    import random
    from pathlib import Path

    train_data = load_data(data_path)

    nlp = spacy.blank("de")
    ner = nlp.add_pipe("ner")
    ner.add_label("KENNZAHL")
    # Also register every label that occurs in the data, so annotations
    # using a label other than "KENNZAHL" are known to the component.
    for _text, annotations in train_data:
        for _start, _end, label in annotations["entities"]:
            ner.add_label(label)

    # `begin_training` is a deprecated alias of `initialize` in spaCy v3.
    optimizer = nlp.initialize()

    for _ in range(n_iter):
        # Shuffle each epoch so updates are not always applied in the
        # same order.
        random.shuffle(train_data)
        for text, annotations in train_data:
            example = Example.from_dict(nlp.make_doc(text), annotations)
            nlp.update([example], drop=dropout, sgd=optimizer)

    # Create the target directory up front so saving does not depend on
    # the parent directory ("output/") already existing.
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    nlp.to_disk(output_path)
# Run the training only when executed as a script, not when imported.
if __name__ == "__main__":
    main()