36 lines
934 B
Python
36 lines
934 B
Python
import spacy
|
|
from spacy.training.example import Example
|
|
import json
|
|
|
|
|
|
def load_data(file_path):
    """Load NER training data from a JSON annotation file.

    The file is read as UTF-8 JSON and must contain a list of objects, each
    with a ``"text"`` string and an ``"entities"`` list whose items are
    ``[start, end, label]`` triples.

    Args:
        file_path: Path of the JSON annotation file.

    Returns:
        A list of ``(text, {"entities": [(start, end, label), ...]})``
        tuples, one per entry in the file, in file order.

    Raises:
        KeyError: If an entry lacks the ``"text"`` or ``"entities"`` key.
        ValueError: If an entity does not consist of exactly three items.
    """
    with open(file_path, "r", encoding="utf8") as f:
        raw = json.load(f)

    # The explicit three-way unpacking turns the JSON lists into tuples and
    # rejects malformed spans early instead of passing them on.
    return [
        (
            entry["text"],
            {
                "entities": [
                    (start, end, label)
                    for start, end, label in entry["entities"]
                ]
            },
        )
        for entry in raw
    ]
|
|
|
|
|
|
def main(
    data_path="annotation_data.json",
    output_dir="output/model-last",
    n_iter=20,
    drop=0.2,
):
    """Train a blank German NER pipeline and save it to disk.

    Builds a blank ``"de"`` pipeline with a single ``ner`` component that
    knows the label ``KENNZAHL``, updates it one example at a time for
    ``n_iter`` passes over the training data, and writes the result to
    ``output_dir``.

    Args:
        data_path: JSON annotation file passed to ``load_data``.
        output_dir: Directory the trained pipeline is written to.
        n_iter: Number of passes over the training data.
        drop: Dropout rate passed to ``nlp.update``.
    """
    from pathlib import Path

    train_data = load_data(data_path)

    nlp = spacy.blank("de")
    ner = nlp.add_pipe("ner")
    ner.add_label("KENNZAHL")

    # spaCy v3 renamed begin_training() to initialize(); the old name is a
    # deprecated alias for it.
    optimizer = nlp.initialize()

    for _ in range(n_iter):
        for text, annotations in train_data:
            example = Example.from_dict(nlp.make_doc(text), annotations)
            nlp.update([example], drop=drop, sgd=optimizer)

    # Create missing parent directories up front so that saving cannot fail
    # on a missing "output/" folder after training has already finished.
    output_path = Path(output_dir)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    nlp.to_disk(output_path)
|
|
|
|
|
|
# Run the training only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|