Formatierungsänderungen durch black, jetzt endgültig committen
parent
10e2996039
commit
d22572cc44
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,18 @@
|
|||
import os
|
||||
import json
|
||||
from training_data import TRAINING_DATA
|
||||
|
||||
# Target file: annotation_data.json, located next to this script.
OUTFILE = os.path.join(os.path.dirname(__file__), "annotation_data.json")

# Convert every (text, annotations) pair into a JSON-friendly dict.
# Each entity span is written out as a [start, end, label] list.
json_list = [
    {
        "text": sample_text,
        "entities": [
            [begin, stop, tag] for begin, stop, tag in sample_annot["entities"]
        ],
    }
    for sample_text, sample_annot in TRAINING_DATA
]

# Write the converted records as pretty-printed, non-ASCII-escaped JSON.
with open(OUTFILE, "w", encoding="utf8") as out_handle:
    json.dump(json_list, out_handle, ensure_ascii=False, indent=2)

print("Alle Trainingsdaten wurden erfolgreich nach annotation_data.json migriert!")
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
import json
|
||||
|
||||
# New annotated sample to add: a text plus its [start, end, label] spans.
neuer_eintrag = {
    "text": "Hier steht der Beispielsatz mit der neuen Kennzahl.",
    "entities": [[1, 5, "NEUEKENNZAHL"]],
}

# Load the existing annotations from the current working directory.
with open("annotation_data.json", "r", encoding="utf-8") as source:
    data = json.load(source)

# Append the new sample to the loaded records.
data.append(neuer_eintrag)

# Write the extended list back to the same file.
with open("annotation_data.json", "w", encoding="utf-8") as target:
    json.dump(data, target, ensure_ascii=False, indent=2)
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
import spacy
|
||||
from spacy.training.example import Example
|
||||
import json
|
||||
|
||||
|
||||
def load_data(file_path):
    """Read annotated samples from a JSON file.

    The file must contain a list of objects, each with a "text" entry and an
    "entities" entry holding three-element spans (start, end, label).

    Args:
        file_path: Path of the JSON file to read (opened as UTF-8).

    Returns:
        A list of ``(text, {"entities": [(start, end, label), ...]})`` tuples,
        in file order.
    """
    with open(file_path, "r", encoding="utf8") as handle:
        records = json.load(handle)

    # Unpacking each span into exactly three names keeps the strictness of a
    # plain three-way unpack: malformed spans raise instead of passing through.
    return [
        (
            record["text"],
            {
                "entities": [
                    (begin, stop, tag) for begin, stop, tag in record["entities"]
                ]
            },
        )
        for record in records
    ]
|
||||
|
||||
|
||||
def main():
    """Train a blank German NER pipeline and save it to ``output/model-last``.

    Loads the samples from ``annotation_data.json`` (relative to the current
    working directory), registers every entity label found in them, runs 20
    passes over the data with one example per update step, and writes the
    resulting pipeline to disk.
    """
    # Local import: only needed here to prepare the output directory.
    import os

    train_data = load_data("annotation_data.json")
    nlp = spacy.blank("de")
    ner = nlp.add_pipe("ner")

    # Register every label that occurs in the data instead of a single
    # hard-coded one, so newly annotated labels are actually trained.
    # "KENNZAHL" is always kept so the previous behaviour is a subset.
    labels = {"KENNZAHL"}
    for _, annotations in train_data:
        labels.update(label for _, _, label in annotations["entities"])
    for label in sorted(labels):  # sorted for a deterministic label order
        ner.add_label(label)

    # initialize() is the spaCy v3 name; begin_training() is only a
    # deprecated alias for it.
    optimizer = nlp.initialize()
    for _ in range(20):
        for text, annotations in train_data:
            example = Example.from_dict(nlp.make_doc(text), annotations)
            nlp.update([example], drop=0.2, sgd=optimizer)

    # Make sure the parent directory exists; to_disk is not relied on to
    # create missing parent directories.
    output_dir = "output/model-last"
    os.makedirs(os.path.dirname(output_dir), exist_ok=True)
    nlp.to_disk(output_dir)


# Run the training only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
{
|
||||
"id": "TEST",
|
||||
"extracted_text_per_page": [
|
||||
{
|
||||
"page": 1,
|
||||
"text": "Die Gesamtrendite beträgt 7,2 %."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -57,6 +57,8 @@ services:
|
|||
- VALIDATE_SERVICE_URL=http://validate:5000/validate
|
||||
ports:
|
||||
- 5052:5052
|
||||
volumes:
|
||||
- ./backend/spacy-service/spacy_training:/app/spacy_training
|
||||
|
||||
exxeta:
|
||||
build:
|
||||
|
|
|
|||
Loading…
Reference in New Issue