Formatierungsänderungen durch black, jetzt endgültig committen

pull/94/head
Abdulrahman Dabbagh 2025-06-16 14:27:35 +02:00
parent 10e2996039
commit d22572cc44
6 changed files with 1734 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,18 @@
import os
import json
from training_data import TRAINING_DATA
# Destination of the migrated annotations: annotation_data.json, placed in
# the same directory as this script.
OUTFILE = os.path.join(os.path.dirname(__file__), "annotation_data.json")

# Re-shape every (text, annotations) pair into a JSON-friendly record whose
# entity spans are plain [start, end, label] lists.
json_list = [
    {
        "text": sample_text,
        "entities": [
            [span_start, span_end, span_label]
            for span_start, span_end, span_label in sample_annot["entities"]
        ],
    }
    for sample_text, sample_annot in TRAINING_DATA
]

# Keep non-ASCII characters (e.g. umlauts) readable in the output file.
with open(OUTFILE, "w", encoding="utf8") as f:
    json.dump(json_list, f, ensure_ascii=False, indent=2)

print("Alle Trainingsdaten wurden erfolgreich nach annotation_data.json migriert!")

View File

@ -0,0 +1,18 @@
import json
# Load the existing annotations (path is relative to the current working
# directory).
with open("annotation_data.json", "r", encoding="utf-8") as source:
    data = json.loads(source.read())

# New key figure, as a record holding the sentence and its
# [start, end, label] entity spans.
neuer_eintrag = {
    "text": "Hier steht der Beispielsatz mit der neuen Kennzahl.",
    "entities": [[1, 5, "NEUEKENNZAHL"]],
}

# Append the record to the loaded annotations.
data.append(neuer_eintrag)

# Write everything back, keeping non-ASCII characters readable.
serialized = json.dumps(data, ensure_ascii=False, indent=2)
with open("annotation_data.json", "w", encoding="utf-8") as target:
    target.write(serialized)

View File

@ -0,0 +1,35 @@
import spacy
from spacy.training.example import Example
import json
def load_data(file_path):
    """Read annotated examples from a JSON file into training tuples.

    The file must contain a list of objects, each with a ``"text"`` entry
    and an ``"entities"`` entry holding ``[start, end, label]`` triples.

    Args:
        file_path: Path of the JSON annotation file (read as UTF-8).

    Returns:
        A list of ``(text, {"entities": [(start, end, label), ...]})``
        tuples in file order; each entity triple is converted to a tuple.
    """
    with open(file_path, "r", encoding="utf8") as handle:
        records = json.load(handle)

    return [
        (
            record["text"],
            {
                "entities": [
                    (span_start, span_end, span_label)
                    for span_start, span_end, span_label in record["entities"]
                ]
            },
        )
        for record in records
    ]
def main():
    """Train a blank German NER pipeline on the annotation file and save it.

    Loads ``annotation_data.json`` (relative to the current working
    directory), makes 20 passes over the examples, updating on one example
    at a time with a dropout of 0.2, and writes the resulting pipeline to
    ``output/model-last``.
    """
    from pathlib import Path

    train_data = load_data("annotation_data.json")

    nlp = spacy.blank("de")
    ner = nlp.add_pipe("ner")
    # NOTE(review): only this label is registered with the component;
    # confirm the annotation file uses no other entity labels.
    ner.add_label("KENNZAHL")

    # initialize() is the spaCy v3 replacement for the deprecated
    # begin_training(); it also returns the optimizer.
    optimizer = nlp.initialize()

    for _ in range(20):
        for text, annotations in train_data:
            example = Example.from_dict(nlp.make_doc(text), annotations)
            nlp.update([example], drop=0.2, sgd=optimizer)

    # Create the whole directory chain up front so that saving does not
    # depend on "output/" already existing.
    model_dir = Path("output/model-last")
    model_dir.mkdir(parents=True, exist_ok=True)
    nlp.to_disk(model_dir)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,9 @@
{
"id": "TEST",
"extracted_text_per_page": [
{
"page": 1,
"text": "Die Gesamtrendite beträgt 7,2 %."
}
]
}

View File

@ -57,6 +57,8 @@ services:
- VALIDATE_SERVICE_URL=http://validate:5000/validate - VALIDATE_SERVICE_URL=http://validate:5000/validate
ports: ports:
- 5052:5052 - 5052:5052
volumes:
- ./backend/spacy-service/spacy_training:/app/spacy_training
exxeta: exxeta:
build: build: