diff --git a/project/backend/coordinator/controller/spacy_controller.py b/project/backend/coordinator/controller/spacy_controller.py index edcf653..97b64aa 100644 --- a/project/backend/coordinator/controller/spacy_controller.py +++ b/project/backend/coordinator/controller/spacy_controller.py @@ -5,21 +5,17 @@ import puremagic from werkzeug.utils import secure_filename from model.database import db import os -import json import requests spacy_controller = Blueprint("spacy", __name__, url_prefix="/api/spacy") SPACY_TRAINING_URL = os.getenv("SPACY_TRAINING_URL", "http://spacy:5052/train") -training_running_flag_path = os.path.join("spacy_training", "training_running.json") +SPACY_URL = os.getenv("SPACY_URL", "http://spacy:5052") @spacy_controller.route("/train", methods=["POST"]) def trigger_training(): try: - with open(training_running_flag_path, "w") as f: - json.dump({"running": True}, f) - response = requests.post(SPACY_TRAINING_URL, timeout=600) if response.ok: return jsonify({"message": "Training erfolgreich angestoßen."}), 200 @@ -109,48 +105,29 @@ def delete_file(id): @spacy_controller.route("/append-training-entry", methods=["POST"]) -def append_training_entry(): +def forward_training_entry(): entry = request.get_json() - - if not entry or "text" not in entry or "entities" not in entry: - return ( - jsonify( - {"error": "Ungültiges Format – 'text' und 'entities' erforderlich."} - ), - 400, - ) - - path = os.path.join("spacy_training", "annotation_data.json") - try: - os.makedirs(os.path.dirname(path), exist_ok=True) - if os.path.exists(path): - with open(path, "r", encoding="utf-8") as f: - data = json.load(f) - else: - data = [] - - if entry in data: - return jsonify({"message": "Eintrag existiert bereits."}), 200 - - data.append(entry) - with open(path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2, ensure_ascii=False) - - return jsonify({"message": "Eintrag erfolgreich gespeichert."}), 200 + response = requests.post(f"{SPACY_URL}/append-training-entry", json=entry) + return jsonify(response.json()), response.status_code except Exception as e: - print(f"[ERROR] Fehler beim Schreiben: {e}") - return jsonify({"error": "Interner Fehler beim Schreiben."}), 500 + return jsonify({"error": str(e)}), 500 + + +# globale Variable oben einfügen +current_training_status = {"running": False} + + +@spacy_controller.route("/training/status", methods=["POST"]) +def update_training_status(): + data = request.get_json() + current_training_status["running"] = data.get("running", False) + running = current_training_status["running"] + print(f"[INFO] Trainingsstatus aktualisiert: running = {running}") + + return jsonify({"status": "success", "running": current_training_status["running"]}) @spacy_controller.route("/train-status", methods=["GET"]) def training_status(): - try: - if os.path.exists(training_running_flag_path): - with open(training_running_flag_path, "r") as f: - status = json.load(f) - return jsonify(status), 200 - else: - return jsonify({"running": False}), 200 - except Exception as e: - return jsonify({"error": "Fehler beim Statuscheck", "details": str(e)}), 500 + return jsonify(current_training_status), 200 diff --git a/project/backend/exxetaGPT-service/Dockerfile b/project/backend/exxetaGPT-service/Dockerfile deleted file mode 100644 index c2b6203..0000000 --- a/project/backend/exxetaGPT-service/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -FROM python:3.11-slim - -WORKDIR /app -COPY . /app -RUN pip install --no-cache-dir -r requirements.txt -ENV PYTHONUNBUFFERED=1 - -CMD ["python", "extractExxeta.py"] diff --git a/project/backend/spacy-service/app.py b/project/backend/spacy-service/app.py index 67da9ef..706f7fc 100644 --- a/project/backend/spacy-service/app.py +++ b/project/backend/spacy-service/app.py @@ -14,7 +14,7 @@ training_status = {"running": False} app = Flask(__name__) CORS(app) - +COORDINATOR_URL = os.getenv("COORDINATOR_URL", "http://coordinator:5000") VALIDATE_SERVICE_URL = os.getenv( "VALIDATE_SERVICE_URL", "http://localhost:5054/validate" ) @@ -88,31 +88,11 @@ def append_training_entry(): @app.route("/train", methods=["POST"]) def trigger_training(): from threading import Thread - import subprocess - import shutil - - def run_training(): - training_status["running"] = True - try: - if os.path.exists("output/model-last"): - shutil.copytree( - "output/model-last", "output/model-backup", dirs_exist_ok=True - ) - subprocess.run(["python", "spacy_training/ner_trainer.py"], check=True) - load_model() - except Exception as e: - print("Training failed:", e) - training_status["running"] = False Thread(target=run_training).start() return jsonify({"message": "Training gestartet"}), 200 -@app.route("/train-status", methods=["GET"]) -def get_training_status(): - return jsonify(training_status), 200 - - @app.route("/reload-model", methods=["POST"]) def reload_model(): try: @@ -127,16 +107,32 @@ def reload_model(): def run_training(): training_status["running"] = True + notify_coordinator(True) + try: if os.path.exists("output/model-last"): shutil.copytree( "output/model-last", "output/model-backup", dirs_exist_ok=True ) subprocess.run(["python", "spacy_training/ner_trainer.py"], check=True) - load_model() # ⬅ Modell nach dem Training direkt neu laden + load_model() except Exception as e: print("Training failed:", e) training_status["running"] = False + notify_coordinator(False) + + +def notify_coordinator(running: bool): + try: + response = requests.post( + f"{COORDINATOR_URL}/api/spacy/training/status", json={"running": running} + ) + print( + f"[SPACY] Coordinator: running = {running}, Status = {response.status_code}" + ) + + except Exception as e: + print(f"[SPACY] Fehler beim Senden des Trainingsstatus: {e}") if __name__ == "__main__": diff --git a/project/backend/spacy-service/spacy_training/annotation_data.json b/project/backend/spacy-service/spacy_training/annotation_data.json index b76f4fe..1aab418 100644 --- a/project/backend/spacy-service/spacy_training/annotation_data.json +++ b/project/backend/spacy-service/spacy_training/annotation_data.json @@ -1638,185 +1638,5 @@ "RENDITE" ] ] - }, - { - "text": "Die Gesamtrendite beträgt 7,2 %.", - "entities": [ - [ - 1, - 5, - "NEUEKENNZAHL" - ] - ] - }, - { - "text": "fhfhfh56", - "entities": [ - [ - 6, - 8, - "TEST545" - ] - ] - }, - { - "text": "fhfhfh56", - "entities": [ - [ - 6, - 8, - "TEST345" - ] - ] - }, - { - "text": "sdgds45", - "entities": [ - [ - 6, - 7, - "TEST243" - ] - ] - }, - { - "text": "4t4r3", - "entities": [ - [ - 4, - 5, - "TEST243" - ] - ] - }, - { - "text": "sdgds45", - "entities": [ - [ - 6, - 7, - "DGTDDTFHZ" - ] - ] - }, - { - "text": "gjufzj45", - "entities": [ - [ - 7, - 8, - "DGTDDTFHZ" - ] - ] - }, - { - "text": "irr beträgt 43", - "entities": [ - [ - 12, - 14, - "TEST3243" - ] - ] - }, - { - "text": "irr beträgt 43", - "entities": [ - [ - 12, - 14, - "IRR" - ] - ] - }, - { - "text": "Rendite besträgt 5 %", - "entities": [ - [ - 17, - 20, - "RENDITE" - ] - ] - }, - { - "text": "RenditeX besträgt 5 %", - "entities": [ - [ - 18, - 21, - "RENDITE_X" - ] - ] - }, - { - "text": "gtg3ahz8", - "entities": [ - [ - 7, - 8, - "ERTRETT" - ] - ] - }, - { - "text": "wffwee 45", - "entities": [ - [ - 7, - 9, - "TEST45" - ] - ] - }, - { - "text": "efwwef 45", - "entities": [ - [ - 7, - 9, - "TEST12" - ] - ] - }, - { - "text": "wfwefwe34", - "entities": [ - [ - 7, - 9, - "TEST232" - ] - ] - }, - { - "text": "fwefbmj34", - "entities": [ - [ - 7, - 9, - "TEST223" - ] - ] - }, - { - "text": "asdas45", - "entities": [ - [ - 5, - 7, - "TEST122" - ] - ] - }, - { - "text": "ewefw4", - "entities": [ - [ - 5, - 6, - "TEST3434" - ] - ] } ] \ No newline at end of file diff --git a/project/backend/spacy-service/spacy_training/base_config.cfg b/project/backend/spacy-service/spacy_training/base_config.cfg deleted file mode 100644 index f7ffac0..0000000 --- a/project/backend/spacy-service/spacy_training/base_config.cfg +++ /dev/null @@ -1,85 +0,0 @@ -# This is an auto-generated partial config. To use it with 'spacy train' -# you can run spacy init fill-config to auto-fill all default settings: -# python -m spacy init fill-config ./base_config.cfg ./config.cfg -[paths] -train = ./data/train.spacy -dev = ./data/train.spacy -vectors = null -[system] -gpu_allocator = null - -[nlp] -lang = "de" -pipeline = ["tok2vec","ner"] -batch_size = 1000 - -[components] - -[components.tok2vec] -factory = "tok2vec" - -[components.tok2vec.model] -@architectures = "spacy.Tok2Vec.v2" - -[components.tok2vec.model.embed] -@architectures = "spacy.MultiHashEmbed.v2" -width = ${components.tok2vec.model.encode.width} -attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"] -rows = [5000, 1000, 2500, 2500] -include_static_vectors = false - -[components.tok2vec.model.encode] -@architectures = "spacy.MaxoutWindowEncoder.v2" -width = 96 -depth = 4 -window_size = 1 -maxout_pieces = 3 - -[components.ner] -factory = "ner" - -[components.ner.model] -@architectures = "spacy.TransitionBasedParser.v2" -state_type = "ner" -extra_state_tokens = false -hidden_width = 64 -maxout_pieces = 2 -use_upper = true -nO = null - -[components.ner.model.tok2vec] -@architectures = "spacy.Tok2VecListener.v1" -width = ${components.tok2vec.model.encode.width} - -[corpora] - -[corpora.train] -@readers = "spacy.Corpus.v1" -path = ${paths.train} -max_length = 0 - -[corpora.dev] -@readers = "spacy.Corpus.v1" -path = ${paths.dev} -max_length = 0 - -[training] -dev_corpus = "corpora.dev" -train_corpus = "corpora.train" - -[training.optimizer] -@optimizers = "Adam.v1" - -[training.batcher] -@batchers = "spacy.batch_by_words.v1" -discard_oversize = false -tolerance = 0.2 - -[training.batcher.size] -@schedules = "compounding.v1" -start = 100 -stop = 1000 -compound = 1.001 - -[initialize] -vectors = ${paths.vectors} \ No newline at end of file diff --git a/project/backend/spacy-service/spacy_training/config.cfg b/project/backend/spacy-service/spacy_training/config.cfg deleted file mode 100644 index 1cf80c4..0000000 --- a/project/backend/spacy-service/spacy_training/config.cfg +++ /dev/null @@ -1,145 +0,0 @@ -[paths] -train = "./data/train.spacy" -dev = "./data/train.spacy" -vectors = null -init_tok2vec = null - -[system] -gpu_allocator = null -seed = 0 - -[nlp] -lang = "de" -pipeline = ["tok2vec","ner"] -batch_size = 1000 -disabled = [] -before_creation = null -after_creation = null -after_pipeline_creation = null -tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"} -vectors = {"@vectors":"spacy.Vectors.v1"} - -[components] - -[components.ner] -factory = "ner" -incorrect_spans_key = null -moves = null -scorer = {"@scorers":"spacy.ner_scorer.v1"} -update_with_oracle_cut_size = 100 - -[components.ner.model] -@architectures = "spacy.TransitionBasedParser.v2" -state_type = "ner" -extra_state_tokens = false -hidden_width = 64 -maxout_pieces = 2 -use_upper = true -nO = null - -[components.ner.model.tok2vec] -@architectures = "spacy.Tok2VecListener.v1" -width = ${components.tok2vec.model.encode.width} -upstream = "*" - -[components.tok2vec] -factory = "tok2vec" - -[components.tok2vec.model] -@architectures = "spacy.Tok2Vec.v2" - -[components.tok2vec.model.embed] -@architectures = "spacy.MultiHashEmbed.v2" -width = ${components.tok2vec.model.encode.width} -attrs = ["NORM","PREFIX","SUFFIX","SHAPE"] -rows = [5000,1000,2500,2500] -include_static_vectors = false - -[components.tok2vec.model.encode] -@architectures = "spacy.MaxoutWindowEncoder.v2" -width = 96 -depth = 4 -window_size = 1 -maxout_pieces = 3 - -[corpora] - -[corpora.dev] -@readers = "spacy.Corpus.v1" -path = ${paths.dev} -max_length = 0 -gold_preproc = false -limit = 0 -augmenter = null - -[corpora.train] -@readers = "spacy.Corpus.v1" -path = ${paths.train} -max_length = 0 -gold_preproc = false -limit = 0 -augmenter = null - -[training] -dev_corpus = "corpora.dev" -train_corpus = "corpora.train" -seed = ${system.seed} -gpu_allocator = ${system.gpu_allocator} -dropout = 0.1 -accumulate_gradient = 1 -patience = 1600 -max_epochs = 0 -max_steps = 20000 -eval_frequency = 200 -frozen_components = [] -annotating_components = [] -before_to_disk = null -before_update = null - -[training.batcher] -@batchers = "spacy.batch_by_words.v1" -discard_oversize = false -tolerance = 0.2 -get_length = null - -[training.batcher.size] -@schedules = "compounding.v1" -start = 100 -stop = 1000 -compound = 1.001 -t = 0.0 - -[training.logger] -@loggers = "spacy.ConsoleLogger.v1" -progress_bar = false - -[training.optimizer] -@optimizers = "Adam.v1" -beta1 = 0.9 -beta2 = 0.999 -L2_is_weight_decay = true -L2 = 0.01 -grad_clip = 1.0 -use_averages = false -eps = 0.00000001 -learn_rate = 0.001 - -[training.score_weights] -ents_f = 1.0 -ents_p = 0.0 -ents_r = 0.0 -ents_per_type = null - -[pretraining] - -[initialize] -vectors = ${paths.vectors} -init_tok2vec = ${paths.init_tok2vec} -vocab_data = null -lookups = null -before_init = null -after_init = null - -[initialize.components] - -[initialize.tokenizer] \ No newline at end of file diff --git a/project/backend/spacy-service/spacy_training/data_to_json.py b/project/backend/spacy-service/spacy_training/data_to_json.py deleted file mode 100644 index e2d84d4..0000000 --- a/project/backend/spacy-service/spacy_training/data_to_json.py +++ /dev/null @@ -1,18 +0,0 @@ -import os -import json -from training_data import TRAINING_DATA - -# Setze hier den Pfad zu annotation_data.json -OUTFILE = os.path.join(os.path.dirname(__file__), "annotation_data.json") - -json_list = [] -for text, annot in TRAINING_DATA: - entities = [] - for start, end, label in annot["entities"]: - entities.append([start, end, label]) - json_list.append({"text": text, "entities": entities}) - -with open(OUTFILE, "w", encoding="utf8") as f: - json.dump(json_list, f, ensure_ascii=False, indent=2) - -print("Alle Trainingsdaten wurden erfolgreich nach annotation_data.json migriert!") diff --git a/project/backend/spacy-service/spacy_training/output/model-last/meta.json b/project/backend/spacy-service/spacy_training/output/model-last/meta.json index a92a61a..8a188d0 100644 --- a/project/backend/spacy-service/spacy_training/output/model-last/meta.json +++ b/project/backend/spacy-service/spacy_training/output/model-last/meta.json @@ -19,16 +19,13 @@ "labels":{ "ner":[ "AUSSCH\u00dcTTUNGSRENDITE", - "IRR", "KENNZAHL", "LAUFZEIT", "L\u00c4NDERALLOKATION", "MANAGMENTGEB\u00dcHREN", "RENDITE", - "RENDITE_X", "RISIKOPROFIL", "SEKTORENALLOKATION", - "TEST3243", "ZIELAUSSCH\u00dcTTUNG", "ZIELRENDITE" ] diff --git a/project/backend/spacy-service/spacy_training/output/model-last/ner/model b/project/backend/spacy-service/spacy_training/output/model-last/ner/model index baba9fa..303ffe1 100644 Binary files a/project/backend/spacy-service/spacy_training/output/model-last/ner/model and b/project/backend/spacy-service/spacy_training/output/model-last/ner/model differ diff --git a/project/backend/spacy-service/spacy_training/output/model-last/ner/moves b/project/backend/spacy-service/spacy_training/output/model-last/ner/moves index 9f82843..48f4d53 100644 --- a/project/backend/spacy-service/spacy_training/output/model-last/ner/moves +++ b/project/backend/spacy-service/spacy_training/output/model-last/ner/moves @@ -1 +1 @@ -moves,{"0":{},"1":{"KENNZAHL":-1,"RISIKOPROFIL":-2,"AUSSCH\u00dcTTUNGSRENDITE":-3,"LAUFZEIT":-4,"RENDITE":-5,"L\u00c4NDERALLOKATION":-6,"ZIELRENDITE":-7,"ZIELAUSSCH\u00dcTTUNG":-8,"MANAGMENTGEB\u00dcHREN":-9,"SEKTORENALLOKATION":-10,"TEST3243":-11,"IRR":-12,"RENDITE_X":-13},"2":{"KENNZAHL":-1,"RISIKOPROFIL":-2,"AUSSCH\u00dcTTUNGSRENDITE":-3,"LAUFZEIT":-4,"RENDITE":-5,"L\u00c4NDERALLOKATION":-6,"ZIELRENDITE":-7,"ZIELAUSSCH\u00dcTTUNG":-8,"MANAGMENTGEB\u00dcHREN":-9,"SEKTORENALLOKATION":-10,"TEST3243":-11,"IRR":-12,"RENDITE_X":-13},"3":{"KENNZAHL":-1,"RISIKOPROFIL":-2,"AUSSCH\u00dcTTUNGSRENDITE":-3,"LAUFZEIT":-4,"RENDITE":-5,"L\u00c4NDERALLOKATION":-6,"ZIELRENDITE":-7,"ZIELAUSSCH\u00dcTTUNG":-8,"MANAGMENTGEB\u00dcHREN":-9,"SEKTORENALLOKATION":-10,"TEST3243":-11,"IRR":-12,"RENDITE_X":-13},"4":{"":1,"KENNZAHL":-1,"RISIKOPROFIL":-2,"AUSSCH\u00dcTTUNGSRENDITE":-3,"LAUFZEIT":-4,"RENDITE":-5,"L\u00c4NDERALLOKATION":-6,"ZIELRENDITE":-7,"ZIELAUSSCH\u00dcTTUNG":-8,"MANAGMENTGEB\u00dcHREN":-9,"SEKTORENALLOKATION":-10,"TEST3243":-11,"IRR":-12,"RENDITE_X":-13},"5":{"":1}}cfgneg_key \ No newline at end of file +moves{"0":{},"1":{"KENNZAHL":-1,"RISIKOPROFIL":-2,"AUSSCH\u00dcTTUNGSRENDITE":-3,"LAUFZEIT":-4,"RENDITE":-5,"L\u00c4NDERALLOKATION":-6,"ZIELRENDITE":-7,"ZIELAUSSCH\u00dcTTUNG":-8,"MANAGMENTGEB\u00dcHREN":-9,"SEKTORENALLOKATION":-10},"2":{"KENNZAHL":-1,"RISIKOPROFIL":-2,"AUSSCH\u00dcTTUNGSRENDITE":-3,"LAUFZEIT":-4,"RENDITE":-5,"L\u00c4NDERALLOKATION":-6,"ZIELRENDITE":-7,"ZIELAUSSCH\u00dcTTUNG":-8,"MANAGMENTGEB\u00dcHREN":-9,"SEKTORENALLOKATION":-10},"3":{"KENNZAHL":-1,"RISIKOPROFIL":-2,"AUSSCH\u00dcTTUNGSRENDITE":-3,"LAUFZEIT":-4,"RENDITE":-5,"L\u00c4NDERALLOKATION":-6,"ZIELRENDITE":-7,"ZIELAUSSCH\u00dcTTUNG":-8,"MANAGMENTGEB\u00dcHREN":-9,"SEKTORENALLOKATION":-10},"4":{"":1,"KENNZAHL":-1,"RISIKOPROFIL":-2,"AUSSCH\u00dcTTUNGSRENDITE":-3,"LAUFZEIT":-4,"RENDITE":-5,"L\u00c4NDERALLOKATION":-6,"ZIELRENDITE":-7,"ZIELAUSSCH\u00dcTTUNG":-8,"MANAGMENTGEB\u00dcHREN":-9,"SEKTORENALLOKATION":-10},"5":{"":1}}cfgneg_key \ No newline at end of file diff --git a/project/backend/spacy-service/spacy_training/output/model-last/vocab/strings.json b/project/backend/spacy-service/spacy_training/output/model-last/vocab/strings.json index 0d8c921..e54596f 100644 --- a/project/backend/spacy-service/spacy_training/output/model-last/vocab/strings.json +++ b/project/backend/spacy-service/spacy_training/output/model-last/vocab/strings.json @@ -273,11 +273,8 @@ "4,91", "40", "400", - "43", "45", "491", - "4r3", - "4t4r3", "5", "5%+", "5,0", @@ -308,7 +305,6 @@ "67", "7", "7,1", - "7,2", "7,5", "7,5%+", "7,50", @@ -1069,7 +1065,6 @@ "R.I.P.", "RE", "RENDITE", - "RENDITE_X", "REV", "REWE", "RISIKOPROFIL", @@ -1084,10 +1079,8 @@ "Redaktion", "Region", "Regionen", - "Rendite", "Rendite-", "Rendite-Risiko-Profil", - "RenditeX", "Renovierungen", "Rents", "Residential", @@ -1167,7 +1160,6 @@ "T", "T.", "TED", - "TEST3243", "Tag", "Target", "Target-IRR", @@ -1308,7 +1300,6 @@ "Xxxxx-Xxxxx-Xxxxx", "Xxxxx-xxx", "Xxxxx-xxxx", - "XxxxxX", "Xxxxx\u0308xx", "Xxxxx\u0308xxx-Xxxxx", "Xxxxx\u0308xxxx", @@ -1403,6 +1394,7 @@ "across", "act", "active", + "adasd23", "add", "adv", "adv.", @@ -1559,11 +1551,9 @@ "berlin", "bestandsentwicklung", "bestandsentwicklungen", - "bestr\u00e4gt", "betr", "betr.", "betreute", - "betr\u00e4gt", "bev\u00f6lkerungsprognose", "beziehungsweise", "bez\u00fcglich", @@ -1694,6 +1684,7 @@ "d.h", "d.h.", "d.x", + "d23", "dX", "dXxx.\u20ac", "d_d", @@ -1774,7 +1765,6 @@ "durchschnittlich", "du\u2019s", "dv.", - "dxdxd", "dy", "d\u00e4nemark", "d\u2019", @@ -1939,7 +1929,6 @@ "festgelegt", "festgelegter", "ff", - "fhfhfh56", "fierce", "fil", "financially", @@ -2040,7 +2029,6 @@ "ght", "gic", "gie", - "gjufzj45", "gl.", "global", "globale", @@ -2062,7 +2050,6 @@ "h.", "h.c", "h.c.", - "h56", "haltedauer", "halten", "halten-strategie", @@ -2262,7 +2249,6 @@ "ize", "j", "j.", - "j45", "ja", "jahr", "jahre", @@ -2743,10 +2729,8 @@ "relationships", "remains", "ren", - "rendite", "rendite-", "rendite-risiko-profil", - "renditex", "renegotiation", "renovierungen", "rent", @@ -2803,7 +2787,6 @@ "s.o", "s.o.", "s.w", - "s45", "sa", "sa.", "sale", @@ -2814,7 +2797,6 @@ "scs", "scsp", "sd.", - "sdgds45", "sector", "sectors", "sed", @@ -2920,7 +2902,6 @@ "tc.", "td.", "te-", - "teX", "ted", "tee", "teflimmobilfe)-", @@ -3246,7 +3227,6 @@ "\u00e4", "\u00e4.", "\u00e4gl", - "\u00e4gt", "\u00e4r.", "\u00e4rzteh\u00e4user", "\u00e4rzteh\u00e4usern", diff --git a/project/backend/spacy-service/spacy_training/test.json b/project/backend/spacy-service/spacy_training/test.json deleted file mode 100644 index 818a8fd..0000000 --- a/project/backend/spacy-service/spacy_training/test.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "id": "TEST", - "extracted_text_per_page": [ - { - "page": 1, - "text": "Die Gesamtrendite beträgt 7,2 %." - } - ] -} diff --git a/project/backend/spacy-service/spacy_training/training_data.py b/project/backend/spacy-service/spacy_training/training_data.py deleted file mode 100644 index a7f58f4..0000000 --- a/project/backend/spacy-service/spacy_training/training_data.py +++ /dev/null @@ -1,563 +0,0 @@ -TRAINING_DATA = [ - ( - "Core", - {"entities": [[0, 4, "RISIKOPROFIL"]]}, - ), - ( - "Core+", - {"entities": [[0, 5, "RISIKOPROFIL"]]}, - ), - ( - "Core/Core+", - {"entities": [[0, 10, "RISIKOPROFIL"]]}, - ), - ( - "Value Add", - {"entities": [[0, 9, "RISIKOPROFIL"]]}, - ), - ( - "Core/Value Add", - {"entities": [[0, 14, "RISIKOPROFIL"]]}, - ), - ( - "Core+/Value Add", - {"entities": [[0, 15, "RISIKOPROFIL"]]}, - ), - ( - "Core/Core+/Value Add", - {"entities": [[0, 20, "RISIKOPROFIL"]]}, - ), - ( - "The RE portfolio of the fund is a good illustration of Fond expertise in European core/core+ investments .", - {"entities": [[82, 92, "RISIKOPROFIL"]]}, - ), - ( - "Risk level: Core/Core+", - {"entities": [[12, 22, "RISIKOPROFIL"]]}, - ), - ( - "Different risk profile (core, core+, value-added)", - {"entities": [[24, 48, "RISIKOPROFIL"]]}, - ), - ( - "Core/Core+ with OpCo premium", - {"entities": [[0, 10, "RISIKOPROFIL"]]}, - ), - ( - "Core /Core+ Assets, well-established = Key Gateway Cities in Europe le.g. hotels in the market with minor asset London, Paris, Amsterdam, Berlin] management initiatives", - {"entities": [[0, 11, "RISIKOPROFIL"]]}, - ), - ( - "Risikoprofil: Core, Core +", - {"entities": [[14, 26, "RISIKOPROFIL"]]}, - ), - ( - "Name des Fonds Name des Investmentmanagers Allgemeine Informationen Name des Ansprechpartners Telefonnummer des Ansprechpartners E-Mail des Ansprechpartners Art des Anlagevehikels Struktur des Anlagevehikels Sitz des Anlagevehikels Struktur des Antagevehikels vom Manager festgelegter Stil Rechtsform Jahr des ersten Closings Laufzeit Geplantes Jahr der Auflösung Ziel-Netto-IRR / Gesamtrendite* Zielvolumen des Anlagevehikels Ziel-LTY ‚Aktueller LTV Ziirraiaein Maximaler LTV Zielregionfen)/Jand Zielsektoren Zielanlagestrategie INREV Fonds Offen Deutschland Core, Core + Offener Immobilien-Spezialfonds 2022 10 - 12 Jahre 2032 - 2034 7,50%+ 250 Mio. € 20% 0% 20% Führende Metropolregionen Deutschlands und ausgewählte Standorte >50T Einw. Wohnimmobilien Wertstabile Wohnimmobilien (mit Bestandsentwicklungen)", - {"entities": [[560, 572, "RISIKOPROFIL"]]}, - ), - ( - "Core/Core+ strategy, with tactical exposure to development projects aiming at enhancing the quality of the portfolio over time", - {"entities": [[0, 10, "RISIKOPROFIL"]]}, - ), - ( - "Strategie - Übersicht Risikoprofil Core+ Halten-Strategie Kaufen — Halten (langfristig) — Exit 1. Nachvermietungsstrategie Anlagestrategien 2. Standortaufwertungsstrategie 3. Strategie der Aufwertung der Immobilien Niederlande (max. 35 %) Länderallokation Frankreich (max. 35 %) (in % vom Zielvolumen) Skandinavien (Schweden, Dänemark) (max. 35 %) Deutschland (<= 10 %)", - {"entities": [[35, 40, "RISIKOPROFIL"]]}, - ), - ( - "Core and Core+", - {"entities": [[0, 14, "RISIKOPROFIL"]]}, - ), - ( - "core, core+, value-added", - {"entities": [[0, 24, "RISIKOPROFIL"]]}, - ), - ( - "Manage to Core: max 20%", - {"entities": [[10, 14, "RISIKOPROFIL"]]}, - ), - ( - "Benefits of the core/ core+ segment", - {"entities": [[16, 27, "RISIKOPROFIL"]]}, - ), - ( - "Drawbacks of the core/ core+ segment", - {"entities": [[17, 28, "RISIKOPROFIL"]]}, - ), - ( - "Why a Core / Core + investment program?", - {"entities": [[6, 19, "RISIKOPROFIL"]]}, - ), - ( - "Different risk profile (core, core+, value-added)", - {"entities": [[24, 48, "RISIKOPROFIL"]]}, - ), - ( - "INK MGallery Hotel Area: Amsterdam Core Tenant: Closed in 2018", - {"entities": [[35, 39, "RISIKOPROFIL"]]}, - ), - ( - "A strategy targeting high quality Core and Core+ buildings, with defined SRI objectives, in order to extract value through an active asset management.", - {"entities": [[34, 48, "RISIKOPROFIL"]]}, - ), - ( - "Navigate the diversity of the Core/Core+ investment opportunities in European Prime Cities", - {"entities": [[30, 40, "RISIKOPROFIL"]]}, - ), - ( - "GEDis an open-ended Lux-based fund providing an attractive core/core+ real estate exposure, leveraging GRRE expertise in European RE markets. It offers diversification in terms of pan-European geographies and sectors: Offices, Retail and Hotels.", - {"entities": [[59, 69, "RISIKOPROFIL"]]}, - ), - ( - "Core assets leave less room for active asset management value creation", - {"entities": [[0, 4, "RISIKOPROFIL"]]}, - ), - ( - "capital preservation is defined here as a characteristic of core/core+ investments. There is no guarantee of capital.", - {"entities": [[60, 70, "RISIKOPROFIL"]]}, - ), - ( - "Country / city BELGIUM Brussels BELGIUM Brussels SPAIN Madrid FRANCE Levallois FRANCE Paris 14 BELGIUM Brussels NETHERLANDS Rotterdam NETHERLANDS Rotterdam Sector Offices Offices Offices Offices Offices Offices Offices Logistics Risk Core", - {"entities": [[234, 238, "RISIKOPROFIL"]]}, - ), - ( - "GERD(a balanced pan-European open ended retail fund — under the form of a French collective undertaking for Real Estate investments “OPCI”) is the flagship ofQin France and combines RE and listed assets (respective targets of 60% and 40%) with max. 40% leverage. The RE portfolio of the fund is a good illustration Of expertise in European core/core+ investments.", - {"entities": [[340, 350, "RISIKOPROFIL"]]}, - ), - ( - "Prime office assets in Prime markets are very pricey unless rent reversion is real. Risk premium remains attractive on a leveraged basis. Manage to core or build to core can make sense as a LT investor in main cities. Residential is also attractive", - {"entities": [[148, 152, "RISIKOPROFIL"]]}, - ), - ( - "Paris region is a deep and liquid market. Rents have some potential to improve. Considering current low yield and fierce competition, office right outside CBD for Core + assets can be considered. Manage to core strategies could make sense.", - {"entities": [[163, 169, "RISIKOPROFIL"]]}, - ), - ( - "Lisbon is a small market but it experienced a rapid economic recovery in recent years and is interesting for Core Offices, quality Retail assetor Hotel walls with top operators. Limited liquidity of this market means investment must be small", - {"entities": [[109, 113, "RISIKOPROFIL"]]}, - ), - ( - "4,0 %", - {"entities": [[0, 5, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Prognostizierte jährliche Ausschüttung von 4,0%", - {"entities": [[44, 48, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "20% über einer @ Ausschüttungsrendite von 4,0%", - {"entities": [[44, 48, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Prognostizierte Ausschüttungsrandite* Mindestanlage Mitgliedschaft Im Anlagesusschuss Ankaufs- / Verkaufs- / Verkaufs(Teflimmobilfe)- / Baumanagementgebahr (inkl. USt.) Parformanceabhängige Vergütung Einmalige Strukturierungsgebühr Laufzeit / Investtionszeltraum Ausschüttungsintervalle Deutsche Metropolregianen und umliegende Regionen mit Städten >50T Einwohner Artikel 8 Wohnimmobilien Deutschland ‚Aktive Bestandsentwicklung Offener Spezial-AlF mit festen Anlagebedingungen rd. 200 Mio. € / max. 20% rd. 250 Mio. € 7,5 % (nach Kosten & Gebühren, vor Steuern) 8 4,0 % {nach Kosten & Gebühren, var Steuern}", - {"entities": [[570, 575, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "5,00-5,25 % Ausschüttungsrendite", - {"entities": [[0, 11, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Zielrendite 5,00-5,25 % Ausschüttungsrendite", - {"entities": [[12, 23, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Auschüttungsrendite 4,9% 5,3%", - {"entities": [[21, 25, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Auschüttungsrendite 4,9% 5,3%", - {"entities": [[26, 30, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Auschittungsrendite 3,8% 5,7%", - {"entities": [[20, 24, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Auschittungsrendite 3,8% 5,7%", - {"entities": [[25, 29, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Auschüttungsrendite 4,5% 4,6%", - {"entities": [[21, 25, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Auschüttungsrendite 4,5% 4,6%", - {"entities": [[26, 30, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Auschüttungsrendite 5,0% 4,7%", - {"entities": [[26, 30, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Auschüttungsrendite 5,0% 4,7%", - {"entities": [[21, 25, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Auschüttungsrendite “eons a Nuremberg aha 5,0 % 4,8 %", - {"entities": [[43, 48, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Auschüttungsrendite “eons a Nuremberg aha 5,0 % 4,8 %", - {"entities": [[49, 54, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "3-4% dividend yield", - {"entities": [[0, 4, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Zielmärkte Klassifizierung SFDR Invastitionsfokus Rendite- / Risikoprofil Rechtsform Eigenkapital /FK Quote Investftionsvolumen Prognostizierte Gesamtrendite {IRR)* Prognostizierte Ausschüttungsrandite* Mindestanlage Mitgliedschaft Im Anlagesusschuss Ankaufs- / Verkaufs- / Verkaufs(Teflimmobilfe)- / Baumanagementgebahr (inkl. USt.) Parformanceabhängige Vergütung Einmalige Strukturierungsgebühr Deutsche Metropolregianen und umliegende Regionen mit Städten >50T Einwohner Artikel 8 Wohnimmobilien Deutschland ‚Aktive Bestandsentwicklung Offener Spezial-AlF mit festen Anlagebedingungen rd. 200 Mio. € / max. 20% rd. 250 Mio. € 7,5 % (nach Kosten & Gebühren, vor Steuern) 8 4,0 % {nach Kosten & Gebühren, var Steuern} 5Mio.€ Ab 10 Mio. € 1,40 % / 0,80 % /2,12% / 4,91 % Laufzeit / Investtionszeltraum Ausschüttungsintervalle 20 % über einer @ Ausschüttungsrendite von 4,0 % 0,1% der bis zum 31.12.2023 erfolgten Kapitalzusagen (max. 200.000 &) 10 bis 12 Jahre / bis zu 24 Monate angestrebt Mindestens jährlich", - {"entities": [[945, 960, "LAUFZEIT"]]}, - ), - ( - "Laufzeit / Investtionszeltraum,10 bis 12 Jahre / bis zu 24 Monate angestrebt Ausschüttungsintervalle,Mindestens jährlich", - {"entities": [[31, 46, "LAUFZEIT"]]}, - ), - ( - "10-12 Jahre Laufzeit bei einem LTV von bis zu 20%", - {"entities": [[0, 11, "LAUFZEIT"]]}, - ), - ( - "vom Manager festgelegter Stil Rechtsform Jahr des ersten Closings Laufzeit Geplantes Jahr der Auflösung Ziel-Netto-IRR / Gesamtrendite* Zielvolumen des Anlagevehikels Ziel-LTY‚Aktueller LTV Zielsektoren Zielanlagestrategie Fonds Offen Deutschland Core, Core + Offener Immobilien-Spezialfonds 2022 10 - 12 Jahre", - {"entities": [[297, 310, "LAUFZEIT"], [247, 259, "RISIKOPROFIL"]]}, - ), - ( - "Allgemeine Annahmen Ankaufsphase Haltedauer Zielobjektgröße Finanzierung Investitions-annahmen Zielrendite 24 Monate Investmentzeitraum 10 Jahre (+) EUR 20-75 Mio. Keine externe Finanzierung zum Auftakt (ausschließlich Darlehen der Anteilseigner). Die Finanzierung wird nach der Ankaufsphase und Stabilisierung der Zinssätze neu geprüft. Angestrebter LTV zwischen 25-40 % Investitionen für Renovierungen und ESG- Verbesserungen werden für jedes Objekt einzeln festgelegt. 5,00-5,25 % Ausschüttungsrendites", - {"entities": [[136, 148, "LAUFZEIT"], [472, 483, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Zielrendite 5,00-5,25 % Ausschüttungsrendite 1) Ankauf von Objekten an Tag eins mit 100% Eigenkapital. Die Strategie unterstellt die Aufnahme von Fremdkapital, sobald sich die Zins- und Finanzierungskonditionen nachhaltig stabilisieren. Strategie - Übersicht Risikoprofil Core+", - {"entities": [[12, 23, "AUSSCHÜTTUNGSRENDITE"], [272, 277, "RISIKOPROFIL"]]}, - ), - ( - "Vehicle lifetime / investment period Open-ended fund", - {"entities": [[37, 52, "LAUFZEIT"]]}, - ), - ( - "Vehicle / domicile Alternative Investment Fund / Luxembourg (e.g. SCSp SICAV-RAIF) Investment strategy eturn pro Real Estate (PropCo + OpCo) Investing in upscale hotels with long-term management contracts in major European destinations Core/Core+ with OpCo premium Management Agreements solely with financially strong and experienced partners/ global brands Cash flow-oriented Cash-flow pattern Target equity /AuM € 400m equity / € 800m AuM (50% Loan-to-Value) Vehicle lifetime / investment period Open-ended fund", - {"entities": [[498, 513, "LAUFZEIT"], [236, 245, "RISIKOPROFIL"]]}, - ), - ( - "Vehicle type (Lux-RAIF) (net of fees) IRR6.5% ACCOR Vehicle structure Open-ended Targetvehiclesize € 400m (equity) Manager-defined Core/Core+ with | style OpCo Premium darge CLV. 50% Pt H | LTO N WORLDWIDE Year of first closing 2020 Target no. ofinvestors 1-5 Fund life (yrs} Open-ended Min-commitmentper —¢ 400m", - {"entities": [[131, 141, "RISIKOPROFIL"], [70, 80, "LAUFZEIT"]]}, - ), - ( - "Fund term: Open-ended", - {"entities": [[11, 21, "LAUFZEIT"]]}, - ), - ( - "Abdeckung der Risiko-Rendite-Bandbreite (Core, Core+, Value-Add)", - {"entities": [[41, 63, "RISIKOPROFIL"]]}, - ), - ( - "5,1% - 8,5% IRR!", - {"entities": [[0, 11, "RENDITE"]]}, - ), - ( - "Retailinvestitionsvolumen nach Ländern (2024) Vereinigtes Königreich, 26,4% Deutschland, 19,0% Andere, 19,7% Italien, 8,2% Irland, 3,3% N | Frankreich, Spanien, 8,1%", - {"entities": [[46, 75, "LÄNDERALLOKATION"], [76, 94, "LÄNDERALLOKATION"], [95, 108, "LÄNDERALLOKATION"], [109, 122, "LÄNDERALLOKATION"], [123, 135, "LÄNDERALLOKATION"]]}, - ), - ( - "Erwartete IRR 5 (je nach Objekt- ‘A(E) 6.00% - 8,00%", - {"entities": [[39, 52, "RENDITE"]]}, - ), - ( - "Zielmarkts Deutsche Metropolregianen und umliegende Regionen mit Städten >50T Einwohner Klassifizierung SFDR Artikel 8 Invastitionsfokus Wohnimmobilien Deutschland Rendite- / Risikoprofil ‚Aktive Bestandsentwicklung Rechtsform Offener Spezial-AlF mit festen Anlagebedingungen Eigenkapital /FK Quote rd. 200 Mio. € / max. 20% Investftionsvolumen rd. 250 Mio. € Prognostiderte Gesamtrendite {IRR)* 7,5 % (nach Kosten & Gebühren, vor Steuern) Prognostizierte Ausschüttungsrandite* @ 4,0 % {nach Kosten & Gebühren, var Steuern} Mindestanlage 5Mio.€ Mitgliedschaft Im Anlagesusschuss Ab 10 Mio. € Ankaufs- / Verkaufs- / Verkaufs(Teflimmobilfe)- / Baumanagementgebahr (inkl. USt) 1,40 %/080%/212%/491% Parformanceabhängige Vergütung 20 % über einer ® Ausschüttungsrendite von 4,0% Einmalige Strukturierungsgebühr 0,1% der bis zum 31.12.2023 erfolgten Kapitalzusagen (max. 200.000 €) Laufzelt / Investtonszeltraum 10 bis 12 Jahre / bis zu 24 Monate angestrebt Ausschüttungsintervalle Mindestens jährlich", - {"entities": [[396, 401, "RENDITE"], [482, 487, "AUSSCHÜTTUNGSRENDITE"], [914, 929, "LAUFZEIT"]]}, - ), - ( - "= Prognostizierte jährliche Ausschüttung von @ 4,0%* = Prognostizierte Gesamtrendite (IRR) von 7,5%*", - {"entities": [[48, 52, "AUSSCHÜTTUNGSRENDITE"], [96, 100, "RENDITE"]]}, - ), - ( - "Prognose: 7,5%+ IRR auf Fondsebene", - {"entities": [[10, 14, "RENDITE"]]}, - ), - ( - "= Prognostizierte jährliche Ausschüttung* von 84,0% = Prognostizierte Gesamtrendite (IRR}* von 7,5%", - {"entities": [[96, 100, "RENDITE"], [49, 53, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "= Lagefokussierung: Metropolregionen Deutschlands = Finanzierung: max. 20% LTV = Risikoprofil: Core, Core +", - {"entities": [[95, 107, "RISIKOPROFIL"]]}, - ), - ( - "Performance-Fee: 20% über einer @ Ausschüttungsrendite von 4,0%", - {"entities": [[61, 65, "AUSSCHÜTTUNGSRENDITE"]]}, - ), - ( - "Fondstyp Offener Spezial-AIF nach KAGB mit festen Anlagebedingungen ESG-Klassifizierung Fonds gemäß Artikel 8 EU-Offenlegungsverordnung KVG IntReal GmbH, Hamburg Anlagestrategie Aufbau eines Objektportfolios aus Ärztehäusern, die langfristig vermietet sind Ärztehäuser, Laborimmobilien, im Verbund mit Ärztehäusern auch ambulant Zielobjekte betreute Wohngemeinschaften; Mietanteil Medizin und medizinnahe Dienstleistungen/Handel > 65 % (Objektebene) WALT >5 Jahre bei Ankauf Objektbaujahre Ab 2000 Anlagegrenzen Einzelinvestment 8-30 Mio. EUR Anzahl Objekte 10-20 Deutschland bundesweit; jeweiliges Einzugsgebiet > 25.000 Einwohner mit Regionen stabiler Bevölkerungsprognose Risikoprofil Core / Core +", - {"entities": [[689, 702, "RISIKOPROFIL"]]}, - ), - ( - "Fondsvolumen 300 Mio. EUR Zielrendite (IRR) > 6,0 % p. a. Ausschuttung >5,0 % p. a. Ankaufszeitraum 2024-2026 Laufzeit 31.12.2036 Mindestanlage 10 Mio. EUR Anlageausschuss Ja, entscheidet u. a. über Objekterwerb (Mitglied kann ab 20 Mio. EUR gestellt werden) Gebührenstruktur Marktüblich (auf Anfrage) Projektentwicklungen keine Forward-Deals Möglich, maximal 18 Monate Vorlauf; keine Projektentwicklungsrisiken beim Fonds Erbbaurechte Möglich, sofern Laufzeit > 60 Jahre und angemessene Entschädigung bei Ablauf und Heimfall Status Objektpipeline vorhanden: siehe Folie 16 ff.", - {"entities": [[44, 57, "RENDITE"], [71, 83, "AUSSCHÜTTUNGSRENDITE"], [120, 130, "LAUFZEIT"]]}, - ), - ( - "Niederlande (max. 35 %) Länderallokation Frankreich (max. 35 %) (in % vom Zielvolumen) Skandinavien (Schweden, Dänemark) (max. 35 %) Deutschland (<= 10 %)", - {"entities": [[0, 23, "LÄNDERALLOKATION"], [41, 63, "LÄNDERALLOKATION"], [87, 132, "LÄNDERALLOKATION"], [133, 154, "LÄNDERALLOKATION"]]}, - ), - ( - "Führender Immobilien-Investmentmanager in den Nordics für globale ll institutionelle Investoren in Value Add und Core Strategien", - {"entities": [[101, 119, "RISIKOPROFIL"]]}, - ), - ( - "Core und Core+ Fonds", - {"entities": [[0, 14, "RISIKOPROFIL"]]}, - ), - ( - "Risikoprofil Core / Core+", - {"entities": [[13, 25, "RISIKOPROFIL"]]}, - ), - ( - "Durchschnittlich geplante jährliche Ausschüttung von 4,5-5,5% auf das investierte Eigenkapital an die Anleger Geplante Gesamtrendite von 5-6% (IRR) auf das eingezahlte Eigenkapital", - {"entities": [[54, 62, "AUSSCHÜTTUNGSRENDITE"], [138, 142, "RENDITE"]]}, - ), - ( - "Geografische Zielallokation nach Investitionsphase des Fonds: 1) Schweden 20-60% Allokation Länder 2) Finnland 20-60% 3) Norwegen 10-40% 4) Dänemark 10-40%", - {"entities": [[65, 80, "LÄNDERALLOKATION"], [102, 117, "LÄNDERALLOKATION"], [121, 136, "LÄNDERALLOKATION"], [140, 155, "LÄNDERALLOKATION"]]}, - ), - ( - "Deutsches Spezial-Sondervermögen mit festen Anlagebedingungen ($284 KAGB) Immobilien- oder Infrastrukturquote (nach Solvency II) Core / Core+ Euro Hauptstadtregionen und andere Großstädte in den Nordics €500 Mio. 4,5-5,5% 15 Jahre; Fonds hat unbegrenzte Laufzeit; Investmentphase 4 Jahre Maximaler Fremdkapitalanteil 50% (LTV-Ziel bei Ankauf), Langfristiges LTV-Ziel auf Fondsebene ist 45% 0,625% p. a. des Bruttofondsvermögens Zeichnungen ab € 30 Mio. - 0,03 % Rabatt Zeichnungen ab € 50 Mio. - zusatzl. 0,03 % Rabatt 1,1% des Verkehrswertes 0,6% der Bruttoverkaufswert 10% wenn Hurdle Rate 5,0 % p. a. (IRR netto) überschritten wird (nach 15 Jahren berechnet) Ja", - {"entities": [[129, 141, "RISIKOPROFIL"], [213, 221, "ZIELRENDITE"], [242, 262, "LAUFZEIT"]]}, - ), - ( - "Standort Helsinki, Finnland Sektor Bildungswesen, Schule& Kindertagesstätte Vermietbare Fläche 3.321 m? Leerstand bei Ankauf 0% / 0% Ankaufspreis+ Investitionen €21,4 Mio. + €0,2 Mio Eigenkapital €21,6 Mio. Ankaufs- / Stabilisierungs- / Exitrendite 5,0%/ 5,5%/ 5,0% NOI zum Ankaufszeitpunkt / Exit-NOI €1.1m/ €1.2m Zielrenditen (netto für LPs) 5,4% IRR/ 1.5x EM / DY 4,3% Ankauf / Exit Dezember 2023/ Dezember 2033", - {"entities": [[345, 349, "ZIELRENDITE"]]}, - ), - ( - "Evergreen/offene Fondsstrukturenv Core / Core+ Strategien", - {"entities": [[34, 46, "RISIKOPROFIL"]]}, - ), - ( - "BEE Henderson German 2012 Logistik Core/D/Art. 8 € 336 Mio. 12 (voll investiert) 13,0 % p.a.", - {"entities": [[35, 39, "RISIKOPROFIL"], [81, 87, "RENDITE"]]}, - ), - ( - "ICF German Logistics 2014 Logistik Core/D/Art. 8 € 400 Mio. 16 (voll investiert) 12,0 % p.a.", - {"entities": [[35, 39, "RISIKOPROFIL"], [81, 87, "RENDITE"]]}, - ), - ( - "Individualmandat 2015 Logistik Core / D+AU/ ArTt. 6 € 200 Mio. 8 (realisiert) 8,0 % p.a.", - {"entities": [[31, 35, "RISIKOPROFIL"], [78, 83, "RENDITE"]]}, - ), - ( - "European Logistics Partnership” 2017 Logistik Value-Add / Europ/a - € 1.000 Mio. 28 (realisiert) 20,0 % p.a.", - {"entities": [[46, 55, "RISIKOPROFIL"], [97, 103, "RENDITE"]]}, - ), - ( - "European Core Logistics Fund (ECLF 1) 2021 Logistik Core / Euro/p Arat. 8 € 314 Mio. 12 (voll investiert) 7,50 % p.a.", - {"entities": [[9, 13, "RISIKOPROFIL"], [106, 112, "RENDITE"]]}, - ), - ( - "P-Logistik Europa Fonds (ECLF 2) 2022 Logistik Core / Euro/p Arat. 8 € 150 Mio.? A (voll investiert) 6,5 % p.a.?", - {"entities": [[47, 51, "RISIKOPROFIL"], [101, 106, "RENDITE"]]}, - ), - ( - "First Business Parks 2015 Light Industrial Value Add / D+AUT € 100 Mio. 6 (realisiert) 16,0 % p.a.", - {"entities": [[43, 52, "RISIKOPROFIL"], [87, 93, "RENDITE"]]}, - ), - ( - "Unternehmensimmobilien Club 1 2016 Light Industrial Core+/D € 186 Mio. 9 (voll investiert) 13,0 % p.a.", - {"entities": [[91, 97, "RENDITE"]]}, - ), - ( - "Unternehmensimmobilien Club 1 2016 Light Industrial Core+/D € 186 Mio. 9 (voll investiert) 13,0 % p.a.", - {"entities": [[52, 57, "RISIKOPROFIL"], [91, 97, "RENDITE"]]}, - ), - ( - "Unternehmensimmobilien Club 2 2021 Light Industrial Core+/D € 262 Mio. 12 (voll investiert) 9,00 % p.a.", - {"entities": [[52, 57, "RISIKOPROFIL"], [92, 98, "RENDITE"]]}, - ), - ( - "Individualmandat 2022 Light Industrial Value-Add / Nordics € 100 Mio. 5 (voll investiert) 18,0 % p.a.", - {"entities": [[39, 48, "RISIKOPROFIL"], [90, 96, "RENDITE"]]}, - ), - ( - "EUROPEAN CORE LOGISTICS FUND 3", - {"entities": [[9, 13, "RISIKOPROFIL"]]}, - ), - ( - "Core Investitionen", - {"entities": [[0, 4, "RISIKOPROFIL"]]}, - ), - ( - "8 % IRR", - {"entities": [[0, 3, "RENDITE"]]}, - ), - ( - "Rendite-Risiko-Profil Core ° Geographischer Fokus Kontinentaleuropaische Kernvolkswirtschaften nach Allokationsprofil * Sektoraler Fokus Logistikimmobilien nach Allokationsprofil Kapitalstruktur ° Eigenkapital € 250 Mio. ° Fremdkapital 50 % angestrebt, max. 60 % der Immobilienwerte (Objektebene) °e Mindestzeichnung € 10 Mio. Vehikelstruktur ° Rechtsform Immobilien-Spezial-AlF mit festen Anlagebedingungen nach 3 284 KAGB ° Klassifikation Artikel 8 Offenlegungsverordnung ¢ Anlagehorizont 10 Jahre mit Verlängerungsoption um 2 Jahre! ° Geplante Auflage 01 2025 Performanceziel? ° Ausschüttung 6,0 % p.a. (Durchschnitt 10 Jahre Haltedauer) ° Interner Zinsfuß (IRR) 8,0 % p.a. (10 Jahre Haltedauer, Target-IRR)", - {"entities": [[22, 26, "RISIKOPROFIL"], [596, 601, "AUSSCHÜTTUNGSRENDITE"], [667, 672, "RENDITE"]]}, - ), - ( - "Core/Core+, mit Cash-Flow-Stabilität", - {"entities": [[0, 10, "RISIKOPROFIL"]]}, - ), - ( - "Zielausschüttung: min. 5,10%", - {"entities": [[24, 29, "ZIELAUSSCHÜTTUNG"]]}, - ), - ( - "Zielrendite (IRR): min. 5,50%", - {"entities": [[24, 29, "ZIELRENDITE"]]}, - ), - ( - "Rewe & Lidl Maxhütte-Haidhof é ae: 6 s Bahnhof Ankermieter REWE & Lidl er WALT 20 und 17 Jahre Miete p.a. 1.127.916 € Kaufpreis 21,43 Mio. € Faktor 19,00 x LTV / Zins 80% / 4,0% Ausschüttung 5,7 % IRR 7,1%", - {"entities": [[193, 198, "AUSSCHÜTTUNGSRENDITE"], [203, 207, "ZIELRENDITE"]]}, - ), - ( - "Real Estate Prime Europe Access the Core of European Prime Cities with a green SRI fund including a genuine low carbon commitment", - {"entities": [[36, 40, "RISIKOPROFIL"]]}, - ), - ( - "(FR, UK, DE, BE, NL, LU, Nordics, Allocation SP, IT, CH)", - {"entities": [[1, 32, "LÄNDERALLOKATION"], [45, 55, "LÄNDERALLOKATION"]]}, - ), - ( - "IRR: 6% - 7%", - {"entities": [[5, 12, "RENDITE"]]}, - ), - ( - "Europe | Germany 67 Value Add", - {"entities": [[9, 16, "LÄNDERALLOKATION"], [20, 29, "RISIKOPROFIL"]]}, - ), - ( - "Germany, Norway 336 Core Plus", - {"entities": [[0, 7, "LÄNDERALLOKATION"], [20, 29, "RISIKOPROFIL"]]}, - ), - ( - "UK", - {"entities": [[0, 2, "LÄNDERALLOKATION"]]}, - ), - ( - "NORWAY", - {"entities": [[0, 6, "LÄNDERALLOKATION"]]}, - ), - ( - "9.8% IRR", - {"entities": [[0, 4, "RENDITE"]]}, - ), - ( - "Investment volume down 52% to €2.3 billion, with 4,000 100 14% value-add and core-plus increasing YoY", - {"entities": [[63, 86, "RISIKOPROFIL"]]}, - ), - ( - "Geared Gross IRR‘ seeking a range of 16-18% per annum", - {"entities": [[37, 43, "RENDITE"]]}, - ), - ( - "Open-ended fund 24 months, incl. rolling reinvestment Sale of individual assets with respective management contracts or geared leases IRR: >6.5% | CoC: >5.0%", - {"entities": [[0, 10, "LAUFZEIT"], [139, 144, "RENDITE"]]}, - ), - ( - "Our investment strategy focuses on investing in upscale hotels in European prime locations, including DACH, Italy, Spain, Portugal, France, UK, Denmark, Benelux,and Poland.", - {"entities": [[102, 171, "LÄNDERALLOKATION"]]}, - ), - ( - "Core+ assets with value-add potential, Emerging Gateway Cities Helsinki] Core+ with Value well-mitigated risk and great upside Potential potential through asset improvement or = Max. 20% UK & Ireland {no contract renegotiation currency risk hedging], 80% tinental E > IRR target of 6-9%", - {"entities": [[0, 5, "RISIKOPROFIL"], [282, 286, "RENDITE"]]}, - ), - ( - "10% net IRR since inception in 2018?", - {"entities": [[0, 3, "RENDITE"]]}, - ), - ( - "Eurozone: Benelux, France and Germany", - {"entities": [[10, 37, "LÄNDERALLOKATION"]]}, - ), - ( - "Open-ended, with quarterly liquidity (redemption rights, dual pricing)", - {"entities": [[0, 10, "LAUFZEIT"]]}, - ), - ( - "Class A & B (Institutional): 0.93% on NAV; Class D (Wholesale): 1.80% on NAV; Class P (Wholesale): 1.25% on NAV", - {"entities": [[29, 34, "MANAGMENTGEBÜHREN"], [64, 69, "MANAGMENTGEBÜHREN"], [99, 104, "MANAGMENTGEBÜHREN"]]}, - ), - ( - "Risk profile: favour core > © at least and core+ assets with a targeted N 2 n allocation to value add assets to enhance returns", - {"entities": [[21, 25, "RISIKOPROFIL"], [43, 48, "RISIKOPROFIL"]]}, - ), - ( - "The Netherlands (38 assets) = Germany (9 assets) 10 largest Country assets split France (8 assets)", - {"entities": [[0, 15, "LÄNDERALLOKATION"], [30, 37, "LÄNDERALLOKATION"], [81, 87, "LÄNDERALLOKATION"]]}, - ), - ( - "Expected IRR 10.9%", - {"entities": [[13, 18, "ZIELRENDITE"]]}, - ), - ( - "Structure Open-end, perpetual life, Luxembourg domiciled Initial Target Size* €2 billion 6-8% total return,", - {"entities": [[10, 18, "LAUFZEIT"], [89, 93, "RENDITE"]]}, - ), - ( - "Geographic Focus: UK, Ireland, Iberia, Nordics, Netherlands, Germany, France, Italy", - {"entities": [[18, 83, "LÄNDERALLOKATION"]]}, - ), - ( - "IRR of 13-14%", - {"entities": [[7, 13, "RENDITE"]]}, - ), - ( - "Value-add", - {"entities": [[0, 9, "RISIKOPROFIL"]]}, - ), - ( - "Geographic allocation NORDICS UNITED KINGDOM GERMANY FRANCE PORTUGAL BENELUX", - {"entities": [[22, 76, "LÄNDERALLOKATION"]]}, - ), - ( - "Strong track record delivering a 17% net IRR, 1.7x net multiple across all divested assets (both discretionary and non-discretionary mandates)", - {"entities": [[33, 36, "RENDITE"]]}, - ), - ( - "Targeting a 7-8% net annual return and a 3-4% dividend yield, reflecting a target LTV of 35% (capped at 37.5%)", - {"entities": [[12, 16, "RENDITE"]]}, - ), - ( - "Sweden Norway Denmark Finland", - {"entities": [[0, 29, "LÄNDERALLOKATION"]]}, - ), - ( - "Logistics Residential Office Other", - {"entities": [[0, 34, "SEKTORENALLOKATION"]]}, - ), - ( - "Fund Term Open-ended with an initial 24-month lock-in for new investors", - {"entities": [[10, 20, "LAUFZEIT"]]}, - ), - ( - "Management fee of 85 bps on NAV.", - {"entities": [[18, 24, "MANAGMENTGEBÜHREN"]]}, - ), - ( - "Core/Core+ strategy, with tactical exposure to development projects aiming at enhancing the quality of the portfolio over time", - {"entities": [[0, 10, "RISIKOPROFIL"]]}, - ), - ( - "Fund term: Open-ended", - {"entities": [[11, 21, "LAUFZEIT"]]}, - ), - ( - "Return targets: The fund targets a net internal rate of return (IRR) of 8% and a net annual income yield of 5% with planned quarterly distributions’.", - {"entities": [[72, 74, "RENDITE"]]}, - ), - ( - "Geographic scope: The fund has a broad mandate to invest in commercial and residential real estate across Sweden, Denmark, Finland, and Norway. 50% LTV Asset selection: Heirs to acquire high-quality, income-generating properties in major Nordic cities and enhance their value through active asset management. Portfolio construction: The goal is to build diversified portfolios that are appealing to core buyers upon exit.", - {"entities": [[106, 142, "LÄNDERALLOKATION"]]}, - ), - ( - "Experience: Since 2012, | | has demonstrated its capability to build diversified and resilient portfolios for its core-plus funds. German Real Estate Quota advantage . Local expertise: extensive local relationships and proprietary deal flow in key Nordic markets provide a strategic advantage.", - {"entities": [[114, 123, "RISIKOPROFIL"]]}, - ), - ( - "Target returns: 8% net IRR with 5% net annual income yield! * Geographic focus: Sweden, Denmark, Norway and Finland « Target leverage: 50% LTV (excluding short-term borrowing) « Sector exposure: office, logistics, public properties, retail (focused on grocery anchored and necessity driven retail) and residentials « Investment focus: high quality properties,", - {"entities": [[16, 18, "RENDITE"], [80, 115, "LÄNDERALLOKATION"], [195, 239, "SEKTORENALLOKATION"]]}, - ), - ( - "The Fund 2 xemoours common limited partnership (SCS) (SICAV-RAIF) Investment Objective To pursue investments in commercial and residential properties throughout the Nordic Region Fund Target Size €300 million (equity) Return Targets Target net IRR of 8%, target net annual income yield of 5%", - {"entities": [[251, 253, "RENDITE"]]}, - ) - -] - - - - diff --git a/project/backend/spacy-service/spacy_training/training_model.py b/project/backend/spacy-service/spacy_training/training_model.py deleted file mode 100644 index 0ac3a1a..0000000 --- a/project/backend/spacy-service/spacy_training/training_model.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -from pathlib import Path - -import spacy -from spacy.cli.train import train - -from spacy.tokens import DocBin - -from tqdm import tqdm - -from training_data import TRAINING_DATA - -nlp = spacy.blank("de") - -# create a DocBin object -db = DocBin() - -for text, annot in tqdm(TRAINING_DATA): - doc = nlp.make_doc(text) - ents = [] - # add character indexes - for start, end, label in annot["entities"]: - span = doc.char_span(start, end, label=label, alignment_mode="contract") - if span is None: - print(f"Skipping entity: |{text[start:end]}| Start: {start}, End: {end}, Label: {label}") - else: - ents.append(span) - # label the text with the ents - doc.ents = ents - db.add(doc) - -# save the DocBin object -os.makedirs("./data", exist_ok=True) -db.to_disk("./data/train.spacy") - -config_path = Path("config.cfg") -output_path = Path("output") - -print("Starte Training...") -train(config_path, output_path) diff --git a/project/backend/spacy-service/spacy_training/training_running.json b/project/backend/spacy-service/spacy_training/training_running.json index 0aaa3c2..ef4cde9 100644 --- a/project/backend/spacy-service/spacy_training/training_running.json +++ b/project/backend/spacy-service/spacy_training/training_running.json @@ -1 +1 @@ -{"running": false} +{"running": false} \ No newline at end of file diff --git a/project/docker-compose.yml b/project/docker-compose.yml index 954125f..1c5a848 100644 --- a/project/docker-compose.yml +++ b/project/docker-compose.yml @@ -37,8 +37,6 @@ services: retries: 10 ports: - 5050:5000 - volumes: - - ./backend/spacy-service/spacy_training:/app/spacy_training ocr: build: diff --git a/project/frontend/package.json b/project/frontend/package.json index cf2c916..9ba5f74 100644 --- a/project/frontend/package.json +++ b/project/frontend/package.json @@ -23,7 +23,6 @@ "@tanstack/react-router": "^1.114.3", "@tanstack/react-router-devtools": "^1.114.3", "@tanstack/router-plugin": "^1.114.3", - "file-saver": "^2.0.5", "react": "^19.0.0", "react-dom": "^19.0.0", "react-material-file-upload": "^0.0.4", diff --git a/project/frontend/src/components/KPIForm.tsx b/project/frontend/src/components/KPIForm.tsx index bec82f6..452dfd8 100644 --- a/project/frontend/src/components/KPIForm.tsx +++ b/project/frontend/src/components/KPIForm.tsx @@ -400,13 +400,14 @@ export function KPIForm({ mode, initialData, onSave, onCancel, loading = false, function generateSpacyEntries(formData: Partial) { const label = formData.name?.trim().toUpperCase() || ""; return (formData.examples || []).map(({ sentence, value }) => { - const start = sentence.indexOf(value); + const trimmedValue = value.trim(); + const start = sentence.indexOf(trimmedValue); if (start === -1) { - throw new Error(`"${value}" nicht gefunden in Satz: "${sentence}"`); + throw new Error(`"${trimmedValue}" nicht gefunden in Satz: "${sentence}"`); } return { text: sentence, - entities: [[start, start + value.length, label]] + entities: [[start, start + trimmedValue.length, label]] }; }); } diff --git a/project/frontend/src/routes/config.tsx b/project/frontend/src/routes/config.tsx index 6f94090..ae32e9a 100644 --- a/project/frontend/src/routes/config.tsx +++ b/project/frontend/src/routes/config.tsx @@ -51,7 +51,7 @@ function ConfigPage() { const data = await res.json(); if (data.running) { setTrainingRunning(true); - pollTrainingStatus(); + pollTrainingStatus(); } } catch (err) { console.error("Initiale Trainingsstatus-Abfrage fehlgeschlagen", err); @@ -78,23 +78,30 @@ function ConfigPage() { } }; - const handleTriggerTraining = () => { + const handleTriggerTraining = async () => { setTrainingRunning(true); - setSnackbarMessage("Training wurde gestartet."); - setSnackbarOpen(true); - fetch(`${API_HOST}/api/spacy/train`, { - method: "POST", - }).catch(err => { + try { + const response = await fetch(`${API_HOST}/api/spacy/train`, { + method: "POST", + }); + + if (!response.ok) throw new Error("Training konnte nicht gestartet werden"); + + // Erfolgsmeldung erst hier anzeigen + setSnackbarMessage("Training wurde gestartet."); + setSnackbarOpen(true); + + pollTrainingStatus(); // jetzt starten + } catch (err) { + console.error(err); setSnackbarMessage("Fehler beim Starten des Trainings."); setSnackbarOpen(true); - console.error(err); - }); - - pollTrainingStatus(); // Starte Überwachung - + setTrainingRunning(false); + } }; + const pollTrainingStatus = () => { const interval = setInterval(async () => { try {