Mergekonflikt in pdfViewer.tsx behoben – Text-Highlighting integriert

pull/47/head
Abdulrahman Dabbagh 2025-06-04 15:55:24 +02:00
commit 2d88e793e0
53 changed files with 3294 additions and 7788 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -2,3 +2,4 @@ API_KEY=
DATABASE_URL=postgresql://admin:admin@db:5432
POSTGRES_PASSWORD=admin
POSTGRES_USER=admin
COORDINATOR_URL="coordinator:5000"

View File

@ -18,4 +18,4 @@ COPY . .
ENV PYTHONUNBUFFERED=1
EXPOSE 5000
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "app:app"]
CMD ["gunicorn","--timeout", "10","--workers", "2", "--bind", "0.0.0.0:5000", "app:app"]

View File

@ -0,0 +1,21 @@
# 1. Use the Python 3.11 Alpine base image
FROM python:3.11-alpine
# 2. Set the working directory inside the container
WORKDIR /app
# 3. Install the production-style server (gunicorn) and the eventlet worker
#    class used by CMD below; --no-cache-dir keeps pip's download cache out of
#    the image layer, consistent with the requirements install in step 4
RUN pip install --no-cache-dir gunicorn eventlet
# 4. Copy requirements.txt and install the packages
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# 5. Copy the source code (e.g. app.py)
COPY . .
# Disable Python output buffering so log lines appear immediately
ENV PYTHONUNBUFFERED=1
EXPOSE 5000
# Serve the `app` object from module `app` with a single eventlet worker on port 5000
CMD ["gunicorn", "--worker-class", "eventlet", "-w", "1", "--bind", "0.0.0.0:5000", "app:app"]

View File

@ -1,10 +1,14 @@
from flask import Flask
from flask_cors import CORS
import os
from dotenv import load_dotenv
from controller import register_routes
from model.database import init_db
from controller.socketIO import socketio
app = Flask(__name__)
CORS(app)
socketio.init_app(app)
load_dotenv()
DATABASE_URL = os.getenv("DATABASE_URL")
@ -25,4 +29,4 @@ def health_check():
# für Docker wichtig: host='0.0.0.0'
if __name__ == "__main__":
app.run(debug=True, host="0.0.0.0")
socketio.run(app,debug=True, host="0.0.0.0", port=5050)

View File

@ -1,9 +1,11 @@
from controller.spacy_contoller import spacy_controller
from controller.kpi_setting_controller import kpi_setting_controller
from controller.pitch_book_controller import pitch_book_controller
from controller.progress_controller import progress_controller
def register_routes(app):
    """Register every controller blueprint on ``app``.

    The blueprints are registered in a fixed order: KPI settings, pitch
    books, spaCy, progress.

    Args:
        app: object exposing ``register_blueprint`` (the application the
            blueprints are attached to).
    """
    blueprints = (
        kpi_setting_controller,
        pitch_book_controller,
        spacy_controller,
        progress_controller,
    )
    for blueprint in blueprints:
        app.register_blueprint(blueprint)

View File

@ -4,6 +4,7 @@ from model.pitch_book_model import PitchBookModel
from io import BytesIO
from werkzeug.utils import secure_filename
import puremagic
from controller.socketIO import socketio
pitch_book_controller = Blueprint("pitch_books", __name__, url_prefix="/api/pitch_book")
@ -54,6 +55,7 @@ def upload_file():
db.session.add(new_file)
db.session.commit()
socketio.emit("progress", {"id": new_file.id, "progress": 0})
return jsonify(new_file.to_dict()), 201
except Exception as e:
print(e)
@ -81,6 +83,7 @@ def update_file(id):
print(e)
if "kpi" in request.form:
socketio.emit("progress", {"id": id, "progress": 100})
file.kpi = request.form.get("kpi")
db.session.commit()

View File

@ -0,0 +1,19 @@
from flask import Blueprint, request, jsonify
from controller.socketIO import socketio
progress_controller = Blueprint("progress", __name__, url_prefix="/api/progress")
@progress_controller.route("/", methods=["POST"])
def progress():
    """Validate a progress report and emit it as a "progress" Socket.IO event.

    Expects a JSON object body with the keys:
        id: identifier that is forwarded unchanged in the emitted event.
        progress: a number in the half-open range [0, 100); 100 itself is
            rejected by this endpoint.

    Returns:
        ``({"message": "Progress updated"}, 200)`` after the event was emitted,
        or ``({"error": ...}, 400)`` when the body is not a JSON object, a
        required key is missing, or the progress value is invalid.
    """
    data = request.get_json()

    # A syntactically valid JSON body is not necessarily an object (it can be
    # null, a number, a string or a list). Guard before the membership tests
    # and subscripting below, which would otherwise raise and yield a 500.
    if not isinstance(data, dict) or "id" not in data or "progress" not in data:
        return jsonify({"error": "Missing required fields. [id, progress]"}), 400

    value = data["progress"]
    # bool is a subclass of int, so it has to be excluded explicitly.
    # The chained comparison also rejects NaN, which fails every comparison.
    is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
    if not is_number or not (0 <= value < 100):
        return jsonify({"error": "Invalid progress value"}), 400

    socketio.emit("progress", {"id": data["id"], "progress": value})
    return jsonify({"message": "Progress updated"})

View File

@ -0,0 +1,3 @@
from flask_socketio import SocketIO
# Shared Socket.IO server object imported by the controllers and the app module.
# No Flask app is passed here, so it is not bound to an application at this point.
# cors_allowed_origins="*" accepts connections from any origin.
# NOTE(review): confirm the wildcard origin is intended outside local development.
socketio = SocketIO(cors_allowed_origins="*")

View File

@ -1,3 +1,4 @@
bidict==0.23.1
black==25.1.0
blinker==1.9.0
cfgv==3.4.0
@ -6,8 +7,11 @@ distlib==0.3.9
filelock==3.18.0
flake8==7.2.0
Flask==3.1.1
flask-cors==6.0.0
Flask-SocketIO==5.5.1
Flask-SQLAlchemy==3.1.1
greenlet==3.2.2
h11==0.16.0
identify==2.6.12
itsdangerous==2.2.0
Jinja2==3.1.6
@ -24,8 +28,12 @@ puremagic==1.29
pycodestyle==2.13.0
pyflakes==3.3.2
python-dotenv==1.1.0
python-engineio==4.12.1
python-socketio==5.13.0
PyYAML==6.0.2
simple-websocket==1.1.0
SQLAlchemy==2.0.41
typing_extensions==4.13.2
virtualenv==20.31.2
Werkzeug==3.1.3
wsproto==1.2.0

View File

@ -0,0 +1,20 @@
# SpaCy Microservice
## Den Service in einem Docker-Container starten
### 1. Build
```bash
docker build -t spacy-service .
```
### 2. Starten
```bash
docker run -p 5050:5050 spacy-service
```
## Beispielaufruf:
```bash
curl -X POST http://localhost:5050/extraction \
-H "Content-Type: application/json" \
-d @text-per-page.json
```

View File

@ -3,7 +3,7 @@ import os
import json
current_dir = os.path.dirname(os.path.abspath(__file__))
model_path = os.path.join(current_dir, "../spacy_training/output/model-best")
model_path = os.path.join(current_dir, "../spacy_training/output/model-last")
nlp = spacy.load(model_path)

View File

@ -22,10 +22,14 @@
],
"ner":[
"AUSSCH\u00dcTTUNGSRENDITE",
"Aussch\u00fcttungsrendite",
"Laufzeit",
"LAUFZEIT",
"L\u00c4NDERALLOKATION",
"MANAGMENTGEB\u00dcHREN",
"RENDITE",
"RISIKOPROFIL",
"Risikoprofil"
"SEKTORENALLOKATION",
"ZIELAUSSCH\u00dcTTUNG",
"ZIELRENDITE"
]
},
"pipeline":[
@ -40,37 +44,57 @@
],
"performance":{
"ents_f":0.8888888889,
"ents_p":0.8205128205,
"ents_r":0.9696969697,
"ents_f":0.9637305699,
"ents_p":0.93,
"ents_r":1.0,
"ents_per_type":{
"RISIKOPROFIL":{
"p":1.0,
"r":0.9705882353,
"f":0.9850746269
},
"Risikoprofil":{
"p":0.8,
"r":1.0,
"f":0.8888888889
"f":1.0
},
"Laufzeit":{
"p":0.9,
"L\u00c4NDERALLOKATION":{
"p":0.90625,
"r":1.0,
"f":0.9473684211
"f":0.9508196721
},
"AUSSCH\u00dcTTUNGSRENDITE":{
"p":0.5925925926,
"r":0.9411764706,
"f":0.7272727273
},
"Aussch\u00fcttungsrendite":{
"p":0.6666666667,
"p":0.7105263158,
"r":1.0,
"f":0.8
"f":0.8307692308
},
"LAUFZEIT":{
"p":1.0,
"r":1.0,
"f":1.0
},
"RENDITE":{
"p":1.0,
"r":1.0,
"f":1.0
},
"ZIELRENDITE":{
"p":1.0,
"r":1.0,
"f":1.0
},
"ZIELAUSSCH\u00dcTTUNG":{
"p":1.0,
"r":1.0,
"f":1.0
},
"MANAGMENTGEB\u00dcHREN":{
"p":1.0,
"r":1.0,
"f":1.0
},
"SEKTORENALLOKATION":{
"p":1.0,
"r":1.0,
"f":1.0
}
},
"tok2vec_loss":119.7162696429,
"ner_loss":824.8371582031
"tok2vec_loss":26.5951735583,
"ner_loss":742.7924194336
}
}

View File

@ -1 +1 @@
¥movesÚì{"0":{},"1":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8},"2":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8},"3":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8},"4":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8,"":1},"5":{"":1}}£cfg<66>§neg_keyÀ
¥movesÚL{"0":{},"1":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2},"2":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2},"3":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2},"4":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2,"":1},"5":{"":1}}£cfg<66>§neg_keyÀ

View File

@ -22,10 +22,14 @@
],
"ner":[
"AUSSCH\u00dcTTUNGSRENDITE",
"Aussch\u00fcttungsrendite",
"Laufzeit",
"LAUFZEIT",
"L\u00c4NDERALLOKATION",
"MANAGMENTGEB\u00dcHREN",
"RENDITE",
"RISIKOPROFIL",
"Risikoprofil"
"SEKTORENALLOKATION",
"ZIELAUSSCH\u00dcTTUNG",
"ZIELRENDITE"
]
},
"pipeline":[
@ -40,37 +44,57 @@
],
"performance":{
"ents_f":0.8780487805,
"ents_p":0.9473684211,
"ents_r":0.8181818182,
"ents_f":0.9608938547,
"ents_p":1.0,
"ents_r":0.9247311828,
"ents_per_type":{
"RISIKOPROFIL":{
"p":1.0,
"r":0.9705882353,
"f":0.9850746269
},
"Risikoprofil":{
"p":0.8,
"r":1.0,
"f":0.8888888889
},
"AUSSCH\u00dcTTUNGSRENDITE":{
"p":0.7777777778,
"r":0.4117647059,
"f":0.5384615385
},
"Laufzeit":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Aussch\u00fcttungsrendite":{
"AUSSCH\u00dcTTUNGSRENDITE":{
"p":1.0,
"r":0.5,
"f":0.6666666667
"r":0.5925925926,
"f":0.7441860465
},
"LAUFZEIT":{
"p":1.0,
"r":1.0,
"f":1.0
},
"RENDITE":{
"p":1.0,
"r":1.0,
"f":1.0
},
"L\u00c4NDERALLOKATION":{
"p":1.0,
"r":0.8965517241,
"f":0.9454545455
},
"ZIELRENDITE":{
"p":1.0,
"r":1.0,
"f":1.0
},
"ZIELAUSSCH\u00dcTTUNG":{
"p":1.0,
"r":1.0,
"f":1.0
},
"MANAGMENTGEB\u00dcHREN":{
"p":1.0,
"r":1.0,
"f":1.0
},
"SEKTORENALLOKATION":{
"p":1.0,
"r":1.0,
"f":1.0
}
},
"tok2vec_loss":235.8388520621,
"ner_loss":1878.9451904297
"tok2vec_loss":33.6051129291,
"ner_loss":740.5764770508
}
}

View File

@ -1 +1 @@
¥movesÚì{"0":{},"1":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8},"2":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8},"3":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8},"4":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8,"":1},"5":{"":1}}£cfg<66>§neg_keyÀ
¥movesÚL{"0":{},"1":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2},"2":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2},"3":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2},"4":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2,"":1},"5":{"":1}}£cfg<66>§neg_keyÀ

View File

@ -205,44 +205,357 @@ TRAINING_DATA = [
),
(
"Zielmärkte Klassifizierung SFDR Invastitionsfokus Rendite- / Risikoprofil Rechtsform Eigenkapital /FK Quote Investftionsvolumen Prognostizierte Gesamtrendite {IRR)* Prognostizierte Ausschüttungsrandite* Mindestanlage Mitgliedschaft Im Anlagesusschuss Ankaufs- / Verkaufs- / Verkaufs(Teflimmobilfe)- / Baumanagementgebahr (inkl. USt.) Parformanceabhängige Vergütung Einmalige Strukturierungsgebühr Deutsche Metropolregianen und umliegende Regionen mit Städten >50T Einwohner Artikel 8 Wohnimmobilien Deutschland Aktive Bestandsentwicklung Offener Spezial-AlF mit festen Anlagebedingungen rd. 200 Mio. € / max. 20% rd. 250 Mio. € 7,5 % (nach Kosten & Gebühren, vor Steuern) 8 4,0 % {nach Kosten & Gebühren, var Steuern} 5Mio.€ Ab 10 Mio. € 1,40 % / 0,80 % /2,12% / 4,91 % Laufzeit / Investtionszeltraum Ausschüttungsintervalle 20 % über einer @ Ausschüttungsrendite von 4,0 % 0,1% der bis zum 31.12.2023 erfolgten Kapitalzusagen (max. 200.000 &) 10 bis 12 Jahre / bis zu 24 Monate angestrebt Mindestens jährlich",
{"entities": [[945, 960, "Laufzeit"]]},
{"entities": [[945, 960, "LAUFZEIT"]]},
),
(
"Laufzeit / Investtionszeltraum,10 bis 12 Jahre / bis zu 24 Monate angestrebt Ausschüttungsintervalle,Mindestens jährlich",
{"entities": [[31, 46, "Laufzeit"]]},
{"entities": [[31, 46, "LAUFZEIT"]]},
),
(
"10-12 Jahre Laufzeit bei einem LTV von bis zu 20%",
{"entities": [[0, 11, "Laufzeit"]]},
{"entities": [[0, 11, "LAUFZEIT"]]},
),
(
"vom Manager festgelegter Stil Rechtsform Jahr des ersten Closings Laufzeit Geplantes Jahr der Auflösung Ziel-Netto-IRR / Gesamtrendite* Zielvolumen des Anlagevehikels Ziel-LTYAktueller LTV Zielsektoren Zielanlagestrategie Fonds Offen Deutschland Core, Core + Offener Immobilien-Spezialfonds 2022 10 - 12 Jahre",
{"entities": [[297, 310, "Laufzeit"], [247, 259, "Risikoprofil"]]},
{"entities": [[297, 310, "LAUFZEIT"], [247, 259, "RISIKOPROFIL"]]},
),
(
"Allgemeine Annahmen Ankaufsphase Haltedauer Zielobjektgröße Finanzierung Investitions-annahmen Zielrendite 24 Monate Investmentzeitraum 10 Jahre (+) EUR 20-75 Mio. Keine externe Finanzierung zum Auftakt (ausschließlich Darlehen der Anteilseigner). Die Finanzierung wird nach der Ankaufsphase und Stabilisierung der Zinssätze neu geprüft. Angestrebter LTV zwischen 25-40 % Investitionen für Renovierungen und ESG- Verbesserungen werden für jedes Objekt einzeln festgelegt. 5,00-5,25 % Ausschüttungsrendites",
{"entities": [[136, 148, "Laufzeit"], [472, 483, "Ausschüttungsrendite"]]},
{"entities": [[136, 148, "LAUFZEIT"], [472, 483, "AUSSCHÜTTUNGSRENDITE"]]},
),
(
"Zielrendite 5,00-5,25 % Ausschüttungsrendite 1) Ankauf von Objekten an Tag eins mit 100% Eigenkapital. Die Strategie unterstellt die Aufnahme von Fremdkapital, sobald sich die Zins- und Finanzierungskonditionen nachhaltig stabilisieren. Strategie - Übersicht Risikoprofil Core+",
{"entities": [[12, 23, "Ausschüttungsrendite"], [272, 277, "Risikoprofil"]]},
{"entities": [[12, 23, "AUSSCHÜTTUNGSRENDITE"], [272, 277, "RISIKOPROFIL"]]},
),
(
"Vehicle lifetime / investment period Open-ended fund",
{"entities": [[37, 52, "Laufzeit"]]},
{"entities": [[37, 52, "LAUFZEIT"]]},
),
(
"Vehicle / domicile Alternative Investment Fund / Luxembourg (e.g. SCSp SICAV-RAIF) Investment strategy eturn pro Real Estate (PropCo + OpCo) Investing in upscale hotels with long-term management contracts in major European destinations Core/Core+ with OpCo premium Management Agreements solely with financially strong and experienced partners/ global brands Cash flow-oriented Cash-flow pattern Target equity /AuM € 400m equity / € 800m AuM (50% Loan-to-Value) Vehicle lifetime / investment period Open-ended fund",
{"entities": [[498, 513, "Laufzeit"], [236, 245, "Risikoprofil"]]},
{"entities": [[498, 513, "LAUFZEIT"], [236, 245, "RISIKOPROFIL"]]},
),
(
"Vehicle type (Lux-RAIF) (net of fees) IRR6.5% ACCOR Vehicle structure Open-ended Targetvehiclesize € 400m (equity) Manager-defined Core/Core+ with | style OpCo Premium darge CLV. 50% Pt H | LTO N WORLDWIDE Year of first closing 2020 Target no. ofinvestors 1-5 Fund life (yrs} Open-ended Min-commitmentper —¢ 400m",
{"entities": [[131, 141, "Risikoprofil"], [70, 80, "Laufzeit"]]},
{"entities": [[131, 141, "RISIKOPROFIL"], [70, 80, "LAUFZEIT"]]},
),
(
"Fund term: Open-ended",
{"entities": [[11, 21, "Laufzeit"]]},
{"entities": [[11, 21, "LAUFZEIT"]]},
),
(
"Abdeckung der Risiko-Rendite-Bandbreite (Core, Core+, Value-Add)",
{"entities": [[41, 63, "RISIKOPROFIL"]]},
),
(
"5,1% - 8,5% IRR!",
{"entities": [[0, 11, "RENDITE"]]},
),
(
"Retailinvestitionsvolumen nach Ländern (2024) Vereinigtes Königreich, 26,4% Deutschland, 19,0% Andere, 19,7% Italien, 8,2% Irland, 3,3% N | Frankreich, Spanien, 8,1%",
{"entities": [[46, 75, "LÄNDERALLOKATION"], [76, 94, "LÄNDERALLOKATION"], [95, 108, "LÄNDERALLOKATION"], [109, 122, "LÄNDERALLOKATION"], [123, 135, "LÄNDERALLOKATION"]]},
),
(
"Erwartete IRR 5 (je nach Objekt- A(E) 6.00% - 8,00%",
{"entities": [[39, 52, "RENDITE"]]},
),
(
"Zielmarkts Deutsche Metropolregianen und umliegende Regionen mit Städten >50T Einwohner Klassifizierung SFDR Artikel 8 Invastitionsfokus Wohnimmobilien Deutschland Rendite- / Risikoprofil Aktive Bestandsentwicklung Rechtsform Offener Spezial-AlF mit festen Anlagebedingungen Eigenkapital /FK Quote rd. 200 Mio. € / max. 20% Investftionsvolumen rd. 250 Mio. € Prognostiderte Gesamtrendite {IRR)* 7,5 % (nach Kosten & Gebühren, vor Steuern) Prognostizierte Ausschüttungsrandite* @ 4,0 % {nach Kosten & Gebühren, var Steuern} Mindestanlage 5Mio.€ Mitgliedschaft Im Anlagesusschuss Ab 10 Mio. € Ankaufs- / Verkaufs- / Verkaufs(Teflimmobilfe)- / Baumanagementgebahr (inkl. USt) 1,40 %/080%/212%/491% Parformanceabhängige Vergütung 20 % über einer ® Ausschüttungsrendite von 4,0% Einmalige Strukturierungsgebühr 0,1% der bis zum 31.12.2023 erfolgten Kapitalzusagen (max. 200.000 €) Laufzelt / Investtonszeltraum 10 bis 12 Jahre / bis zu 24 Monate angestrebt Ausschüttungsintervalle Mindestens jährlich",
{"entities": [[396, 401, "RENDITE"], [482, 487, "AUSSCHÜTTUNGSRENDITE"], [914, 929, "LAUFZEIT"]]},
),
(
"= Prognostizierte jährliche Ausschüttung von @ 4,0%* = Prognostizierte Gesamtrendite (IRR) von 7,5%*",
{"entities": [[48, 52, "AUSSCHÜTTUNGSRENDITE"], [96, 100, "RENDITE"]]},
),
(
"Prognose: 7,5%+ IRR auf Fondsebene",
{"entities": [[10, 14, "RENDITE"]]},
),
(
"= Prognostizierte jährliche Ausschüttung* von 84,0% = Prognostizierte Gesamtrendite (IRR}* von 7,5%",
{"entities": [[96, 100, "RENDITE"], [49, 53, "AUSSCHÜTTUNGSRENDITE"]]},
),
(
"= Lagefokussierung: Metropolregionen Deutschlands = Finanzierung: max. 20% LTV = Risikoprofil: Core, Core +",
{"entities": [[95, 107, "RISIKOPROFIL"]]},
),
(
"Performance-Fee: 20% über einer @ Ausschüttungsrendite von 4,0%",
{"entities": [[61, 65, "AUSSCHÜTTUNGSRENDITE"]]},
),
(
"Fondstyp Offener Spezial-AIF nach KAGB mit festen Anlagebedingungen ESG-Klassifizierung Fonds gemäß Artikel 8 EU-Offenlegungsverordnung KVG IntReal GmbH, Hamburg Anlagestrategie Aufbau eines Objektportfolios aus Ärztehäusern, die langfristig vermietet sind Ärztehäuser, Laborimmobilien, im Verbund mit Ärztehäusern auch ambulant Zielobjekte betreute Wohngemeinschaften; Mietanteil Medizin und medizinnahe Dienstleistungen/Handel > 65 % (Objektebene) WALT >5 Jahre bei Ankauf Objektbaujahre Ab 2000 Anlagegrenzen Einzelinvestment 8-30 Mio. EUR Anzahl Objekte 10-20 Deutschland bundesweit; jeweiliges Einzugsgebiet > 25.000 Einwohner mit Regionen stabiler Bevölkerungsprognose Risikoprofil Core / Core +",
{"entities": [[689, 702, "RISIKOPROFIL"]]},
),
(
"Fondsvolumen 300 Mio. EUR Zielrendite (IRR) > 6,0 % p. a. Ausschuttung >5,0 % p. a. Ankaufszeitraum 2024-2026 Laufzeit 31.12.2036 Mindestanlage 10 Mio. EUR Anlageausschuss Ja, entscheidet u. a. über Objekterwerb (Mitglied kann ab 20 Mio. EUR gestellt werden) Gebührenstruktur Marktüblich (auf Anfrage) Projektentwicklungen keine Forward-Deals Möglich, maximal 18 Monate Vorlauf; keine Projektentwicklungsrisiken beim Fonds Erbbaurechte Möglich, sofern Laufzeit > 60 Jahre und angemessene Entschädigung bei Ablauf und Heimfall Status Objektpipeline vorhanden: siehe Folie 16 ff.",
{"entities": [[44, 57, "RENDITE"], [71, 83, "AUSSCHÜTTUNGSRENDITE"], [120, 130, "LAUFZEIT"]]},
),
(
"Niederlande (max. 35 %) Länderallokation Frankreich (max. 35 %) (in % vom Zielvolumen) Skandinavien (Schweden, Dänemark) (max. 35 %) Deutschland (<= 10 %)",
{"entities": [[0, 23, "LÄNDERALLOKATION"], [41, 63, "LÄNDERALLOKATION"], [87, 132, "LÄNDERALLOKATION"], [133, 154, "LÄNDERALLOKATION"]]},
),
(
"Führender Immobilien-Investmentmanager in den Nordics für globale ll institutionelle Investoren in Value Add und Core Strategien",
{"entities": [[101, 119, "RISIKOPROFIL"]]},
),
(
"Core und Core+ Fonds",
{"entities": [[0, 14, "RISIKOPROFIL"]]},
),
(
"Risikoprofil Core / Core+",
{"entities": [[13, 25, "RISIKOPROFIL"]]},
),
(
"Durchschnittlich geplante jährliche Ausschüttung von 4,5-5,5% auf das investierte Eigenkapital an die Anleger Geplante Gesamtrendite von 5-6% (IRR) auf das eingezahlte Eigenkapital",
{"entities": [[54, 62, "AUSSCHÜTTUNGSRENDITE"], [138, 142, "RENDITE"]]},
),
(
"Geografische Zielallokation nach Investitionsphase des Fonds: 1) Schweden 20-60% Allokation Länder 2) Finnland 20-60% 3) Norwegen 10-40% 4) Dänemark 10-40%",
{"entities": [[65, 80, "LÄNDERALLOKATION"], [102, 117, "LÄNDERALLOKATION"], [121, 136, "LÄNDERALLOKATION"], [140, 155, "LÄNDERALLOKATION"]]},
),
(
"Deutsches Spezial-Sondervermögen mit festen Anlagebedingungen ($284 KAGB) Immobilien- oder Infrastrukturquote (nach Solvency II) Core / Core+ Euro Hauptstadtregionen und andere Großstädte in den Nordics €500 Mio. 4,5-5,5% 15 Jahre; Fonds hat unbegrenzte Laufzeit; Investmentphase 4 Jahre Maximaler Fremdkapitalanteil 50% (LTV-Ziel bei Ankauf), Langfristiges LTV-Ziel auf Fondsebene ist 45% 0,625% p. a. des Bruttofondsvermögens Zeichnungen ab € 30 Mio. - 0,03 % Rabatt Zeichnungen ab € 50 Mio. - zusatzl. 0,03 % Rabatt 1,1% des Verkehrswertes 0,6% der Bruttoverkaufswert 10% wenn Hurdle Rate 5,0 % p. a. (IRR netto) überschritten wird (nach 15 Jahren berechnet) Ja",
{"entities": [[129, 141, "RISIKOPROFIL"], [213, 221, "ZIELRENDITE"], [242, 262, "LAUFZEIT"]]},
),
(
"Standort Helsinki, Finnland Sektor Bildungswesen, Schule& Kindertagesstätte Vermietbare Fläche 3.321 m? Leerstand bei Ankauf 0% / 0% Ankaufspreis+ Investitionen €21,4 Mio. + €0,2 Mio Eigenkapital €21,6 Mio. Ankaufs- / Stabilisierungs- / Exitrendite 5,0%/ 5,5%/ 5,0% NOI zum Ankaufszeitpunkt / Exit-NOI €1.1m/ €1.2m Zielrenditen (netto für LPs) 5,4% IRR/ 1.5x EM / DY 4,3% Ankauf / Exit Dezember 2023/ Dezember 2033",
{"entities": [[345, 349, "ZIELRENDITE"]]},
),
(
"Evergreen/offene Fondsstrukturenv Core / Core+ Strategien",
{"entities": [[34, 46, "RISIKOPROFIL"]]},
),
(
"BEE Henderson German 2012 Logistik Core/D/Art. 8 € 336 Mio. 12 (voll investiert) 13,0 % p.a.",
{"entities": [[35, 39, "RISIKOPROFIL"], [81, 87, "RENDITE"]]},
),
(
"ICF German Logistics 2014 Logistik Core/D/Art. 8 € 400 Mio. 16 (voll investiert) 12,0 % p.a.",
{"entities": [[35, 39, "RISIKOPROFIL"], [81, 87, "RENDITE"]]},
),
(
"Individualmandat 2015 Logistik Core / D+AU/ ArTt. 6 € 200 Mio. 8 (realisiert) 8,0 % p.a.",
{"entities": [[31, 35, "RISIKOPROFIL"], [78, 83, "RENDITE"]]},
),
(
"European Logistics Partnership” 2017 Logistik Value-Add / Europ/a - € 1.000 Mio. 28 (realisiert) 20,0 % p.a.",
{"entities": [[46, 55, "RISIKOPROFIL"], [97, 103, "RENDITE"]]},
),
(
"European Core Logistics Fund (ECLF 1) 2021 Logistik Core / Euro/p Arat. 8 € 314 Mio. 12 (voll investiert) 7,50 % p.a.",
{"entities": [[9, 13, "RISIKOPROFIL"], [106, 112, "RENDITE"]]},
),
(
"P-Logistik Europa Fonds (ECLF 2) 2022 Logistik Core / Euro/p Arat. 8 € 150 Mio.? A (voll investiert) 6,5 % p.a.?",
{"entities": [[47, 51, "RISIKOPROFIL"], [101, 106, "RENDITE"]]},
),
(
"First Business Parks 2015 Light Industrial Value Add / D+AUT € 100 Mio. 6 (realisiert) 16,0 % p.a.",
{"entities": [[43, 52, "RISIKOPROFIL"], [87, 93, "RENDITE"]]},
),
(
"Unternehmensimmobilien Club 1 2016 Light Industrial Core+/D € 186 Mio. 9 (voll investiert) 13,0 % p.a.",
{"entities": [[91, 97, "RENDITE"]]},
),
(
"Unternehmensimmobilien Club 1 2016 Light Industrial Core+/D € 186 Mio. 9 (voll investiert) 13,0 % p.a.",
{"entities": [[52, 57, "RISIKOPROFIL"], [91, 97, "RENDITE"]]},
),
(
"Unternehmensimmobilien Club 2 2021 Light Industrial Core+/D € 262 Mio. 12 (voll investiert) 9,00 % p.a.",
{"entities": [[52, 57, "RISIKOPROFIL"], [92, 98, "RENDITE"]]},
),
(
"Individualmandat 2022 Light Industrial Value-Add / Nordics € 100 Mio. 5 (voll investiert) 18,0 % p.a.",
{"entities": [[39, 48, "RISIKOPROFIL"], [90, 96, "RENDITE"]]},
),
(
"EUROPEAN CORE LOGISTICS FUND 3",
{"entities": [[9, 13, "RISIKOPROFIL"]]},
),
(
"Core Investitionen",
{"entities": [[0, 4, "RISIKOPROFIL"]]},
),
(
"8 % IRR",
{"entities": [[0, 3, "RENDITE"]]},
),
(
"Rendite-Risiko-Profil Core ° Geographischer Fokus Kontinentaleuropaische Kernvolkswirtschaften nach Allokationsprofil * Sektoraler Fokus Logistikimmobilien nach Allokationsprofil Kapitalstruktur ° Eigenkapital € 250 Mio. ° Fremdkapital 50 % angestrebt, max. 60 % der Immobilienwerte (Objektebene) °e Mindestzeichnung € 10 Mio. Vehikelstruktur ° Rechtsform Immobilien-Spezial-AlF mit festen Anlagebedingungen nach 3 284 KAGB ° Klassifikation Artikel 8 Offenlegungsverordnung ¢ Anlagehorizont 10 Jahre mit Verlängerungsoption um 2 Jahre! ° Geplante Auflage 01 2025 Performanceziel? ° Ausschüttung 6,0 % p.a. (Durchschnitt 10 Jahre Haltedauer) ° Interner Zinsfuß (IRR) 8,0 % p.a. (10 Jahre Haltedauer, Target-IRR)",
{"entities": [[22, 26, "RISIKOPROFIL"], [596, 601, "AUSSCHÜTTUNGSRENDITE"], [667, 672, "RENDITE"]]},
),
(
"Core/Core+, mit Cash-Flow-Stabilität",
{"entities": [[0, 10, "RISIKOPROFIL"]]},
),
(
"Zielausschüttung: min. 5,10%",
{"entities": [[24, 29, "ZIELAUSSCHÜTTUNG"]]},
),
(
"Zielrendite (IRR): min. 5,50%",
{"entities": [[24, 29, "ZIELRENDITE"]]},
),
(
"Rewe & Lidl Maxhütte-Haidhof é ae: 6 s Bahnhof Ankermieter REWE & Lidl er WALT 20 und 17 Jahre Miete p.a. 1.127.916 € Kaufpreis 21,43 Mio. € Faktor 19,00 x LTV / Zins 80% / 4,0% Ausschüttung 5,7 % IRR 7,1%",
{"entities": [[193, 198, "AUSSCHÜTTUNGSRENDITE"], [203, 207, "ZIELRENDITE"]]},
),
(
"Real Estate Prime Europe Access the Core of European Prime Cities with a green SRI fund including a genuine low carbon commitment",
{"entities": [[36, 40, "RISIKOPROFIL"]]},
),
(
"(FR, UK, DE, BE, NL, LU, Nordics, Allocation SP, IT, CH)",
{"entities": [[1, 32, "LÄNDERALLOKATION"], [45, 55, "LÄNDERALLOKATION"]]},
),
(
"IRR: 6% - 7%",
{"entities": [[5, 12, "RENDITE"]]},
),
(
"Europe | Germany 67 Value Add",
{"entities": [[9, 16, "LÄNDERALLOKATION"], [20, 29, "RISIKOPROFIL"]]},
),
(
"Germany, Norway 336 Core Plus",
{"entities": [[0, 7, "LÄNDERALLOKATION"], [20, 29, "RISIKOPROFIL"]]},
),
(
"UK",
{"entities": [[0, 2, "LÄNDERALLOKATION"]]},
),
(
"NORWAY",
{"entities": [[0, 6, "LÄNDERALLOKATION"]]},
),
(
"9.8% IRR",
{"entities": [[0, 4, "RENDITE"]]},
),
(
"Investment volume down 52% to €2.3 billion, with 4,000 100 14% value-add and core-plus increasing YoY",
{"entities": [[63, 86, "RISIKOPROFIL"]]},
),
(
"Geared Gross IRR seeking a range of 16-18% per annum",
{"entities": [[37, 43, "RENDITE"]]},
),
(
"Open-ended fund 24 months, incl. rolling reinvestment Sale of individual assets with respective management contracts or geared leases IRR: >6.5% | CoC: >5.0%",
{"entities": [[0, 10, "LAUFZEIT"], [139, 144, "RENDITE"]]},
),
(
"Our investment strategy focuses on investing in upscale hotels in European prime locations, including DACH, Italy, Spain, Portugal, France, UK, Denmark, Benelux,and Poland.",
{"entities": [[102, 171, "LÄNDERALLOKATION"]]},
),
(
"Core+ assets with value-add potential, Emerging Gateway Cities Helsinki] Core+ with Value well-mitigated risk and great upside Potential potential through asset improvement or = Max. 20% UK & Ireland {no contract renegotiation currency risk hedging], 80% tinental E > IRR target of 6-9%",
{"entities": [[0, 5, "RISIKOPROFIL"], [282, 286, "RENDITE"]]},
),
(
"10% net IRR since inception in 2018?",
{"entities": [[0, 3, "RENDITE"]]},
),
(
"Eurozone: Benelux, France and Germany",
{"entities": [[10, 37, "LÄNDERALLOKATION"]]},
),
(
"Open-ended, with quarterly liquidity (redemption rights, dual pricing)",
{"entities": [[0, 10, "LAUFZEIT"]]},
),
(
"Class A & B (Institutional): 0.93% on NAV; Class D (Wholesale): 1.80% on NAV; Class P (Wholesale): 1.25% on NAV",
{"entities": [[29, 34, "MANAGMENTGEBÜHREN"], [64, 69, "MANAGMENTGEBÜHREN"], [99, 104, "MANAGMENTGEBÜHREN"]]},
),
(
"Risk profile: favour core > © at least and core+ assets with a targeted N 2 n allocation to value add assets to enhance returns",
{"entities": [[21, 25, "RISIKOPROFIL"], [43, 48, "RISIKOPROFIL"]]},
),
(
"The Netherlands (38 assets) = Germany (9 assets) 10 largest Country assets split France (8 assets)",
{"entities": [[0, 15, "LÄNDERALLOKATION"], [30, 37, "LÄNDERALLOKATION"], [81, 87, "LÄNDERALLOKATION"]]},
),
(
"Expected IRR 10.9%",
{"entities": [[13, 18, "ZIELRENDITE"]]},
),
(
"Structure Open-end, perpetual life, Luxembourg domiciled Initial Target Size* €2 billion 6-8% total return,",
{"entities": [[10, 18, "LAUFZEIT"], [89, 93, "RENDITE"]]},
),
(
"Geographic Focus: UK, Ireland, Iberia, Nordics, Netherlands, Germany, France, Italy",
{"entities": [[18, 83, "LÄNDERALLOKATION"]]},
),
(
"IRR of 13-14%",
{"entities": [[7, 13, "RENDITE"]]},
),
(
"Value-add",
{"entities": [[0, 9, "RISIKOPROFIL"]]},
),
(
"Geographic allocation NORDICS UNITED KINGDOM GERMANY FRANCE PORTUGAL BENELUX",
{"entities": [[22, 76, "LÄNDERALLOKATION"]]},
),
(
"Strong track record delivering a 17% net IRR, 1.7x net multiple across all divested assets (both discretionary and non-discretionary mandates)",
{"entities": [[33, 36, "RENDITE"]]},
),
(
"Targeting a 7-8% net annual return and a 3-4% dividend yield, reflecting a target LTV of 35% (capped at 37.5%)",
{"entities": [[12, 16, "RENDITE"]]},
),
(
"Sweden Norway Denmark Finland",
{"entities": [[0, 29, "LÄNDERALLOKATION"]]},
),
(
"Logistics Residential Office Other",
{"entities": [[0, 34, "SEKTORENALLOKATION"]]},
),
(
"Fund Term Open-ended with an initial 24-month lock-in for new investors",
{"entities": [[10, 20, "LAUFZEIT"]]},
),
(
"Management fee of 85 bps on NAV.",
{"entities": [[18, 24, "MANAGMENTGEBÜHREN"]]},
),
(
"Core/Core+ strategy, with tactical exposure to development projects aiming at enhancing the quality of the portfolio over time",
{"entities": [[0, 10, "RISIKOPROFIL"]]},
),
(
"Fund term: Open-ended",
{"entities": [[11, 21, "LAUFZEIT"]]},
),
(
"Return targets: The fund targets a net internal rate of return (IRR) of 8% and a net annual income yield of 5% with planned quarterly distributions.",
{"entities": [[72, 74, "RENDITE"]]},
),
(
"Geographic scope: The fund has a broad mandate to invest in commercial and residential real estate across Sweden, Denmark, Finland, and Norway. 50% LTV Asset selection: Heirs to acquire high-quality, income-generating properties in major Nordic cities and enhance their value through active asset management. Portfolio construction: The goal is to build diversified portfolios that are appealing to core buyers upon exit.",
{"entities": [[106, 142, "LÄNDERALLOKATION"]]},
),
(
"Experience: Since 2012, | | has demonstrated its capability to build diversified and resilient portfolios for its core-plus funds. German Real Estate Quota advantage . Local expertise: extensive local relationships and proprietary deal flow in key Nordic markets provide a strategic advantage.",
{"entities": [[114, 123, "RISIKOPROFIL"]]},
),
(
"Target returns: 8% net IRR with 5% net annual income yield! * Geographic focus: Sweden, Denmark, Norway and Finland « Target leverage: 50% LTV (excluding short-term borrowing) « Sector exposure: office, logistics, public properties, retail (focused on grocery anchored and necessity driven retail) and residentials « Investment focus: high quality properties,",
{"entities": [[16, 18, "RENDITE"], [80, 115, "LÄNDERALLOKATION"], [195, 239, "SEKTORENALLOKATION"]]},
),
(
"The Fund 2 xemoours common limited partnership (SCS) (SICAV-RAIF) Investment Objective To pursue investments in commercial and residential properties throughout the Nordic Region Fund Target Size €300 million (equity) Return Targets Target net IRR of 8%, target net annual income yield of 5%",
{"entities": [[251, 253, "RENDITE"]]},
)
]

View File

@ -1,5 +1,8 @@
import os
from pathlib import Path
import spacy
from spacy.cli.train import train
from spacy.tokens import DocBin
@ -29,3 +32,9 @@ for text, annot in tqdm(TRAINING_DATA):
# save the DocBin object
os.makedirs("./data", exist_ok=True)
db.to_disk("./data/train.spacy")
config_path = Path("config.cfg")
output_path = Path("output")
print("Starte Training...")
train(config_path, output_path)

View File

@ -0,0 +1 @@
COORDINATOR_URL=""

View File

@ -0,0 +1,97 @@
# Validate Service
Ein Flask-basierter Microservice zur asynchronen Verarbeitung und Validierung von Entitäten aus zwei verschiedenen NLP-Services (SpaCy und Exxeta).
## Funktionsweise
Der Service empfängt für jede eindeutige ID zwei POST-Requests von verschiedenen Services:
1. **SpaCy Service** - sendet extrahierte Entitäten
2. **Exxeta Service** - sendet extrahierte Entitäten
Beim ersten Request werden die Daten zwischengespeichert. Beim zweiten Request startet die asynchrone Verarbeitung. Nach der Verarbeitung werden die Ergebnisse an einen nachgelagerten Service weitergeleitet.
## API Endpoints
### POST /validate
Empfängt Entitätsdaten von SpaCy oder Exxeta Services.
**Request Body:**
```json
{
"id": "pitch_book_id",
"service": "spacy|exxeta",
"entities": [
{
"label": "PERSON",
"entity": "Max Mustermann",
"page": 1
}
]
}
```
**Response:**
- **200**: Daten erfolgreich verarbeitet
- **400**: Fehlende oder ungültige Parameter
- **500**: Serverfehler
## Installation und Start
1. **Abhängigkeiten installieren:**
```bash
pip install -r requirements.txt
```
2. **Service starten:**
```bash
python app.py
```
Der Service läuft standardmäßig auf `http://localhost:5050`
## Konfiguration
Umgebungsvariablen (werden beim Start per `python-dotenv` aus der `.env`-Datei geladen):
- `COORDINATOR_URL`: URL des Koordinators
## Verarbeitungslogik
1. **Zwischenspeicherung**: Beim ersten Request wird das JSON in einem Thread-sicheren Dictionary gespeichert
2. **Trigger**: Beim zweiten Request wird die asynchrone Verarbeitung gestartet
3. **Merge & Validate**: Die Funktionen `merge_entities` und `validate_entities` führen Zusammenführung und Validierung durch:
- Normalisiert Entitäten (entfernt Zeilenumbrüche, konvertiert zu lowercase)
- Matched Entitäten basierend auf Label, normalisiertem Text und Seitenzahl
- Kennzeichnet Entitäten als "validated" (beide Services) oder "single-source"
4. **Weiterleitung**: Ergebnisse werden an den nächsten Service gesendet
5. **Cleanup**: Verarbeitete Daten werden aus dem Speicher entfernt
## Architektur
```
┌─────────────────┐ ┌─────────────────┐
│ SpaCy Service │ │ Exxeta Service │
└─────────┬───────┘ └─────────┬───────┘
│ │
│ POST /validate │ POST /validate
│ (service_type:spacy) │ (service_type:exxeta)
▼ ▼
┌─────────────────────────────────────┐
│ Validate Service │
│ ┌─────────────────────────────┐ │
│ │ Zwischenspeicher │ │
│ │ (Thread-safe Dictionary) │ │
│ └─────────────────────────────┘ │
│ ┌─────────────────────────────┐ │
│ │ Asynchrone Verarbeitung │ │
│ │ (merge_and_validate_entities)│ │
│ └─────────────────────────────┘ │
└─────────────┬───────────────────────┘
│ POST (processed data)
┌─────────────────────────────┐
│ Nachgelagerter Service │
└─────────────────────────────┘
```

View File

@ -0,0 +1,130 @@
from flask import Flask, request, jsonify
import threading
from merge_logic import merge_entities
from validate_logic import validate_entities
from dotenv import load_dotenv
import os
import requests
import json
# Flask application object; the /validate route below is registered on it.
app = Flask(__name__)
# Load variables from a .env file into the process environment.
load_dotenv()
# Host[:port] of the coordinator service. An empty value disables forwarding:
# send_to_coordinator_service() then only logs the result and returns.
coordinator_url = os.getenv("COORDINATOR_URL") or ""
# TODO: add a persistence layer; pending datasets currently live only in this
# process's memory.
# NOTE(review): because the state is per-process, the two requests for one ID
# must reach the same worker process. Confirm the deployment does not run
# several worker processes for this service.
data_storage = {} # {id: {"spacy_data": list | None, "exxeta_data": list | None}}
# Guards every access to data_storage (request handler and worker threads).
storage_lock = threading.Lock()
def send_to_coordinator_service(processed_data, request_id, timeout=10):
    """Forward the processed entities of one pitch book to the coordinator.

    Sends ``PUT http://<COORDINATOR_URL>/api/pitch_book/<request_id>`` with the
    entities JSON-encoded in the form field ``kpi``. Delivery is best effort:
    every failure is logged and swallowed, nothing is raised to the caller.

    Args:
        processed_data: JSON-serialisable result to deliver.
        request_id: Pitch book ID; becomes the last URL path segment.
        timeout: Seconds to wait for the coordinator before giving up.
    """
    if not coordinator_url:
        # No coordinator configured: log the result instead of sending it.
        print("Not processed, missing url", processed_data)
        return

    try:
        payload = {
            "kpi": json.dumps(processed_data),
        }
        response = requests.put(
            "http://" + coordinator_url + "/api/pitch_book/" + str(request_id),
            data=payload,
            # Without a timeout an unresponsive coordinator would block the
            # worker thread indefinitely.
            timeout=timeout,
        )
        # A 4xx/5xx answer is a failed delivery and must not be logged as sent.
        response.raise_for_status()
        print(f"Result PitchBook {request_id} sent to coordinator")
    except Exception as e:
        print(f"Error sending ID {request_id}: {e}")
def process_data_async(request_id, spacy_data, exxeta_data):
    """Merge, validate and forward both datasets of one pitch book.

    Intended to run in a background thread. Errors are logged and swallowed so
    they never propagate out of the thread.

    Args:
        request_id: Pitch book ID, also the key of the pending entry in
            ``data_storage``.
        spacy_data: Entities received from the SpaCy service.
        exxeta_data: Entities received from the Exxeta service.
    """
    try:
        print(f"Start asynchronous processing for PitchBook: {request_id}")

        # Perform merge
        merged_entities = merge_entities(spacy_data, exxeta_data)
        valid_entities = validate_entities(merged_entities)

        # Send result to next service
        send_to_coordinator_service(valid_entities, request_id)
    except Exception as e:
        print(f"Error during asynchronous processing for ID {request_id}: {e}")
    finally:
        # Always drop the pending entry, even when processing failed;
        # otherwise a failing pitch book would stay in memory forever.
        with storage_lock:
            data_storage.pop(request_id, None)
@app.route("/validate", methods=["POST"])
def validate():
    """Receive one service's entities for a pitch book.

    Expects a JSON object with ``id``, ``service`` (``"spacy"`` or
    ``"exxeta"``) and an optional ``entities`` list. The first dataset for an
    ID is stored; once both services have delivered, processing starts in a
    background thread.

    Returns:
        200 with a status message, 400 for missing or invalid input, 500 for
        unexpected errors.
    """
    try:
        # silent=True: a malformed body or wrong Content-Type yields None
        # instead of raising an HTTPException, which the generic handler below
        # would otherwise turn into a 500.
        json_data = request.get_json(silent=True)
        if not json_data or not isinstance(json_data, dict):
            return jsonify({"error": "Missing JSON data"}), 400

        # extract ID and service_type from the data
        request_id = json_data.get("id")
        service_type = json_data.get("service")  # 'spacy' or 'exxeta'
        entities = json_data.get("entities")
        if entities is None:
            # An explicit null means "no entities". Storing None would look
            # like "not received yet" and processing would never start.
            entities = []

        if not request_id or not service_type:
            return jsonify({"error": "ID and service_type are required"}), 400

        if isinstance(request_id, bool) or not isinstance(request_id, (str, int)):
            return jsonify({"error": "ID has to be a string or a number"}), 400

        if service_type not in ["spacy", "exxeta"]:
            return jsonify({"error": "service_type has to be 'spacy' or 'exxeta'"}), 400

        if not isinstance(entities, list):
            return jsonify({"error": "entities has to be a list"}), 400

        # Use one canonical key so that 1 and "1" address the same pitch book.
        storage_key = str(request_id)

        with storage_lock:
            # Initialize entry if not already present
            entry = data_storage.setdefault(
                storage_key,
                {
                    "spacy_data": None,
                    "exxeta_data": None,
                },
            )

            # Store the data based on the service type
            entry[f"{service_type}_data"] = entities

            # Read both datasets while still holding the lock
            spacy_data = entry["spacy_data"]
            exxeta_data = entry["exxeta_data"]

        # If both datasets are present, start asynchronous processing
        if spacy_data is not None and exxeta_data is not None:
            processing_thread = threading.Thread(
                target=process_data_async,
                args=(storage_key, spacy_data, exxeta_data),
                daemon=True,
            )
            processing_thread.start()

            return (
                jsonify(
                    {
                        "message": f"Second dataset for ID {request_id} received. Processing started.",
                    }
                ),
                200,
            )

        return (
            jsonify(
                {
                    "message": f"First dataset for ID {request_id} from {service_type} stored. Waiting for second dataset.",
                }
            ),
            200,
        )

    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return jsonify({"error": f"Fehler: {str(e)}"}), 500
# Entry point when the module is run directly (python app.py).
# NOTE(review): debug=True enables the Werkzeug debugger and host="0.0.0.0"
# listens on all interfaces; use this entry point for local development only.
if __name__ == "__main__":
app.run(debug=True, host="0.0.0.0", port=5050)

View File

@ -0,0 +1,68 @@
def normalize_entity(entity_str):
    """Build the comparison form of an entity text.

    The text is lower-cased and every whitespace character (spaces, tabs,
    line breaks) is removed. Falsy input such as ``None`` or ``""`` yields
    ``""``.
    """
    if entity_str:
        # split() without arguments splits on any whitespace run, including
        # newlines, so joining the pieces strips all whitespace.
        return "".join(entity_str.lower().split())
    return ""
def merge_entities(spacy_data, exxeta_data):
    """Merge the entity lists of both services into one annotated list.

    Two entities match when label, normalized text and page are all equal.

    Args:
        spacy_data: Entities from SpaCy; dicts with ``label``, ``entity`` and
            ``page``.
        exxeta_data: Entities from Exxeta, same shape.

    Returns:
        A list with all SpaCy entities first (input order), followed by the
        Exxeta entities that matched no SpaCy entity. Matched entities carry
        ``status == "validated"``; the others carry
        ``status == "single-source"`` and a ``source`` field.
    """

    def match_key(item):
        # The label is part of the key: the same text on the same page under a
        # different label is a different entity.
        return (item["label"], normalize_entity(item["entity"]), item["page"])

    # Index Exxeta once so each SpaCy entity is checked in constant time.
    exxeta_keys = {match_key(e) for e in exxeta_data}
    matched_keys = set()
    merged = []

    # Process SpaCy entities first
    for s in spacy_data:
        key = match_key(s)
        if key in exxeta_keys:
            matched_keys.add(key)
            merged.append(
                {
                    "label": s["label"],
                    "entity": s["entity"],
                    "page": s["page"],
                    "status": "validated",
                }
            )
        else:
            merged.append(
                {
                    "label": s["label"],
                    "entity": s["entity"],
                    "page": s["page"],
                    "status": "single-source",
                    "source": "spacy",
                }
            )

    # Add the Exxeta entities that no SpaCy entity confirmed
    for e in exxeta_data:
        if match_key(e) not in matched_keys:
            merged.append(
                {
                    "label": e["label"],
                    "entity": e["entity"],
                    "page": e["page"],
                    "status": "single-source",
                    "source": "exxeta",
                }
            )

    return merged

View File

@ -0,0 +1,14 @@
blinker==1.9.0
certifi==2025.4.26
charset-normalizer==3.4.2
click==8.2.1
dotenv==0.9.9
Flask==3.1.1
idna==3.10
itsdangerous==2.2.0
Jinja2==3.1.6
MarkupSafe==3.0.2
python-dotenv==1.1.0
requests==2.32.3
urllib3==2.4.0
Werkzeug==3.1.3

View File

@ -0,0 +1,12 @@
def validate_entities(entities):
    """Return the entities that are considered valid.

    Placeholder: no validation rules are implemented yet, so the input list is
    returned unchanged.

    TODO: implement validation rules on the merged entities.
    """
    return entities

View File

@ -19,18 +19,18 @@ services:
coordinator:
build:
context: backend/coordinator
dockerfile: ../../Dockerfile
env_file:
- .env
depends_on:
- db
db:
condition: service_healthy
healthcheck:
test: wget --spider --no-verbose http://127.0.0.1:5000/health || exit 1
interval: 10s
timeout: 5s
retries: 5
ports:
- 5000:5000
- 5050:5000
spacy:
build:
@ -42,3 +42,11 @@ services:
dockerfile: ../../Dockerfile
env_file:
- .env
validate:
build:
context: backend/validate-service
dockerfile: ../../Dockerfile
env_file:
- .env
ports:
- 5051:5000

View File

@ -12,7 +12,7 @@
},
"formatter": {
"enabled": true,
"indentStyle": "space"
"indentStyle": "tab"
},
"organizeImports": {
"enabled": true

Binary file not shown.

View File

@ -20,4 +20,4 @@ server {
location = /50x.html {
root /usr/share/nginx/html;
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -26,7 +26,8 @@
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-material-file-upload": "^0.0.4",
"react-pdf": "^9.2.1"
"react-pdf": "^8.0.2",
"socket.io-client": "^4.8.1"
},
"devDependencies": {
"@biomejs/biome": "1.9.4",

View File

@ -0,0 +1,33 @@
import Box from "@mui/material/Box";
import CircularProgress, {
type CircularProgressProps,
} from "@mui/material/CircularProgress";
import Typography from "@mui/material/Typography";
/**
 * Circular progress indicator with the rounded percentage shown in its centre.
 *
 * Renders in `determinate` mode by default so the ring reflects `value`. The
 * default `indeterminate` variant would spin and ignore `value`, contradicting
 * the label. Callers can still pass another `variant` through `props`.
 *
 * @param props - All `CircularProgress` props; `value` (0-100) is required.
 */
export function CircularProgressWithLabel(
	props: CircularProgressProps & { value: number },
) {
	return (
		<Box sx={{ position: "relative", display: "inline-flex" }}>
			<CircularProgress variant="determinate" {...props} />
			<Box
				sx={{
					top: 0,
					left: 0,
					bottom: 0,
					right: 0,
					position: "absolute",
					display: "flex",
					alignItems: "center",
					justifyContent: "center",
				}}
			>
				<Typography
					variant="subtitle1"
					component="div"
					sx={{ color: "inherit" }}
				>{`${Math.round(props.value)}%`}</Typography>
			</Box>
		</Box>
	);
}

View File

@ -1,100 +1,178 @@
import { useState } from 'react'
import FileUpload from 'react-material-file-upload'
import {Box, Button, IconButton, Paper} from '@mui/material'
import { useNavigate } from '@tanstack/react-router'
import SettingsIcon from '@mui/icons-material/Settings';
import SettingsIcon from "@mui/icons-material/Settings";
import { Backdrop, Box, Button, IconButton, Paper } from "@mui/material";
import { useNavigate } from "@tanstack/react-router";
import { useCallback, useEffect, useState } from "react";
import FileUpload from "react-material-file-upload";
import { socket } from "../socket";
import { CircularProgressWithLabel } from "./CircularProgressWithLabel";
const PROGRESS = false;
export default function UploadPage() {
const [files, setFiles] = useState<File[]>([])
const fileTypes = ["pdf"];
const navigate = useNavigate()
const [files, setFiles] = useState<File[]>([]);
const [pageId, setPageId] = useState<string | null>(null);
const [loadingState, setLoadingState] = useState<number | null>(null);
const fileTypes = ["pdf"];
const navigate = useNavigate();
return (
<Box
display="flex"
flexDirection="column"
alignItems="center"
justifyContent="center"
height="100vh"
bgcolor="white"
>
<Box
width="100%"
maxWidth="1300px"
display="flex"
justifyContent="flex-end"
px={2}
>
<IconButton onClick={() => navigate({ to: '/config' })}>
<SettingsIcon fontSize="large"/>
</IconButton>
</Box>
<Paper
elevation={3}
sx={{
width: 900,
height: 500,
backgroundColor: '#eeeeee',
borderRadius: 4,
display: 'flex',
justifyContent: 'center',
alignItems: 'center',
}}
>
<Box sx={{
height: '100%',
width: '100%',
maxWidth: '100%',
margin: '0px',
padding: '0px',
'& .MuiBox-root': {
display: 'flex',
flexDirection: 'column',
alignItems: 'center',
justifyContent: 'center',
border: 'none',
textAlign: 'center',
},
}}>
<FileUpload
value={files}
onChange={setFiles}
accept={`.${fileTypes.join(', .')}`}
title="Hier Dokument hinziehen"
buttonText="Datei auswählen"
sx={{
height: '100%',
width: '100%',
padding: '0px',
'& svg': {
color: '#9e9e9e',
},
'& .MuiOutlinedInput-notchedOutline': {
border: 'none',
},
'& .MuiButton-root': {
backgroundColor: '#9e9e9e',
},
'& .MuiTypography-root': {
fontSize: '1.25rem',
fontWeight: 500,
marginBottom: 1,
},
}}
/>
</Box>
</Paper>
<Button
variant="contained"
sx={{
mt: 4,
backgroundColor: '#383838',
}}
disabled={files.length === 0}
onClick={() => navigate({ to: '/extractedResult' })}
>
Kennzahlen extrahieren
</Button>
</Box>
)
}
/**
 * Uploads the first selected file as a new pitch book.
 *
 * On success the returned ID is stored and progress is reset to 0. When
 * progress reporting is disabled, the app navigates to the result page right
 * away. Network and server failures are logged and never thrown, because this
 * callback is used directly as a click handler.
 */
const uploadFile = useCallback(async () => {
	const file = files[0];
	if (!file) {
		// Nothing selected; the button is normally disabled in this state.
		return;
	}

	const formData = new FormData();
	formData.append("file", file);

	try {
		const response = await fetch("http://localhost:5050/api/pitch_book", {
			method: "POST",
			body: formData,
		});

		if (!response.ok) {
			console.error("Failed to upload file");
			return;
		}

		console.log("File uploaded successfully");
		const data: { id: string | number } = await response.json();
		console.log(data);

		// The state and the route param are strings, while the backend may send a number.
		const id = String(data.id);
		setPageId(id);
		setLoadingState(0);

		if (!PROGRESS) {
			navigate({
				to: "/extractedResult/$pitchBook",
				params: { pitchBook: id },
			});
		}
	} catch (error: unknown) {
		// fetch rejects on network errors and json() on an invalid body;
		// without this the click handler would leave an unhandled rejection.
		console.error("Failed to upload file", error);
	}
}, [files, navigate]);
// Socket "connect" handler: only logs that the connection was established.
const onConnection = useCallback(() => console.log("connected"), []);
/**
 * Socket "progress" handler: mirrors the progress of the uploaded pitch book
 * into local state and opens the result page once it reaches 100.
 * Events for other pitch books are ignored.
 */
const onProgress = useCallback(
	(progress: { id: number; progress: number }) => {
		console.log("Progress:", progress);
		console.log(pageId);

		// Number(null) is 0, so without the null check an event for id 0
		// would be accepted before anything has been uploaded.
		if (pageId === null || Number(pageId) !== progress.id) {
			return;
		}

		setLoadingState(progress.progress);
		if (progress.progress === 100) {
			navigate({
				to: "/extractedResult/$pitchBook",
				params: { pitchBook: progress.id.toString() },
			});
		}
	},
	[pageId, navigate],
);
// Keep the socket listeners in sync with the current handlers: register on
// mount or handler change, and remove the same handlers on cleanup.
useEffect(() => {
	socket.on("connect", onConnection);
	socket.on("progress", onProgress);

	const unsubscribe = () => {
		socket.off("connect", onConnection);
		socket.off("progress", onProgress);
	};
	return unsubscribe;
}, [onConnection, onProgress]);
// Render: an optional progress backdrop (only when PROGRESS is enabled, open
// while both a pitch book ID and a loading state are set), a settings button
// that navigates to /config, the drag-and-drop upload area, and the submit
// button, which is disabled until a file is selected and triggers uploadFile.
return (
<>
{PROGRESS && (
<Backdrop
sx={(theme) => ({ color: "#fff", zIndex: theme.zIndex.drawer + 1 })}
open={pageId !== null && loadingState !== null}
>
<CircularProgressWithLabel
color="inherit"
value={loadingState || 0}
size={60}
/>
</Backdrop>
)}
<Box
display="flex"
flexDirection="column"
alignItems="center"
justifyContent="center"
height="100vh"
bgcolor="white"
>
<Box
width="100%"
maxWidth="1300px"
display="flex"
justifyContent="flex-end"
px={2}
>
<IconButton onClick={() => navigate({ to: "/config" })}>
<SettingsIcon fontSize="large" />
</IconButton>
</Box>
<Paper
elevation={3}
sx={{
width: 900,
height: 500,
backgroundColor: "#eeeeee",
borderRadius: 4,
display: "flex",
justifyContent: "center",
alignItems: "center",
}}
>
<Box
sx={{
height: "100%",
width: "100%",
maxWidth: "100%",
margin: "0px",
padding: "0px",
"& .MuiBox-root": {
display: "flex",
flexDirection: "column",
alignItems: "center",
justifyContent: "center",
border: "none",
textAlign: "center",
},
}}
>
<FileUpload
value={files}
onChange={setFiles}
accept={`.${fileTypes.join(", .")}`}
title="Hier Dokument hinziehen"
buttonText="Datei auswählen"
sx={{
height: "100%",
width: "100%",
padding: "0px",
"& svg": {
color: "#9e9e9e",
},
"& .MuiOutlinedInput-notchedOutline": {
border: "none",
},
"& .MuiButton-root": {
backgroundColor: "#9e9e9e",
},
"& .MuiTypography-root": {
fontSize: "1.25rem",
fontWeight: 500,
marginBottom: 1,
},
}}
/>
</Box>
</Paper>
<Button
variant="contained"
sx={{
mt: 4,
backgroundColor: "#383838",
}}
disabled={files.length === 0}
onClick={uploadFile}
>
Kennzahlen extrahieren
</Button>
</Box>
</>
);
}

View File

@ -1,3 +1,4 @@
<<<<<<< HEAD
import { Document, Page, pdfjs } from "react-pdf";
import { useState, useRef, useEffect, useCallback } from 'react';
import 'react-pdf/dist/esm/Page/AnnotationLayer.css';
@ -23,13 +24,42 @@ type Props = {
const [numPages, setNumPages] = useState<number | null>(null);
const [pageNumber, setPageNumber] = useState(1);
const [containerWidth, setContainerWidth] = useState<number | null>(null);
=======
import { useEffect, useRef, useState } from "react";
import { Document, Page } from "react-pdf";
import "react-pdf/dist/esm/Page/AnnotationLayer.css";
import "react-pdf/dist/esm/Page/TextLayer.css";
import ArrowCircleLeftIcon from "@mui/icons-material/ArrowCircleLeft";
import ArrowCircleRightIcon from "@mui/icons-material/ArrowCircleRight";
import { Box, IconButton } from "@mui/material";
const containerRef = useRef<HTMLDivElement>(null);
interface PDFViewerProps {
pitchBookId: string;
}
export default function PDFViewer({ pitchBookId }: PDFViewerProps) {
const [numPages, setNumPages] = useState<number | null>(null);
const [pageNumber, setPageNumber] = useState(1);
const [containerWidth, setContainerWidth] = useState<number | null>(null);
const containerRef = useRef<HTMLDivElement>(null);
const onDocumentLoadSuccess = ({ numPages }: { numPages: number }) => {
setNumPages(numPages);
};
const onDocumentLoadSuccess = ({ numPages }: { numPages: number }) => {
setNumPages(numPages);
};
>>>>>>> origin/main
// Measure the container once on mount and again on every window resize; the
// listener is removed on unmount.
// NOTE(review): this file still contains committed Git merge-conflict markers
// before and after this effect; they must be resolved before it can compile.
useEffect(() => {
	function syncWidth() {
		const container = containerRef.current;
		if (container) {
			setContainerWidth(container.offsetWidth);
		}
	}

	syncWidth();
	window.addEventListener("resize", syncWidth);
	return () => {
		window.removeEventListener("resize", syncWidth);
	};
}, []);
<<<<<<< HEAD
const [highlightLabels, setHighlightLabels] = useState<string[]>([]);
// Funktion zum Einfärben von Text
@ -125,4 +155,66 @@ type Props = {
</Box>
</Box>
);
}
}
=======
return (
<Box
display="flex"
flexDirection="column"
justifyContent="center"
alignItems="center"
width="100%"
height="100%"
p={2}
>
<Box
ref={containerRef}
sx={{
width: "100%",
maxHeight: "90vh",
overflow: "auto",
display: "flex",
justifyContent: "center",
alignItems: "center",
}}
>
<Document
file={`http://localhost:5050/api/pitch_book/${pitchBookId}/download`}
onLoadSuccess={onDocumentLoadSuccess}
onLoadError={(error) =>
console.error("Es gab ein Fehler beim Laden des PDFs:", error)
}
onSourceError={(error) => console.error("Ungültige PDF:", error)}
>
{containerWidth && (
<Page pageNumber={pageNumber} width={containerWidth * 0.8} />
)}
</Document>
</Box>
<Box
mt={2}
display="flex"
alignItems="center"
justifyContent="center"
gap={1}
>
<IconButton
disabled={pageNumber <= 1}
onClick={() => setPageNumber((p) => p - 1)}
>
<ArrowCircleLeftIcon fontSize="large" />
</IconButton>
<span>
{pageNumber} / {numPages}
</span>
<IconButton
disabled={pageNumber >= (numPages || 1)}
onClick={() => setPageNumber((p) => p + 1)}
>
<ArrowCircleRightIcon fontSize="large" />
</IconButton>
</Box>
</Box>
);
}
>>>>>>> origin/main

View File

@ -34,9 +34,8 @@ declare module "@tanstack/react-router" {
}
}
// Initialize PDF.js worker
pdfjs.GlobalWorkerOptions.workerSrc = new URL(
"pdfjs-dist/build/pdf.worker.min.mjs",
"pdfjs-dist/build/pdf.worker.min.js",
import.meta.url,
).toString();

View File

@ -11,18 +11,12 @@
// Import Routes
import { Route as rootRoute } from './routes/__root'
import { Route as ExtractedResultImport } from './routes/extractedResult'
import { Route as ConfigImport } from './routes/config'
import { Route as IndexImport } from './routes/index'
import { Route as ExtractedResultPitchBookImport } from './routes/extractedResult.$pitchBook'
// Create/Update Routes
const ExtractedResultRoute = ExtractedResultImport.update({
id: '/extractedResult',
path: '/extractedResult',
getParentRoute: () => rootRoute,
} as any)
const ConfigRoute = ConfigImport.update({
id: '/config',
path: '/config',
@ -35,6 +29,12 @@ const IndexRoute = IndexImport.update({
getParentRoute: () => rootRoute,
} as any)
const ExtractedResultPitchBookRoute = ExtractedResultPitchBookImport.update({
id: '/extractedResult/$pitchBook',
path: '/extractedResult/$pitchBook',
getParentRoute: () => rootRoute,
} as any)
// Populate the FileRoutesByPath interface
declare module '@tanstack/react-router' {
@ -53,11 +53,11 @@ declare module '@tanstack/react-router' {
preLoaderRoute: typeof ConfigImport
parentRoute: typeof rootRoute
}
'/extractedResult': {
id: '/extractedResult'
path: '/extractedResult'
fullPath: '/extractedResult'
preLoaderRoute: typeof ExtractedResultImport
'/extractedResult/$pitchBook': {
id: '/extractedResult/$pitchBook'
path: '/extractedResult/$pitchBook'
fullPath: '/extractedResult/$pitchBook'
preLoaderRoute: typeof ExtractedResultPitchBookImport
parentRoute: typeof rootRoute
}
}
@ -68,41 +68,41 @@ declare module '@tanstack/react-router' {
export interface FileRoutesByFullPath {
'/': typeof IndexRoute
'/config': typeof ConfigRoute
'/extractedResult': typeof ExtractedResultRoute
'/extractedResult/$pitchBook': typeof ExtractedResultPitchBookRoute
}
export interface FileRoutesByTo {
'/': typeof IndexRoute
'/config': typeof ConfigRoute
'/extractedResult': typeof ExtractedResultRoute
'/extractedResult/$pitchBook': typeof ExtractedResultPitchBookRoute
}
export interface FileRoutesById {
__root__: typeof rootRoute
'/': typeof IndexRoute
'/config': typeof ConfigRoute
'/extractedResult': typeof ExtractedResultRoute
'/extractedResult/$pitchBook': typeof ExtractedResultPitchBookRoute
}
export interface FileRouteTypes {
fileRoutesByFullPath: FileRoutesByFullPath
fullPaths: '/' | '/config' | '/extractedResult'
fullPaths: '/' | '/config' | '/extractedResult/$pitchBook'
fileRoutesByTo: FileRoutesByTo
to: '/' | '/config' | '/extractedResult'
id: '__root__' | '/' | '/config' | '/extractedResult'
to: '/' | '/config' | '/extractedResult/$pitchBook'
id: '__root__' | '/' | '/config' | '/extractedResult/$pitchBook'
fileRoutesById: FileRoutesById
}
export interface RootRouteChildren {
IndexRoute: typeof IndexRoute
ConfigRoute: typeof ConfigRoute
ExtractedResultRoute: typeof ExtractedResultRoute
ExtractedResultPitchBookRoute: typeof ExtractedResultPitchBookRoute
}
const rootRouteChildren: RootRouteChildren = {
IndexRoute: IndexRoute,
ConfigRoute: ConfigRoute,
ExtractedResultRoute: ExtractedResultRoute,
ExtractedResultPitchBookRoute: ExtractedResultPitchBookRoute,
}
export const routeTree = rootRoute
@ -117,7 +117,7 @@ export const routeTree = rootRoute
"children": [
"/",
"/config",
"/extractedResult"
"/extractedResult/$pitchBook"
]
},
"/": {
@ -126,8 +126,8 @@ export const routeTree = rootRoute
"/config": {
"filePath": "config.tsx"
},
"/extractedResult": {
"filePath": "extractedResult.tsx"
"/extractedResult/$pitchBook": {
"filePath": "extractedResult.$pitchBook.tsx"
}
}
}

View File

@ -0,0 +1,100 @@
import ContentPasteIcon from "@mui/icons-material/ContentPaste";
import { Box, Button, Paper, Typography } from "@mui/material";
import { createFileRoute, useNavigate } from "@tanstack/react-router";
import KennzahlenTable from "../components/KennzahlenTable";
import PDFViewer from "../components/pdfViewer";
// File-based route for /extractedResult/$pitchBook. The component reads the
// $pitchBook path parameter through Route.useParams().
export const Route = createFileRoute("/extractedResult/$pitchBook")({
component: ExtractedResultsPage,
});
/**
 * Result page for one pitch book: a status indicator and heading on top, the
 * KPI table on the left, and the PDF viewer with action buttons on the right.
 * The pitch book ID comes from the `$pitchBook` route parameter.
 */
function ExtractedResultsPage() {
	const { pitchBook } = Route.useParams();
	const navigate = useNavigate();

	// Traffic-light indicator; the status is currently hard-coded.
	const status: "green" | "yellow" | "red" = "red";
	const statusColors = {
		red: "#f43131",
		yellow: "#f6ed48",
		green: "#3fd942",
	};
	const statusColor = statusColors[status];

	// Values shared by several elements below.
	const panelBackground = "#eeeeee";
	const darkButtonSx = { backgroundColor: "#383838" };
	const goToUpload = () => navigate({ to: "/" });

	return (
		<Box p={4}>
			<Box display="flex" alignItems="center" gap={3}>
				<Box
					sx={{
						width: 45,
						height: 45,
						borderRadius: "50%",
						backgroundColor: statusColor,
						top: 32,
						left: 32,
					}}
				/>
				<Typography variant="h5" gutterBottom>
					Kennzahlen extrahiert aus: <br />
					<strong>FONDSNAME: TODO</strong>
				</Typography>
			</Box>
			<Box
				display="flex"
				gap={4}
				sx={{
					width: "100vw",
					maxWidth: "100%",
					height: "80vh",
					mt: 4,
				}}
			>
				<Paper
					elevation={2}
					sx={{
						width: "45%",
						height: "100%",
						borderRadius: 2,
						backgroundColor: panelBackground,
						padding: 2,
						overflow: "auto",
					}}
				>
					<KennzahlenTable />
				</Paper>
				<Box
					display="flex"
					flexDirection="column"
					justifyContent="space-between"
					gap={5}
					sx={{ width: "55%", height: "95%" }}
				>
					<Paper
						elevation={2}
						sx={{
							height: "100%",
							borderRadius: 2,
							backgroundColor: panelBackground,
							display: "flex",
							alignItems: "center",
							justifyContent: "center",
						}}
					>
						<PDFViewer pitchBookId={pitchBook} />
					</Paper>
					<Box mt={2} display="flex" justifyContent="flex-end" gap={2}>
						<Button variant="contained" sx={darkButtonSx}>
							<ContentPasteIcon sx={{ fontSize: 18, mr: 1 }} />
							Kennzahlenzeile kopieren
						</Button>
						<Button variant="contained" sx={darkButtonSx} onClick={goToUpload}>
							Neu hochladen
						</Button>
					</Box>
				</Box>
			</Box>
		</Box>
	);
}

View File

@ -0,0 +1,6 @@
import { io } from "socket.io-client";
// Socket.IO client instance exported for use by the components.
// NOTE(review): the server URL is hard-coded to a local address. Passing
// `undefined` to io() instead would derive the URL from `window.location`;
// consider selecting the URL per environment.
export const socket = io("http://localhost:5050");