Merge remote-tracking branch 'origin/main' into #76-Hinzufügen-einer-Seitenanzahl-bei-neuen-Kennzahlen

# Conflicts:
#	project/frontend/src/components/KennzahlenTable.tsx
pull/78/head
s8613 2025-06-20 06:35:00 +02:00
commit e3149e0aa4
19 changed files with 490 additions and 387 deletions

View File

@ -5,7 +5,6 @@ from dotenv import load_dotenv
from controller import register_routes from controller import register_routes
from model.database import init_db from model.database import init_db
from controller.socketIO import socketio from controller.socketIO import socketio
from controller.kennzahlen import kennzahlen_bp
app = Flask(__name__) app = Flask(__name__)
CORS(app) CORS(app)
@ -22,9 +21,6 @@ init_db(app)
register_routes(app) register_routes(app)
# Register blueprints
app.register_blueprint(kennzahlen_bp)
@app.route("/health") @app.route("/health")
def health_check(): def health_check():
return "OK" return "OK"

View File

@ -1,61 +0,0 @@
from flask import Blueprint, jsonify, request
from model.kennzahl import Kennzahl
from model.database import db
# Blueprint object that the /api/kennzahlen route decorators in this module attach to.
kennzahlen_bp = Blueprint('kennzahlen', __name__)
# Example data: sample key figures, all stored under the placeholder pdf_id "example".
# Every entry carries the pdf_id, label, value, page and status keys that
# init_kennzahlen() copies into a Kennzahl row.
EXAMPLE_DATA = [
{"pdf_id": "example", "label": "Fondsname", "value": "Fund Real Estate Prime Europe", "page": 1, "status": "ok"},
{"pdf_id": "example", "label": "Fondsmanager", "value": "", "page": 1, "status": "error"},
{"pdf_id": "example", "label": "Risikoprofil", "value": "Core/Core+", "page": 10, "status": "warning"},
{"pdf_id": "example", "label": "LTV", "value": "30-35 %", "page": 8, "status": "ok"},
{"pdf_id": "example", "label": "Ausschüttungsrendite", "value": "4%", "page": 34, "status": "ok"}
]
@kennzahlen_bp.route('/api/kennzahlen/init', methods=['POST'])
def init_kennzahlen():
    """Reset the example key figures to the contents of ``EXAMPLE_DATA``.

    Rows with ``pdf_id='example'`` are deleted, one ``Kennzahl`` row per
    example entry is added, and the session is committed.

    Returns:
        A JSON success message, or a JSON ``error`` payload with status 500
        (after rolling back the session) if any step raises.
    """
    try:
        # Remove example rows left over from an earlier initialisation.
        Kennzahl.query.filter_by(pdf_id='example').delete()
        # Build one row per example entry and stage them together.
        fresh_rows = [
            Kennzahl(
                pdf_id=entry['pdf_id'],
                label=entry['label'],
                value=entry['value'],
                page=entry['page'],
                status=entry['status'],
            )
            for entry in EXAMPLE_DATA
        ]
        db.session.add_all(fresh_rows)
        db.session.commit()
        return jsonify({"message": "Kennzahlen erfolgreich initialisiert"})
    except Exception as exc:
        # Undo the delete/inserts so the session stays usable.
        db.session.rollback()
        return jsonify({"error": str(exc)}), 500
@kennzahlen_bp.route('/api/kennzahlen', methods=['GET'])
def get_kennzahlen():
    """Return all key figures stored for one PDF as a JSON array.

    The PDF is selected with the ``pdf_id`` query parameter; when it is
    absent the example data set (``'example'``) is used.
    """
    # Default to 'example' so the endpoint serves the example data.
    requested_pdf = request.args.get('pdf_id', 'example')
    rows = Kennzahl.query.filter_by(pdf_id=requested_pdf).all()
    payload = []
    for row in rows:
        payload.append(row.to_dict())
    return jsonify(payload)
@kennzahlen_bp.route('/api/kennzahlen/<label>', methods=['PUT'])
def update_kennzahl(label):
    """Update the value of the key figure identified by ``label``.

    The PDF is selected with the ``pdf_id`` query parameter (default
    ``'example'``). The request body must be a JSON object; its optional
    ``value`` key replaces the stored value, otherwise the value is kept.

    Args:
        label: Label of the key figure, taken from the URL path.

    Returns:
        The updated key figure as JSON, or a JSON ``error`` payload with
        status 400 (body is not a JSON object), 404 (no matching key
        figure) or 500 (commit failed; the session is rolled back).
    """
    # silent=True returns None for a missing or malformed JSON body instead
    # of raising, so every bad payload is rejected uniformly below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Without this guard data.get() would raise AttributeError (HTTP 500)
        # for an empty body, JSON null, or a JSON array.
        return jsonify({'error': 'Ungültiger Request-Body: JSON-Objekt erwartet'}), 400

    # Default to 'example' so the endpoint works against the example data.
    pdf_id = request.args.get('pdf_id', 'example')
    kennzahl = Kennzahl.query.filter_by(pdf_id=pdf_id, label=label).first()
    if not kennzahl:
        return jsonify({'error': 'Kennzahl nicht gefunden'}), 404

    kennzahl.value = data.get('value', kennzahl.value)
    try:
        db.session.commit()
    except Exception as e:
        # Same rollback-and-report handling as init_kennzahlen().
        db.session.rollback()
        return jsonify({'error': str(e)}), 500
    return jsonify(kennzahl.to_dict())

View File

@ -35,7 +35,7 @@ def create_kpi_setting():
"translation", "translation",
"example", "example",
"position", "position",
"active" "active",
] ]
for field in required_fields: for field in required_fields:
if field not in data: if field not in data:
@ -61,7 +61,7 @@ def create_kpi_setting():
translation=data["translation"], translation=data["translation"],
example=data["example"], example=data["example"],
position=data["position"], position=data["position"],
active=data["active"] active=data["active"],
) )
db.session.add(new_kpi_setting) db.session.add(new_kpi_setting)
@ -136,7 +136,12 @@ def update_kpi_positions():
try: try:
for update_item in data: for update_item in data:
if "id" not in update_item or "position" not in update_item: if "id" not in update_item or "position" not in update_item:
return jsonify({"error": "Each item must have 'id' and 'position' fields"}), 400 return (
jsonify(
{"error": "Each item must have 'id' and 'position' fields"}
),
400,
)
kpi_setting = KPISettingModel.query.get_or_404(update_item["id"]) kpi_setting = KPISettingModel.query.get_or_404(update_item["id"])
kpi_setting.position = update_item["position"] kpi_setting.position = update_item["position"]
@ -148,4 +153,4 @@ def update_kpi_positions():
except Exception as e: except Exception as e:
db.session.rollback() db.session.rollback()
return jsonify({"error": f"Failed to update positions: {str(e)}"}), 500 return jsonify({"error": f"Failed to update positions: {str(e)}"}), 500

View File

@ -17,48 +17,6 @@ OCR_SERVICE_URL = os.getenv("OCR_SERVICE_URL", "http://localhost:5051")
progress_per_id = {} # {id: {kpi: 0, pdf: 0}} progress_per_id = {} # {id: {kpi: 0, pdf: 0}}
storage_lock = threading.Lock() storage_lock = threading.Lock()
# Post a PDF to the OCR service and, if the response carries an "ocr_pdf"
# payload, store the base64-decoded PDF on the matching PitchBookModel row.
# Outcome is reported through socketio "progress" / "error" events.
# NOTE(review): indentation was lost in this view, so the nesting of the
# branches below is inferred, not shown — confirm against the original file.
def process_pdf_async(app, file_id, file_data, filename):
# All work happens inside an application context created from `app`.
with app.app_context():
try:
# Wrap the raw bytes in a file-like object and give it the upload's name.
file_obj = BytesIO(file_data)
file_obj.name = filename
files = {"file": (filename, file_obj, "application/pdf")}
data = {"id": file_id}
# Multipart POST to the OCR endpoint; timeout is 600 seconds.
response = requests.post(
f"{OCR_SERVICE_URL}/ocr", files=files, data=data, timeout=600
)
if response.status_code == 200:
response_data = response.json()
if "ocr_pdf" in response_data:
import base64
# The OCR service returns the processed PDF base64-encoded.
ocr_pdf_data = base64.b64decode(response_data["ocr_pdf"])
file_record = PitchBookModel.query.get(file_id)
if file_record:
# Overwrite the stored file with the OCR result and persist it.
file_record.file = ocr_pdf_data
db.session.commit()
print("[DEBUG] PDF updated in database:")
print("[DEBUG] - Successfully saved to database")
# NOTE(review): presumably emitted once per successful (HTTP 200) response — confirm nesting.
socketio.emit("progress", {"id": file_id, "progress": 50})
# NOTE(review): presumably pairs with the status-code check above — confirm.
else:
socketio.emit(
"error", {"id": file_id, "message": "OCR processing failed"}
)
except Exception as e:
import traceback
# Print the traceback, then surface the failure as an "error" event.
traceback.print_exc()
socketio.emit(
"error", {"id": file_id, "message": f"Processing failed: {str(e)}"}
)
@pitch_book_controller.route("/", methods=["POST"]) @pitch_book_controller.route("/", methods=["POST"])
def upload_file(): def upload_file():
@ -88,6 +46,7 @@ def upload_file():
files = {"file": (uploaded_file.filename, file_data, "application/pdf")} files = {"file": (uploaded_file.filename, file_data, "application/pdf")}
data = {"id": new_file.id} data = {"id": new_file.id}
socketio.emit("progress", {"id": new_file.id, "progress": 5})
response = requests.post( response = requests.post(
f"{OCR_SERVICE_URL}/ocr", files=files, data=data, timeout=600 f"{OCR_SERVICE_URL}/ocr", files=files, data=data, timeout=600
) )

View File

@ -19,6 +19,6 @@ def progress():
): ):
return jsonify({"error": "Invalid progress value"}), 400 return jsonify({"error": "Invalid progress value"}), 400
socketio.emit("progress", {"id": data["id"], "progress": data["progress"]}) socketio.emit("progress", {"id": int(data["id"]), "progress": data["progress"]})
# Process the data and return a response # Process the data and return a response
return jsonify({"message": "Progress updated"}) return jsonify({"message": "Progress updated"})

View File

@ -14,4 +14,5 @@ def init_db(app):
with app.app_context(): with app.app_context():
db.create_all() db.create_all()
from model.seed_data import seed_default_kpi_settings from model.seed_data import seed_default_kpi_settings
seed_default_kpi_settings() seed_default_kpi_settings()

View File

@ -1,26 +0,0 @@
from .database import db
class Kennzahl(db.Model):
    """One key figure stored for a PDF, mapped to the ``kennzahlen`` table."""

    __tablename__ = 'kennzahlen'

    # Composite unique constraint over pdf_id and label.
    __table_args__ = (
        db.UniqueConstraint('pdf_id', 'label', name='unique_pdf_kennzahl'),
    )

    id = db.Column(db.Integer, primary_key=True)
    pdf_id = db.Column(db.String(100), nullable=False)  # ID of the PDF
    label = db.Column(db.String(100), nullable=False)
    value = db.Column(db.String(100))
    page = db.Column(db.Integer)
    status = db.Column(db.String(20))

    def to_dict(self):
        """Return the row as a plain dict (every column except ``id``)."""
        exported = ('pdf_id', 'label', 'value', 'page', 'status')
        return {field: getattr(self, field) for field in exported}

View File

@ -38,10 +38,12 @@ class KPISettingModel(db.Model):
"translation": self.translation, "translation": self.translation,
"example": self.example, "example": self.example,
"position": self.position, "position": self.position,
"active": self.active "active": self.active,
} }
def __init__(self, name, description, mandatory, type, translation, example, position, active): def __init__(
self, name, description, mandatory, type, translation, example, position, active
):
self.name = name self.name = name
self.description = description self.description = description
self.mandatory = mandatory self.mandatory = mandatory

View File

@ -1,6 +1,7 @@
from model.database import db from model.database import db
from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import LargeBinary from sqlalchemy import LargeBinary
from datetime import datetime
class PitchBookModel(db.Model): class PitchBookModel(db.Model):
@ -8,9 +9,15 @@ class PitchBookModel(db.Model):
filename: Mapped[str] = mapped_column() filename: Mapped[str] = mapped_column()
file: Mapped[bytes] = mapped_column(LargeBinary) file: Mapped[bytes] = mapped_column(LargeBinary)
kpi: Mapped[str | None] kpi: Mapped[str | None]
created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
def to_dict(self): def to_dict(self):
return {"id": self.id, "filename": self.filename, "kpi": self.kpi} return {
"id": self.id,
"filename": self.filename,
"kpi": self.kpi,
"created_at": self.created_at.isoformat() if self.created_at else None,
}
def __init__(self, filename, file): def __init__(self, filename, file):
self.filename = filename self.filename = filename

View File

@ -1,6 +1,7 @@
from model.database import db from model.database import db
from model.kpi_setting_model import KPISettingModel, KPISettingType from model.kpi_setting_model import KPISettingModel, KPISettingType
def seed_default_kpi_settings(): def seed_default_kpi_settings():
if KPISettingModel.query.first() is not None: if KPISettingModel.query.first() is not None:
print("KPI Settings bereits vorhanden, Seeding übersprungen") print("KPI Settings bereits vorhanden, Seeding übersprungen")
@ -15,7 +16,7 @@ def seed_default_kpi_settings():
"translation": "Fund Name", "translation": "Fund Name",
"example": "Alpha Real Estate Fund I", "example": "Alpha Real Estate Fund I",
"position": 1, "position": 1,
"active": True "active": True,
}, },
{ {
"name": "Fondsmanager", "name": "Fondsmanager",
@ -25,7 +26,7 @@ def seed_default_kpi_settings():
"translation": "Fund Manager", "translation": "Fund Manager",
"example": "Max Mustermann", "example": "Max Mustermann",
"position": 2, "position": 2,
"active": True "active": True,
}, },
{ {
"name": "AIFM", "name": "AIFM",
@ -35,7 +36,7 @@ def seed_default_kpi_settings():
"translation": "AIFM", "translation": "AIFM",
"example": "Alpha Investment Management GmbH", "example": "Alpha Investment Management GmbH",
"position": 3, "position": 3,
"active": True "active": True,
}, },
{ {
"name": "Datum", "name": "Datum",
@ -45,7 +46,7 @@ def seed_default_kpi_settings():
"translation": "Date", "translation": "Date",
"example": "05.05.2025", "example": "05.05.2025",
"position": 4, "position": 4,
"active": True "active": True,
}, },
{ {
"name": "Risikoprofil", "name": "Risikoprofil",
@ -55,7 +56,7 @@ def seed_default_kpi_settings():
"translation": "Risk Profile", "translation": "Risk Profile",
"example": "Core/Core++", "example": "Core/Core++",
"position": 5, "position": 5,
"active": True "active": True,
}, },
{ {
"name": "Artikel", "name": "Artikel",
@ -65,7 +66,7 @@ def seed_default_kpi_settings():
"translation": "Article", "translation": "Article",
"example": "Artikel 8", "example": "Artikel 8",
"position": 6, "position": 6,
"active": True "active": True,
}, },
{ {
"name": "Zielrendite", "name": "Zielrendite",
@ -75,7 +76,7 @@ def seed_default_kpi_settings():
"translation": "Target Return", "translation": "Target Return",
"example": "6.5", "example": "6.5",
"position": 7, "position": 7,
"active": True "active": True,
}, },
{ {
"name": "Rendite", "name": "Rendite",
@ -85,7 +86,7 @@ def seed_default_kpi_settings():
"translation": "Return", "translation": "Return",
"example": "5.8", "example": "5.8",
"position": 8, "position": 8,
"active": True "active": True,
}, },
{ {
"name": "Zielausschüttung", "name": "Zielausschüttung",
@ -95,7 +96,7 @@ def seed_default_kpi_settings():
"translation": "Target Distribution", "translation": "Target Distribution",
"example": "4.0", "example": "4.0",
"position": 9, "position": 9,
"active": True "active": True,
}, },
{ {
"name": "Ausschüttung", "name": "Ausschüttung",
@ -105,7 +106,7 @@ def seed_default_kpi_settings():
"translation": "Distribution", "translation": "Distribution",
"example": "3.8", "example": "3.8",
"position": 10, "position": 10,
"active": True "active": True,
}, },
{ {
"name": "Laufzeit", "name": "Laufzeit",
@ -115,7 +116,7 @@ def seed_default_kpi_settings():
"translation": "Duration", "translation": "Duration",
"example": "7 Jahre, 10, Evergreen", "example": "7 Jahre, 10, Evergreen",
"position": 11, "position": 11,
"active": True "active": True,
}, },
{ {
"name": "LTV", "name": "LTV",
@ -125,7 +126,7 @@ def seed_default_kpi_settings():
"translation": "LTV", "translation": "LTV",
"example": "65.0", "example": "65.0",
"position": 12, "position": 12,
"active": True "active": True,
}, },
{ {
"name": "Managementgebühren", "name": "Managementgebühren",
@ -135,7 +136,7 @@ def seed_default_kpi_settings():
"translation": "Management Fees", "translation": "Management Fees",
"example": "1.5", "example": "1.5",
"position": 13, "position": 13,
"active": True "active": True,
}, },
{ {
"name": "Sektorenallokation", "name": "Sektorenallokation",
@ -145,7 +146,7 @@ def seed_default_kpi_settings():
"translation": "Sector Allocation", "translation": "Sector Allocation",
"example": "Büro, Wohnen, Logistik, Studentenwohnen", "example": "Büro, Wohnen, Logistik, Studentenwohnen",
"position": 14, "position": 14,
"active": True "active": True,
}, },
{ {
"name": "Länderallokation", "name": "Länderallokation",
@ -155,8 +156,8 @@ def seed_default_kpi_settings():
"translation": "Country Allocation", "translation": "Country Allocation",
"example": "Deutschland,Frankreich, Österreich, Schweiz", "example": "Deutschland,Frankreich, Österreich, Schweiz",
"position": 15, "position": 15,
"active": True "active": True,
} },
] ]
print("Füge Standard KPI Settings hinzu...") print("Füge Standard KPI Settings hinzu...")
@ -170,15 +171,17 @@ def seed_default_kpi_settings():
translation=kpi_data["translation"], translation=kpi_data["translation"],
example=kpi_data["example"], example=kpi_data["example"],
position=kpi_data["position"], position=kpi_data["position"],
active=kpi_data["active"] active=kpi_data["active"],
) )
db.session.add(kpi_setting) db.session.add(kpi_setting)
try: try:
db.session.commit() db.session.commit()
print(f"Erfolgreich {len(default_kpi_settings)} Standard KPI Settings hinzugefügt") print(
f"Erfolgreich {len(default_kpi_settings)} Standard KPI Settings hinzugefügt"
)
except Exception as e: except Exception as e:
db.session.rollback() db.session.rollback()
print(f"Fehler beim Hinzufügen der Standard KPI Settings: {e}") print(f"Fehler beim Hinzufügen der Standard KPI Settings: {e}")
raise raise

View File

@ -15,7 +15,7 @@ def extract_text_from_ocr_json():
pitchbook_id = json_data["id"] pitchbook_id = json_data["id"]
pages_data = json_data["extracted_text_per_page"] pages_data = json_data["extracted_text_per_page"]
entities_json = extract_with_exxeta(pages_data) entities_json = extract_with_exxeta(pages_data, pitchbook_id)
entities = json.loads(entities_json) if isinstance(entities_json, str) else entities_json entities = json.loads(entities_json) if isinstance(entities_json, str) else entities_json
validate_payload = { validate_payload = {
@ -39,4 +39,4 @@ def extract_text_from_ocr_json():
if __name__ == "__main__": if __name__ == "__main__":
app.run(host="0.0.0.0", port=5053, debug=True) app.run(host="0.0.0.0", port=5053, debug=True)

View File

@ -9,6 +9,7 @@ MODEL = "gpt-4o-mini"
EXXETA_BASE_URL = "https://ai.exxeta.com/api/v2/azure/openai" EXXETA_BASE_URL = "https://ai.exxeta.com/api/v2/azure/openai"
load_dotenv() load_dotenv()
EXXETA_API_KEY = os.getenv("API_KEY") EXXETA_API_KEY = os.getenv("API_KEY")
COORDINATOR_URL = os.getenv("COORDINATOR_URL", "http://localhost:5050")
MAX_RETRIES = 3 MAX_RETRIES = 3
TIMEOUT = 180 TIMEOUT = 180
@ -16,14 +17,20 @@ TIMEOUT = 180
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def extract_with_exxeta(pages_json): def extract_with_exxeta(pages_json, pitchbook_id):
results = [] results = []
if not EXXETA_API_KEY: if not EXXETA_API_KEY:
logger.warning("EXXETA_API_KEY nicht gesetzt. Rückgabe eines leeren JSON.") logger.warning("EXXETA_API_KEY nicht gesetzt. Rückgabe eines leeren JSON.")
return json.dumps(results, indent=2, ensure_ascii=False) return json.dumps(results, indent=2, ensure_ascii=False)
i = 0
for page_data in pages_json: for page_data in pages_json:
i += 1
if i % 8 == 0:
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35 + 60/len(pages_json)*i})
page_num = page_data.get("page") page_num = page_data.get("page")
page_data.get("page") page_data.get("page")
text = page_data.get("text", "") text = page_data.get("text", "")
@ -57,7 +64,7 @@ def extract_with_exxeta(pages_json):
prompt = ( prompt = (
"Bitte extrahiere relevante Fondskennzahlen aus dem folgenden Pitchbook-Text. " "Bitte extrahiere relevante Fondskennzahlen aus dem folgenden Pitchbook-Text. "
"Analysiere den Text sorgfältig, um **nur exakt benannte und relevante Werte** zu extrahieren.\n\n" "Analysiere den Text sorgfältig, um **nur exakt benannte und relevante Werte** zu extrahieren.\n\n"
"ZU EXTRAHIERENDE KENNZAHLEN (immer exakt wie unten angegeben):\n" "ZU EXTRAHIERENDE KENNZAHLEN (immer exakt wie unten angegeben):\n"
"- FONDSNAME\n" "- FONDSNAME\n"
"- FONDSMANAGER\n" "- FONDSMANAGER\n"
@ -74,14 +81,14 @@ def extract_with_exxeta(pages_json):
"- MANAGEMENTGEBÜHREN (ggf. mit Staffelung und Bezug auf NAV/GAV)\n" "- MANAGEMENTGEBÜHREN (ggf. mit Staffelung und Bezug auf NAV/GAV)\n"
"- SEKTORENALLOKATION (z. B. BÜRO, LOGISTIK, WOHNEN... inkl. %-Angaben)\n" "- SEKTORENALLOKATION (z. B. BÜRO, LOGISTIK, WOHNEN... inkl. %-Angaben)\n"
"- LÄNDERALLOKATION (z. B. DEUTSCHLAND, FRANKREICH, etc. inkl. %-Angaben)\n\n" "- LÄNDERALLOKATION (z. B. DEUTSCHLAND, FRANKREICH, etc. inkl. %-Angaben)\n\n"
"WICHTIG:\n" "WICHTIG:\n"
"- Gib **nur eine Entität pro Kennzahl** an - keine Listen oder Interpretationen.\n" "- Gib **nur eine Entität pro Kennzahl** an - keine Listen oder Interpretationen.\n"
"- Wenn mehrere Varianten genannt werden (z. B. \"Core und Core+\"), gib sie im Originalformat als **eine entity** an.\n" "- Wenn mehrere Varianten genannt werden (z. B. \"Core und Core+\"), gib sie im Originalformat als **eine entity** an.\n"
"- **Keine Vermutungen oder Ergänzungen**. Wenn keine Information enthalten ist, gib die Kennzahl **nicht aus**.\n" "- **Keine Vermutungen oder Ergänzungen**. Wenn keine Information enthalten ist, gib die Kennzahl **nicht aus**.\n"
"- Extrahiere **nur wörtlich vorkommende Inhalte** (keine Berechnungen, keine Zusammenfassungen).\n" "- Extrahiere **nur wörtlich vorkommende Inhalte** (keine Berechnungen, keine Zusammenfassungen).\n"
"- Jeder gefundene Wert muss einem der obigen Label **eindeutig zuordenbar** sein.\n\n" "- Jeder gefundene Wert muss einem der obigen Label **eindeutig zuordenbar** sein.\n\n"
"FORMAT:\n" "FORMAT:\n"
"Antworte als **reines JSON-Array** mit folgendem Format:\n" "Antworte als **reines JSON-Array** mit folgendem Format:\n"
"[\n" "[\n"
@ -92,7 +99,7 @@ def extract_with_exxeta(pages_json):
" },\n" " },\n"
" ...\n" " ...\n"
"]\n\n" "]\n\n"
f"Falls keine Kennzahl enthalten ist, gib ein leeres Array [] zurück.\n\n" f"Falls keine Kennzahl enthalten ist, gib ein leeres Array [] zurück.\n\n"
f"Nur JSON-Antwort - keine Kommentare, keine Erklärungen, kein Text außerhalb des JSON.\n\n" f"Nur JSON-Antwort - keine Kommentare, keine Erklärungen, kein Text außerhalb des JSON.\n\n"
f"TEXT:\n{text}" f"TEXT:\n{text}"
@ -144,4 +151,6 @@ def extract_with_exxeta(pages_json):
if attempt == MAX_RETRIES: if attempt == MAX_RETRIES:
results.extend([]) results.extend([])
return json.dumps(results, indent=2, ensure_ascii=False)
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 95})
return json.dumps(results, indent=2, ensure_ascii=False)

View File

@ -41,6 +41,7 @@ def convert_pdf_async(temp_path, pitchbook_id):
logger.info("Sending payload to EXXETA and SPACY services") logger.info("Sending payload to EXXETA and SPACY services")
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35})
try: try:
exxeta_response = requests.post(EXXETA_URL, json=payload, timeout=600) exxeta_response = requests.post(EXXETA_URL, json=payload, timeout=600)
logger.info(f"EXXETA response: {exxeta_response.status_code}") logger.info(f"EXXETA response: {exxeta_response.status_code}")
@ -59,9 +60,8 @@ def convert_pdf_async(temp_path, pitchbook_id):
headers = {} headers = {}
try: try:
requests.put(f"{COORDINATOR_URL}/api/pitch_book/{pitchbook_id}", files=files, timeout=600, headers=headers)
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 50}, timeout=600) requests.put(f"{COORDINATOR_URL}/api/pitch_book/{pitchbook_id}", files=files, timeout=600, headers=headers)
logger.info("COORDINATOR response: Progress + File updated") logger.info("COORDINATOR response: Progress + File updated")
except Exception as e: except Exception as e:
logger.error(f"Error calling COORDINATOR: {e}") logger.error(f"Error calling COORDINATOR: {e}")

View File

@ -10,7 +10,7 @@ import json
app = Flask(__name__) app = Flask(__name__)
load_dotenv() load_dotenv()
coordinator_url = os.getenv("COORDINATOR_URL", "http://localhost:5000") COORDINATOR_URL = os.getenv("COORDINATOR_URL", "http://localhost:5000")
# todo add persistence layer # todo add persistence layer
data_storage = {} # {id: {spacy_data: [], exxeta_data: []}} data_storage = {} # {id: {spacy_data: [], exxeta_data: []}}
@ -19,7 +19,7 @@ storage_lock = threading.Lock()
def send_to_coordinator_service(processed_data, request_id): def send_to_coordinator_service(processed_data, request_id):
if not coordinator_url: if not COORDINATOR_URL:
print("Not processed, missing url", processed_data) print("Not processed, missing url", processed_data)
return return
@ -28,7 +28,7 @@ def send_to_coordinator_service(processed_data, request_id):
"kpi": json.dumps(processed_data), "kpi": json.dumps(processed_data),
} }
requests.put( requests.put(
coordinator_url + "/api/pitch_book/" + str(request_id), COORDINATOR_URL + "/api/pitch_book/" + str(request_id),
data=payload, data=payload,
) )
print(f"Result PitchBook {request_id} sent to coordinator") print(f"Result PitchBook {request_id} sent to coordinator")
@ -40,6 +40,7 @@ def send_to_coordinator_service(processed_data, request_id):
def process_data_async(request_id, spacy_data, exxeta_data): def process_data_async(request_id, spacy_data, exxeta_data):
try: try:
requests.post(COORDINATOR_URL + "/api/progress", json={"id": request_id, "progress": 95})
print(f"Start asynchronous processing for PitchBook: {request_id}") print(f"Start asynchronous processing for PitchBook: {request_id}")
# Perform merge # Perform merge

View File

@ -47,6 +47,7 @@ services:
environment: environment:
- EXXETA_SERVICE_URL=http://exxeta:5000/extract - EXXETA_SERVICE_URL=http://exxeta:5000/extract
- SPACY_SERVICE_URL=http://spacy:5052/extract - SPACY_SERVICE_URL=http://spacy:5052/extract
- COORDINATOR_URL=http://coordinator:5000
ports: ports:
- 5051:5000 - 5051:5000
@ -66,6 +67,7 @@ services:
- .env - .env
environment: environment:
- VALIDATE_SERVICE_URL=http://validate:5000/validate - VALIDATE_SERVICE_URL=http://validate:5000/validate
- COORDINATOR_URL=http://coordinator:5000
ports: ports:
- 5053:5000 - 5053:5000

View File

@ -42,7 +42,7 @@ export default function KennzahlenTable({
data, data,
pdfId, pdfId,
settings, settings,
from from,
}: KennzahlenTableProps) { }: KennzahlenTableProps) {
const [editingIndex, setEditingIndex] = useState<string>(""); const [editingIndex, setEditingIndex] = useState<string>("");
const [editValue, setEditValue] = useState(""); const [editValue, setEditValue] = useState("");
@ -286,12 +286,17 @@ export default function KennzahlenTable({
</Tooltip> </Tooltip>
) : ( ) : (
<Tooltip <Tooltip
title={hasNoValue ? title={
<> hasNoValue ? (
<b>Problem</b> <>
<br /> <b>Problem</b>
Es wurden keine Kennzahlen gefunden. Bitte ergänzen! <br />
</> : "" Es wurden keine Kennzahlen gefunden. Bitte
ergänzen!
</>
) : (
""
)
} }
placement="bottom" placement="bottom"
arrow arrow
@ -318,7 +323,10 @@ export default function KennzahlenTable({
}} }}
> >
{hasNoValue && ( {hasNoValue && (
<ErrorOutlineIcon fontSize="small" color="error" /> <ErrorOutlineIcon
fontSize="small"
color="error"
/>
)} )}
{editingIndex === row.setting.name ? ( {editingIndex === row.setting.name ? (
<TextField <TextField

View File

@ -1,186 +1,388 @@
import { Box, Paper, Table, TableBody, TableCell, TableContainer, TableHead, TableRow, Typography, CircularProgress, Chip } from "@mui/material"; import CheckCircleIcon from "@mui/icons-material/CheckCircle";
import { useSuspenseQuery } from "@tanstack/react-query"; import HourglassEmptyIcon from "@mui/icons-material/HourglassEmpty";
import PictureAsPdfIcon from "@mui/icons-material/PictureAsPdf";
import {
Box,
Chip,
CircularProgress,
LinearProgress,
Paper,
Table,
TableBody,
TableCell,
TableContainer,
TableHead,
TableRow,
Typography,
} from "@mui/material";
import { useQueryClient, useSuspenseQuery } from "@tanstack/react-query";
import { useNavigate } from "@tanstack/react-router"; import { useNavigate } from "@tanstack/react-router";
import { useCallback, useEffect, useState } from "react";
import { socket } from "../socket";
import { fetchPitchBooksById } from "../util/api";
import { pitchBooksQueryOptions } from "../util/query"; import { pitchBooksQueryOptions } from "../util/query";
import PictureAsPdfIcon from '@mui/icons-material/PictureAsPdf';
import CheckCircleIcon from '@mui/icons-material/CheckCircle';
import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
interface PitchBook { interface PitchBook {
id: number; id: number;
filename: string; filename: string;
created_at: string; created_at: string;
kpi?: string | { kpi?:
[key: string]: { | string
label: string; | {
entity: string; [key: string]: {
page: number; label: string;
status: string; entity: string;
source: string; page: number;
}[]; status: string;
}; source: string;
status?: 'processing' | 'completed'; }[];
};
status?: "processing" | "completed";
} }
export function PitchBooksTable() { export function PitchBooksTable() {
const navigate = useNavigate(); const [loadingPitchBooks, setLoadingPitchBooks] = useState<
const { data: pitchBooks, isLoading } = useSuspenseQuery(pitchBooksQueryOptions()); {
id: number;
progress: number;
filename?: string;
buffer: number;
intervalId?: number;
}[]
>([]);
const navigate = useNavigate();
const { data: pitchBooks, isLoading } = useSuspenseQuery(
pitchBooksQueryOptions(),
);
const handleRowClick = (pitchBookId: number) => { const handleRowClick = (pitchBookId: number) => {
navigate({ navigate({
to: "/extractedResult/$pitchBook", to: "/extractedResult/$pitchBook",
params: { pitchBook: pitchBookId.toString() }, params: { pitchBook: pitchBookId.toString() },
search: { from: "overview" } search: { from: "overview" },
}); });
}; };
const getKPIValue = (pitchBook: PitchBook, fieldName: string): string => { const onConnection = useCallback(() => {
if (!pitchBook.kpi || typeof pitchBook.kpi === 'string') { console.log("connected");
try { }, []);
const parsedKPI = JSON.parse(pitchBook.kpi as string);
// Convert array to object format if needed
const kpiObj = Array.isArray(parsedKPI) ?
parsedKPI.reduce((acc: any, item: any) => {
if (!acc[item.label]) acc[item.label] = [];
acc[item.label].push(item);
return acc;
}, {}) : parsedKPI;
return kpiObj[fieldName]?.[0]?.entity || 'N/A'; const queryClient = useQueryClient();
} catch {
return 'N/A';
}
}
return (pitchBook.kpi as any)[fieldName]?.[0]?.entity || 'N/A'; const onProgress = useCallback(
}; (progress: { id: number; progress: number }) => {
if (progress.progress === 100) {
setLoadingPitchBooks((prev) => {
const intervalId = prev.find(
(item) => item.id === progress.id,
)?.intervalId;
console.log(intervalId, prev);
intervalId && clearInterval(intervalId);
const getStatus = (pitchBook: PitchBook) => { return [...prev.filter((item) => item.id !== progress.id)];
if (pitchBook.kpi && });
((typeof pitchBook.kpi === 'string' && pitchBook.kpi !== '{}') || queryClient.invalidateQueries({
(typeof pitchBook.kpi === 'object' && Object.keys(pitchBook.kpi).length > 0))) { queryKey: pitchBooksQueryOptions().queryKey,
return 'completed'; });
} } else {
return 'processing'; setLoadingPitchBooks((prev) => {
}; const oldItem = prev.find((item) => item.id === progress.id);
let intervalId = oldItem?.intervalId;
if (!oldItem) {
intervalId = setInterval(() => {
setLoadingPitchBooks((prev) => {
const oldItem = prev.find((item) => item.id === progress.id);
if (!oldItem) return prev;
if (isLoading) { return [
return ( ...prev.filter((e) => e.id !== progress.id),
<Box display="flex" justifyContent="center" alignItems="center" height="400px"> {
<CircularProgress sx={{ color: "#383838" }} /> id: progress.id,
</Box> progress: oldItem?.progress ?? progress.progress,
); filename: oldItem?.filename,
} buffer: oldItem ? oldItem.buffer + 0.5 : 0,
intervalId: oldItem.intervalId,
},
];
});
}, 400);
return ( fetchPitchBooksById(progress.id)
<TableContainer .then((res) => {
component={Paper} setLoadingPitchBooks((prev) => [
sx={{ ...prev.filter((item) => item.id !== progress.id),
width: "85%", {
maxWidth: 1200, id: progress.id,
boxShadow: "0 2px 8px rgba(0,0,0,0.1)", progress: progress.progress,
}} filename: res.filename,
> buffer: 0,
<Table> intervalId,
<TableHead> },
<TableRow sx={{ backgroundColor: "#f5f5f5" }}> ]);
<TableCell sx={{ width: "60px" }}></TableCell> })
<TableCell sx={{ fontWeight: "bold" }}>Fondsname</TableCell> .catch((err) => {
<TableCell sx={{ fontWeight: "bold" }}>Fondsmanager</TableCell> console.error(err);
<TableCell sx={{ fontWeight: "bold" }}>Dateiname</TableCell> });
<TableCell sx={{ fontWeight: "bold", width: "120px" }}>Status</TableCell> }
</TableRow> return [
</TableHead> ...prev.filter((item) => item.id !== progress.id),
<TableBody> {
{pitchBooks.map((pitchBook: PitchBook) => { id: progress.id,
const status = getStatus(pitchBook); progress: progress.progress,
const fundName = getKPIValue(pitchBook, 'FONDSNAME') || filename: oldItem?.filename,
getKPIValue(pitchBook, 'FUND_NAME') || buffer: 0,
getKPIValue(pitchBook, 'NAME'); intervalId,
},
];
});
}
},
[queryClient],
);
const manager = getKPIValue(pitchBook, 'FONDSMANAGER') || useEffect(() => {
getKPIValue(pitchBook, 'MANAGER') || socket.on("connect", onConnection);
getKPIValue(pitchBook, 'PORTFOLIO_MANAGER'); socket.on("progress", onProgress);
return () => {
socket.off("connect", onConnection);
socket.off("progress", onProgress);
};
}, [onConnection, onProgress]);
return ( const getKPIValue = (pitchBook: PitchBook, fieldName: string): string => {
<TableRow if (!pitchBook.kpi || typeof pitchBook.kpi === "string") {
key={pitchBook.id} try {
onClick={() => handleRowClick(pitchBook.id)} const parsedKPI = JSON.parse(pitchBook.kpi as string);
sx={{ // Convert array to object format if needed
cursor: "pointer", const kpiObj = Array.isArray(parsedKPI)
"&:hover": { ? parsedKPI.reduce((acc, item) => {
backgroundColor: "#f9f9f9", if (!acc[item.label]) acc[item.label] = [];
}, acc[item.label].push(item);
}} return acc;
> }, {})
<TableCell> : parsedKPI;
<Box
sx={{ return kpiObj[fieldName]?.[0]?.entity || "N/A";
width: 40, } catch {
height: 50, return "N/A";
backgroundColor: "#f0f0f0", }
borderRadius: 1, }
display: "flex",
alignItems: "center", return pitchBook.kpi[fieldName]?.[0]?.entity || "N/A";
justifyContent: "center", };
border: "1px solid #e0e0e0",
}} const getStatus = (pitchBook: PitchBook) => {
> if (
<PictureAsPdfIcon fontSize="small" sx={{ color: "#666" }} /> pitchBook.kpi &&
</Box> ((typeof pitchBook.kpi === "string" && pitchBook.kpi !== "{}") ||
</TableCell> (typeof pitchBook.kpi === "object" &&
<TableCell> Object.keys(pitchBook.kpi).length > 0))
<Typography variant="body2" fontWeight="medium"> ) {
{fundName} return "completed";
</Typography> }
</TableCell> return "processing";
<TableCell>{manager}</TableCell> };
<TableCell>
<Typography variant="body2" color="text.secondary" fontSize="0.875rem"> if (isLoading) {
{pitchBook.filename} return (
</Typography> <Box
</TableCell> display="flex"
<TableCell> justifyContent="center"
{status === 'completed' ? ( alignItems="center"
<Chip height="400px"
icon={<CheckCircleIcon />} >
label="Abgeschlossen" <CircularProgress sx={{ color: "#383838" }} />
size="small" </Box>
sx={{ );
backgroundColor: "#e8f5e9", }
color: "#2e7d32",
"& .MuiChip-icon": { return (
color: "#2e7d32", <TableContainer
}, component={Paper}
}} sx={{
/> width: "85%",
) : ( maxWidth: 1200,
<Chip boxShadow: "0 2px 8px rgba(0,0,0,0.1)",
icon={<HourglassEmptyIcon />} }}
label="In Bearbeitung" >
size="small" <Table>
sx={{ <TableHead>
backgroundColor: "#fff3e0", <TableRow sx={{ backgroundColor: "#f5f5f5" }}>
color: "#e65100", <TableCell sx={{ width: "60px" }} />
"& .MuiChip-icon": { <TableCell sx={{ fontWeight: "bold" }}>Fondsname</TableCell>
color: "#e65100", <TableCell sx={{ fontWeight: "bold" }}>Fondsmanager</TableCell>
}, <TableCell sx={{ fontWeight: "bold" }}>Dateiname</TableCell>
}} <TableCell sx={{ fontWeight: "bold", width: "120px" }}>
/> Status
)} </TableCell>
</TableCell> </TableRow>
</TableRow> </TableHead>
); <TableBody>
})} {pitchBooks
</TableBody> .filter(
</Table> (pitchbook: PitchBook) =>
{pitchBooks.length === 0 && ( !loadingPitchBooks.some((e) => e.id === pitchbook.id),
<Box p={4} textAlign="center"> )
<Typography color="text.secondary"> .sort(
Keine Pitch Books vorhanden (a: PitchBook, b: PitchBook) =>
</Typography> new Date(a.created_at).getTime() -
</Box> new Date(b.created_at).getTime(),
)} )
</TableContainer> .map((pitchBook: PitchBook) => {
); const status = getStatus(pitchBook);
} const fundName =
getKPIValue(pitchBook, "FONDSNAME") ||
getKPIValue(pitchBook, "FUND_NAME") ||
getKPIValue(pitchBook, "NAME");
const manager =
getKPIValue(pitchBook, "FONDSMANAGER") ||
getKPIValue(pitchBook, "MANAGER") ||
getKPIValue(pitchBook, "PORTFOLIO_MANAGER");
return (
<TableRow
key={pitchBook.id}
onClick={() => handleRowClick(pitchBook.id)}
sx={{
cursor: "pointer",
"&:hover": {
backgroundColor: "#f9f9f9",
},
}}
>
<TableCell>
<Box
sx={{
width: 40,
height: 50,
backgroundColor: "#f0f0f0",
borderRadius: 1,
display: "flex",
alignItems: "center",
justifyContent: "center",
border: "1px solid #e0e0e0",
}}
>
<PictureAsPdfIcon
fontSize="small"
sx={{ color: "#666" }}
/>
</Box>
</TableCell>
<TableCell>
<Typography variant="body2" fontWeight="medium">
{fundName}
</Typography>
</TableCell>
<TableCell>{manager}</TableCell>
<TableCell>
<Typography
variant="body2"
color="text.secondary"
fontSize="0.875rem"
>
{pitchBook.filename}
</Typography>
</TableCell>
<TableCell>
{status === "completed" ? (
<Chip
icon={<CheckCircleIcon />}
label="Abgeschlossen"
size="small"
sx={{
backgroundColor: "#e8f5e9",
color: "#2e7d32",
"& .MuiChip-icon": {
color: "#2e7d32",
},
}}
/>
) : (
<Chip
icon={<HourglassEmptyIcon />}
label="In Bearbeitung"
size="small"
sx={{
backgroundColor: "#fff3e0",
color: "#e65100",
"& .MuiChip-icon": {
color: "#e65100",
},
}}
/>
)}
</TableCell>
</TableRow>
);
})}
{loadingPitchBooks
.sort((a, b) => a.id - b.id)
.map((pitchBook) => (
<TableRow key={pitchBook.id}>
<TableCell>
<Box
sx={{
width: 40,
height: 50,
backgroundColor: "#f0f0f0",
borderRadius: 1,
display: "flex",
alignItems: "center",
justifyContent: "center",
border: "1px solid #e0e0e0",
}}
>
<PictureAsPdfIcon fontSize="small" sx={{ color: "#666" }} />
</Box>
</TableCell>
<TableCell colSpan={2}>
<LinearProgress
variant="buffer"
value={pitchBook.progress}
valueBuffer={
pitchBook.buffer
? pitchBook.progress + pitchBook.buffer
: pitchBook.progress
}
/>
</TableCell>
<TableCell>
{" "}
<Typography
variant="body2"
color="text.secondary"
fontSize="0.875rem"
>
{pitchBook.filename}
</Typography>
</TableCell>
<TableCell>
<Chip
icon={<HourglassEmptyIcon />}
label="In Bearbeitung"
size="small"
sx={{
backgroundColor: "#fff3e0",
color: "#e65100",
"& .MuiChip-icon": {
color: "#e65100",
},
}}
/>
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
{pitchBooks.length === 0 && (
<Box p={4} textAlign="center">
<Typography color="text.secondary">
Keine Pitch Books vorhanden
</Typography>
</Box>
)}
</TableContainer>
);
}

View File

@ -1,13 +1,11 @@
import SettingsIcon from "@mui/icons-material/Settings"; import SettingsIcon from "@mui/icons-material/Settings";
import { Backdrop, Box, Button, IconButton, Paper } from "@mui/material"; import { Backdrop, Box, Button, IconButton, Paper } from "@mui/material";
import { useNavigate } from "@tanstack/react-router"; import { useNavigate, useRouter } from "@tanstack/react-router";
import { useCallback, useEffect, useState } from "react"; import { useCallback, useEffect, useState } from "react";
import FileUpload from "react-material-file-upload"; import FileUpload from "react-material-file-upload";
import { socket } from "../socket"; import { socket } from "../socket";
import { CircularProgressWithLabel } from "./CircularProgressWithLabel";
import { API_HOST } from "../util/api"; import { API_HOST } from "../util/api";
import { CircularProgressWithLabel } from "./CircularProgressWithLabel";
const PROGRESS = true;
export default function UploadPage() { export default function UploadPage() {
const [files, setFiles] = useState<File[]>([]); const [files, setFiles] = useState<File[]>([]);
@ -15,6 +13,7 @@ export default function UploadPage() {
const [loadingState, setLoadingState] = useState<number | null>(null); const [loadingState, setLoadingState] = useState<number | null>(null);
const fileTypes = ["pdf"]; const fileTypes = ["pdf"];
const navigate = useNavigate(); const navigate = useNavigate();
const router = useRouter();
const uploadFile = useCallback(async () => { const uploadFile = useCallback(async () => {
const formData = new FormData(); const formData = new FormData();
@ -28,17 +27,11 @@ export default function UploadPage() {
console.log("File uploaded successfully"); console.log("File uploaded successfully");
const data = await response.json(); const data = await response.json();
setPageId(data.id.toString()); setPageId(data.id.toString());
setLoadingState(0); setLoadingState(5);
!PROGRESS &&
navigate({
to: "/extractedResult/$pitchBook",
params: { pitchBook: data.id.toString() },
});
} else { } else {
console.error("Failed to upload file"); console.error("Failed to upload file");
} }
}, [files, navigate]); }, [files]);
const onConnection = useCallback(() => { const onConnection = useCallback(() => {
console.log("connected"); console.log("connected");
@ -80,18 +73,16 @@ export default function UploadPage() {
return ( return (
<> <>
{PROGRESS && ( <Backdrop
<Backdrop sx={(theme) => ({ color: "#fff", zIndex: theme.zIndex.drawer + 1 })}
sx={(theme) => ({ color: "#fff", zIndex: theme.zIndex.drawer + 1 })} open={pageId !== null && loadingState !== null && loadingState < 100}
open={pageId !== null && loadingState !== null && loadingState < 100} >
> <CircularProgressWithLabel
<CircularProgressWithLabel color="inherit"
color="inherit" value={loadingState || 0}
value={loadingState || 0} size={60}
size={60} />
/> </Backdrop>
</Backdrop>
)}
<Box <Box
display="flex" display="flex"
flexDirection="column" flexDirection="column"
@ -188,6 +179,7 @@ export default function UploadPage() {
backgroundColor: "#383838", backgroundColor: "#383838",
"&:hover": { backgroundColor: "#2e2e2e" }, "&:hover": { backgroundColor: "#2e2e2e" },
}} }}
onMouseEnter={() => router.preloadRoute({ to: "/pitchbooks" })}
onClick={() => navigate({ to: "/pitchbooks" })} onClick={() => navigate({ to: "/pitchbooks" })}
> >
Alle Pitch Books anzeigen Alle Pitch Books anzeigen
@ -195,4 +187,4 @@ export default function UploadPage() {
</Box> </Box>
</> </>
); );
} }

View File

@ -1,6 +1,6 @@
import type { Kennzahl } from "@/types/kpi"; import type { Kennzahl } from "@/types/kpi";
const API_HOST = import.meta.env.VITE_API_HOST || 'http://localhost:5050'; const API_HOST = import.meta.env.VITE_API_HOST || "http://localhost:5050";
export { API_HOST }; export { API_HOST };
@ -15,9 +15,7 @@ export const fetchKPI = async (
source: string; source: string;
}[]; }[];
}> => { }> => {
const response = await fetch( const response = await fetch(`${API_HOST}/api/pitch_book/${pitchBookId}`);
`${API_HOST}/api/pitch_book/${pitchBookId}`,
);
const data = await response.json(); const data = await response.json();
return data.kpi ? getKPI(data.kpi) : {}; return data.kpi ? getKPI(data.kpi) : {};
@ -46,13 +44,10 @@ export const fetchPutKPI = async (
const formData = new FormData(); const formData = new FormData();
formData.append("kpi", JSON.stringify(flattenKPIArray(kpi))); formData.append("kpi", JSON.stringify(flattenKPIArray(kpi)));
const response = await fetch( const response = await fetch(`${API_HOST}/api/pitch_book/${pitchBookId}`, {
`${API_HOST}/api/pitch_book/${pitchBookId}`, method: "PUT",
{ body: formData,
method: "PUT", });
body: formData,
},
);
if (!response.ok) { if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`); throw new Error(`HTTP error! status: ${response.status}`);
} }
@ -119,3 +114,11 @@ export async function fetchPitchBooks() {
} }
return response.json(); return response.json();
} }
/**
 * Fetch a single pitch book from the backend by its id.
 *
 * @param id - Id of the pitch book; interpolated into the
 *   `/api/pitch_book/{id}` request path.
 * @returns The parsed JSON body of the response.
 * @throws Error when the response is not OK (`response.ok` is false). The
 *   message contains the requested id and the HTTP status code.
 */
export async function fetchPitchBooksById(id: number) {
  const response = await fetch(`${API_HOST}/api/pitch_book/${id}`);
  if (!response.ok) {
    // Include the id and HTTP status so the failing request can be identified
    // from the error alone (callers may only log it).
    throw new Error(
      `Failed to fetch pitch book ${id} (HTTP status ${response.status})`,
    );
  }
  return response.json();
}