Compare commits

..

No commits in common. "e3149e0aa4469917b2e027e4db469cdeeeec15df" and "f20516535042d6094d02629792634db46c82aa90" have entirely different histories.

19 changed files with 387 additions and 490 deletions

View File

@ -5,6 +5,7 @@ from dotenv import load_dotenv
from controller import register_routes from controller import register_routes
from model.database import init_db from model.database import init_db
from controller.socketIO import socketio from controller.socketIO import socketio
from controller.kennzahlen import kennzahlen_bp
app = Flask(__name__) app = Flask(__name__)
CORS(app) CORS(app)
@ -21,6 +22,9 @@ init_db(app)
register_routes(app) register_routes(app)
# Register blueprints
app.register_blueprint(kennzahlen_bp)
@app.route("/health") @app.route("/health")
def health_check(): def health_check():
return "OK" return "OK"

View File

@ -0,0 +1,61 @@
from flask import Blueprint, jsonify, request
from model.kennzahl import Kennzahl
from model.database import db
# Blueprint on which the Kennzahlen (key figure) routes of this module are registered.
kennzahlen_bp = Blueprint('kennzahlen', __name__)
# Example data: template rows, all stored under pdf_id "example".
# Each entry carries the fields pdf_id, label, value, page and status.
EXAMPLE_DATA = [
{"pdf_id": "example", "label": "Fondsname", "value": "Fund Real Estate Prime Europe", "page": 1, "status": "ok"},
{"pdf_id": "example", "label": "Fondsmanager", "value": "", "page": 1, "status": "error"},
{"pdf_id": "example", "label": "Risikoprofil", "value": "Core/Core+", "page": 10, "status": "warning"},
{"pdf_id": "example", "label": "LTV", "value": "30-35 %", "page": 8, "status": "ok"},
{"pdf_id": "example", "label": "Ausschüttungsrendite", "value": "4%", "page": 34, "status": "ok"}
]
@kennzahlen_bp.route('/api/kennzahlen/init', methods=['POST'])
def init_kennzahlen():
    """Re-create the example key figures stored under ``pdf_id='example'``.

    Existing rows with ``pdf_id='example'`` are deleted, one row per entry of
    ``EXAMPLE_DATA`` is added, and both steps are committed together.

    Returns:
        A JSON success message, or ``({"error": ...}, 500)`` after a session
        rollback if any step raised.
    """
    try:
        # Drop the example rows left over from an earlier initialisation.
        Kennzahl.query.filter_by(pdf_id='example').delete()

        # Build one model instance per template entry and stage them all.
        example_rows = [
            Kennzahl(
                pdf_id=entry['pdf_id'],
                label=entry['label'],
                value=entry['value'],
                page=entry['page'],
                status=entry['status'],
            )
            for entry in EXAMPLE_DATA
        ]
        db.session.add_all(example_rows)
        db.session.commit()
        return jsonify({"message": "Kennzahlen erfolgreich initialisiert"})
    except Exception as exc:
        # Undo the partial delete/insert so the session stays usable.
        db.session.rollback()
        return jsonify({"error": str(exc)}), 500
@kennzahlen_bp.route('/api/kennzahlen', methods=['GET'])
def get_kennzahlen():
    """Return all key figures of one PDF as a JSON array.

    The PDF is selected with the ``pdf_id`` query parameter; when it is
    absent, the example data set (``pdf_id='example'``) is returned.
    """
    # Fall back to the example data when the caller names no PDF.
    requested_pdf_id = request.args.get('pdf_id', 'example')
    matching_rows = Kennzahl.query.filter_by(pdf_id=requested_pdf_id).all()
    serialized = [row.to_dict() for row in matching_rows]
    return jsonify(serialized)
@kennzahlen_bp.route('/api/kennzahlen/<label>', methods=['PUT'])
def update_kennzahl(label):
    """Update the stored value of a single key figure.

    Args:
        label: Label of the key figure, taken from the URL path.

    The PDF is selected with the ``pdf_id`` query parameter (default
    ``'example'``). The request body must be a JSON object; its optional
    ``value`` field replaces the stored value, and when the field is missing
    the stored value is left unchanged.

    Returns:
        The updated row as JSON, or a JSON error body with status 400 (body is
        not a JSON object), 404 (no matching row) or 500 (commit failed).
    """
    # silent=True yields None instead of raising on a missing or malformed
    # body, so every bad payload ends in the same JSON 400 response below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # JSON null, lists and scalars have no .get(); reject them explicitly
        # rather than failing later with an AttributeError.
        return jsonify({'error': 'Ungültiger JSON-Body'}), 400

    # Fall back to the example data when the caller names no PDF.
    pdf_id = request.args.get('pdf_id', 'example')
    kennzahl = Kennzahl.query.filter_by(pdf_id=pdf_id, label=label).first()
    if kennzahl is None:
        return jsonify({'error': 'Kennzahl nicht gefunden'}), 404

    kennzahl.value = data.get('value', kennzahl.value)
    try:
        db.session.commit()
    except Exception as e:
        # Same handling as init_kennzahlen: roll back so the session stays
        # usable, then report the failure.
        db.session.rollback()
        return jsonify({'error': str(e)}), 500
    return jsonify(kennzahl.to_dict())

View File

@ -35,7 +35,7 @@ def create_kpi_setting():
"translation", "translation",
"example", "example",
"position", "position",
"active", "active"
] ]
for field in required_fields: for field in required_fields:
if field not in data: if field not in data:
@ -61,7 +61,7 @@ def create_kpi_setting():
translation=data["translation"], translation=data["translation"],
example=data["example"], example=data["example"],
position=data["position"], position=data["position"],
active=data["active"], active=data["active"]
) )
db.session.add(new_kpi_setting) db.session.add(new_kpi_setting)
@ -136,12 +136,7 @@ def update_kpi_positions():
try: try:
for update_item in data: for update_item in data:
if "id" not in update_item or "position" not in update_item: if "id" not in update_item or "position" not in update_item:
return ( return jsonify({"error": "Each item must have 'id' and 'position' fields"}), 400
jsonify(
{"error": "Each item must have 'id' and 'position' fields"}
),
400,
)
kpi_setting = KPISettingModel.query.get_or_404(update_item["id"]) kpi_setting = KPISettingModel.query.get_or_404(update_item["id"])
kpi_setting.position = update_item["position"] kpi_setting.position = update_item["position"]
@ -153,4 +148,4 @@ def update_kpi_positions():
except Exception as e: except Exception as e:
db.session.rollback() db.session.rollback()
return jsonify({"error": f"Failed to update positions: {str(e)}"}), 500 return jsonify({"error": f"Failed to update positions: {str(e)}"}), 500

View File

@ -17,6 +17,48 @@ OCR_SERVICE_URL = os.getenv("OCR_SERVICE_URL", "http://localhost:5051")
progress_per_id = {} # {id: {kpi: 0, pdf: 0}} progress_per_id = {} # {id: {kpi: 0, pdf: 0}}
storage_lock = threading.Lock() storage_lock = threading.Lock()
# Worker for one uploaded PDF: inside the Flask application context it posts
# the PDF bytes to the OCR service and emits "progress" / "error" events via
# socketio. Any exception is printed with its traceback and reported as an
# "error" event instead of propagating to the caller.
def process_pdf_async(app, file_id, file_data, filename):
with app.app_context():
try:
# Wrap the raw bytes in a named file-like object for the multipart upload.
file_obj = BytesIO(file_data)
file_obj.name = filename
files = {"file": (filename, file_obj, "application/pdf")}
data = {"id": file_id}
# Send the PDF together with its id to the OCR service's /ocr endpoint.
response = requests.post(
f"{OCR_SERVICE_URL}/ocr", files=files, data=data, timeout=600
)
if response.status_code == 200:
response_data = response.json()
if "ocr_pdf" in response_data:
import base64
# The "ocr_pdf" field of the JSON response is base64-decoded to PDF bytes.
ocr_pdf_data = base64.b64decode(response_data["ocr_pdf"])
# Overwrite the stored file with the OCR result when the record exists.
file_record = PitchBookModel.query.get(file_id)
if file_record:
file_record.file = ocr_pdf_data
db.session.commit()
print("[DEBUG] PDF updated in database:")
print("[DEBUG] - Successfully saved to database")
socketio.emit("progress", {"id": file_id, "progress": 50})
else:
socketio.emit(
"error", {"id": file_id, "message": "OCR processing failed"}
)
except Exception as e:
import traceback
# Print the full stack trace, then notify clients about the failure.
traceback.print_exc()
socketio.emit(
"error", {"id": file_id, "message": f"Processing failed: {str(e)}"}
)
@pitch_book_controller.route("/", methods=["POST"]) @pitch_book_controller.route("/", methods=["POST"])
def upload_file(): def upload_file():
@ -46,7 +88,6 @@ def upload_file():
files = {"file": (uploaded_file.filename, file_data, "application/pdf")} files = {"file": (uploaded_file.filename, file_data, "application/pdf")}
data = {"id": new_file.id} data = {"id": new_file.id}
socketio.emit("progress", {"id": new_file.id, "progress": 5})
response = requests.post( response = requests.post(
f"{OCR_SERVICE_URL}/ocr", files=files, data=data, timeout=600 f"{OCR_SERVICE_URL}/ocr", files=files, data=data, timeout=600
) )

View File

@ -19,6 +19,6 @@ def progress():
): ):
return jsonify({"error": "Invalid progress value"}), 400 return jsonify({"error": "Invalid progress value"}), 400
socketio.emit("progress", {"id": int(data["id"]), "progress": data["progress"]}) socketio.emit("progress", {"id": data["id"], "progress": data["progress"]})
# Process the data and return a response # Process the data and return a response
return jsonify({"message": "Progress updated"}) return jsonify({"message": "Progress updated"})

View File

@ -14,5 +14,4 @@ def init_db(app):
with app.app_context(): with app.app_context():
db.create_all() db.create_all()
from model.seed_data import seed_default_kpi_settings from model.seed_data import seed_default_kpi_settings
seed_default_kpi_settings() seed_default_kpi_settings()

View File

@ -0,0 +1,26 @@
from .database import db
class Kennzahl(db.Model):
    """Row of the ``kennzahlen`` table: a labelled value tied to a PDF id."""

    __tablename__ = 'kennzahlen'

    # Composite unique constraint: a label may occur only once per pdf_id.
    __table_args__ = (
        db.UniqueConstraint('pdf_id', 'label', name='unique_pdf_kennzahl'),
    )

    id = db.Column(db.Integer, primary_key=True)
    pdf_id = db.Column(db.String(100), nullable=False)  # ID of the PDF
    label = db.Column(db.String(100), nullable=False)
    value = db.Column(db.String(100))
    page = db.Column(db.Integer)
    status = db.Column(db.String(20))

    def to_dict(self):
        """Return every column except ``id`` as a plain dictionary."""
        exported_fields = ('pdf_id', 'label', 'value', 'page', 'status')
        return {field: getattr(self, field) for field in exported_fields}

View File

@ -38,12 +38,10 @@ class KPISettingModel(db.Model):
"translation": self.translation, "translation": self.translation,
"example": self.example, "example": self.example,
"position": self.position, "position": self.position,
"active": self.active, "active": self.active
} }
def __init__( def __init__(self, name, description, mandatory, type, translation, example, position, active):
self, name, description, mandatory, type, translation, example, position, active
):
self.name = name self.name = name
self.description = description self.description = description
self.mandatory = mandatory self.mandatory = mandatory

View File

@ -1,7 +1,6 @@
from model.database import db from model.database import db
from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import LargeBinary from sqlalchemy import LargeBinary
from datetime import datetime
class PitchBookModel(db.Model): class PitchBookModel(db.Model):
@ -9,15 +8,9 @@ class PitchBookModel(db.Model):
filename: Mapped[str] = mapped_column() filename: Mapped[str] = mapped_column()
file: Mapped[bytes] = mapped_column(LargeBinary) file: Mapped[bytes] = mapped_column(LargeBinary)
kpi: Mapped[str | None] kpi: Mapped[str | None]
created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
def to_dict(self): def to_dict(self):
return { return {"id": self.id, "filename": self.filename, "kpi": self.kpi}
"id": self.id,
"filename": self.filename,
"kpi": self.kpi,
"created_at": self.created_at.isoformat() if self.created_at else None,
}
def __init__(self, filename, file): def __init__(self, filename, file):
self.filename = filename self.filename = filename

View File

@ -1,7 +1,6 @@
from model.database import db from model.database import db
from model.kpi_setting_model import KPISettingModel, KPISettingType from model.kpi_setting_model import KPISettingModel, KPISettingType
def seed_default_kpi_settings(): def seed_default_kpi_settings():
if KPISettingModel.query.first() is not None: if KPISettingModel.query.first() is not None:
print("KPI Settings bereits vorhanden, Seeding übersprungen") print("KPI Settings bereits vorhanden, Seeding übersprungen")
@ -16,7 +15,7 @@ def seed_default_kpi_settings():
"translation": "Fund Name", "translation": "Fund Name",
"example": "Alpha Real Estate Fund I", "example": "Alpha Real Estate Fund I",
"position": 1, "position": 1,
"active": True, "active": True
}, },
{ {
"name": "Fondsmanager", "name": "Fondsmanager",
@ -26,7 +25,7 @@ def seed_default_kpi_settings():
"translation": "Fund Manager", "translation": "Fund Manager",
"example": "Max Mustermann", "example": "Max Mustermann",
"position": 2, "position": 2,
"active": True, "active": True
}, },
{ {
"name": "AIFM", "name": "AIFM",
@ -36,7 +35,7 @@ def seed_default_kpi_settings():
"translation": "AIFM", "translation": "AIFM",
"example": "Alpha Investment Management GmbH", "example": "Alpha Investment Management GmbH",
"position": 3, "position": 3,
"active": True, "active": True
}, },
{ {
"name": "Datum", "name": "Datum",
@ -46,7 +45,7 @@ def seed_default_kpi_settings():
"translation": "Date", "translation": "Date",
"example": "05.05.2025", "example": "05.05.2025",
"position": 4, "position": 4,
"active": True, "active": True
}, },
{ {
"name": "Risikoprofil", "name": "Risikoprofil",
@ -56,7 +55,7 @@ def seed_default_kpi_settings():
"translation": "Risk Profile", "translation": "Risk Profile",
"example": "Core/Core++", "example": "Core/Core++",
"position": 5, "position": 5,
"active": True, "active": True
}, },
{ {
"name": "Artikel", "name": "Artikel",
@ -66,7 +65,7 @@ def seed_default_kpi_settings():
"translation": "Article", "translation": "Article",
"example": "Artikel 8", "example": "Artikel 8",
"position": 6, "position": 6,
"active": True, "active": True
}, },
{ {
"name": "Zielrendite", "name": "Zielrendite",
@ -76,7 +75,7 @@ def seed_default_kpi_settings():
"translation": "Target Return", "translation": "Target Return",
"example": "6.5", "example": "6.5",
"position": 7, "position": 7,
"active": True, "active": True
}, },
{ {
"name": "Rendite", "name": "Rendite",
@ -86,7 +85,7 @@ def seed_default_kpi_settings():
"translation": "Return", "translation": "Return",
"example": "5.8", "example": "5.8",
"position": 8, "position": 8,
"active": True, "active": True
}, },
{ {
"name": "Zielausschüttung", "name": "Zielausschüttung",
@ -96,7 +95,7 @@ def seed_default_kpi_settings():
"translation": "Target Distribution", "translation": "Target Distribution",
"example": "4.0", "example": "4.0",
"position": 9, "position": 9,
"active": True, "active": True
}, },
{ {
"name": "Ausschüttung", "name": "Ausschüttung",
@ -106,7 +105,7 @@ def seed_default_kpi_settings():
"translation": "Distribution", "translation": "Distribution",
"example": "3.8", "example": "3.8",
"position": 10, "position": 10,
"active": True, "active": True
}, },
{ {
"name": "Laufzeit", "name": "Laufzeit",
@ -116,7 +115,7 @@ def seed_default_kpi_settings():
"translation": "Duration", "translation": "Duration",
"example": "7 Jahre, 10, Evergreen", "example": "7 Jahre, 10, Evergreen",
"position": 11, "position": 11,
"active": True, "active": True
}, },
{ {
"name": "LTV", "name": "LTV",
@ -126,7 +125,7 @@ def seed_default_kpi_settings():
"translation": "LTV", "translation": "LTV",
"example": "65.0", "example": "65.0",
"position": 12, "position": 12,
"active": True, "active": True
}, },
{ {
"name": "Managementgebühren", "name": "Managementgebühren",
@ -136,7 +135,7 @@ def seed_default_kpi_settings():
"translation": "Management Fees", "translation": "Management Fees",
"example": "1.5", "example": "1.5",
"position": 13, "position": 13,
"active": True, "active": True
}, },
{ {
"name": "Sektorenallokation", "name": "Sektorenallokation",
@ -146,7 +145,7 @@ def seed_default_kpi_settings():
"translation": "Sector Allocation", "translation": "Sector Allocation",
"example": "Büro, Wohnen, Logistik, Studentenwohnen", "example": "Büro, Wohnen, Logistik, Studentenwohnen",
"position": 14, "position": 14,
"active": True, "active": True
}, },
{ {
"name": "Länderallokation", "name": "Länderallokation",
@ -156,8 +155,8 @@ def seed_default_kpi_settings():
"translation": "Country Allocation", "translation": "Country Allocation",
"example": "Deutschland,Frankreich, Österreich, Schweiz", "example": "Deutschland,Frankreich, Österreich, Schweiz",
"position": 15, "position": 15,
"active": True, "active": True
}, }
] ]
print("Füge Standard KPI Settings hinzu...") print("Füge Standard KPI Settings hinzu...")
@ -171,17 +170,15 @@ def seed_default_kpi_settings():
translation=kpi_data["translation"], translation=kpi_data["translation"],
example=kpi_data["example"], example=kpi_data["example"],
position=kpi_data["position"], position=kpi_data["position"],
active=kpi_data["active"], active=kpi_data["active"]
) )
db.session.add(kpi_setting) db.session.add(kpi_setting)
try: try:
db.session.commit() db.session.commit()
print( print(f"Erfolgreich {len(default_kpi_settings)} Standard KPI Settings hinzugefügt")
f"Erfolgreich {len(default_kpi_settings)} Standard KPI Settings hinzugefügt"
)
except Exception as e: except Exception as e:
db.session.rollback() db.session.rollback()
print(f"Fehler beim Hinzufügen der Standard KPI Settings: {e}") print(f"Fehler beim Hinzufügen der Standard KPI Settings: {e}")
raise raise

View File

@ -15,7 +15,7 @@ def extract_text_from_ocr_json():
pitchbook_id = json_data["id"] pitchbook_id = json_data["id"]
pages_data = json_data["extracted_text_per_page"] pages_data = json_data["extracted_text_per_page"]
entities_json = extract_with_exxeta(pages_data, pitchbook_id) entities_json = extract_with_exxeta(pages_data)
entities = json.loads(entities_json) if isinstance(entities_json, str) else entities_json entities = json.loads(entities_json) if isinstance(entities_json, str) else entities_json
validate_payload = { validate_payload = {
@ -39,4 +39,4 @@ def extract_text_from_ocr_json():
if __name__ == "__main__": if __name__ == "__main__":
app.run(host="0.0.0.0", port=5053, debug=True) app.run(host="0.0.0.0", port=5053, debug=True)

View File

@ -9,7 +9,6 @@ MODEL = "gpt-4o-mini"
EXXETA_BASE_URL = "https://ai.exxeta.com/api/v2/azure/openai" EXXETA_BASE_URL = "https://ai.exxeta.com/api/v2/azure/openai"
load_dotenv() load_dotenv()
EXXETA_API_KEY = os.getenv("API_KEY") EXXETA_API_KEY = os.getenv("API_KEY")
COORDINATOR_URL = os.getenv("COORDINATOR_URL", "http://localhost:5050")
MAX_RETRIES = 3 MAX_RETRIES = 3
TIMEOUT = 180 TIMEOUT = 180
@ -17,20 +16,14 @@ TIMEOUT = 180
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def extract_with_exxeta(pages_json, pitchbook_id): def extract_with_exxeta(pages_json):
results = [] results = []
if not EXXETA_API_KEY: if not EXXETA_API_KEY:
logger.warning("EXXETA_API_KEY nicht gesetzt. Rückgabe eines leeren JSON.") logger.warning("EXXETA_API_KEY nicht gesetzt. Rückgabe eines leeren JSON.")
return json.dumps(results, indent=2, ensure_ascii=False) return json.dumps(results, indent=2, ensure_ascii=False)
i = 0
for page_data in pages_json: for page_data in pages_json:
i += 1
if i % 8 == 0:
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35 + 60/len(pages_json)*i})
page_num = page_data.get("page") page_num = page_data.get("page")
page_data.get("page") page_data.get("page")
text = page_data.get("text", "") text = page_data.get("text", "")
@ -64,7 +57,7 @@ def extract_with_exxeta(pages_json, pitchbook_id):
prompt = ( prompt = (
"Bitte extrahiere relevante Fondskennzahlen aus dem folgenden Pitchbook-Text. " "Bitte extrahiere relevante Fondskennzahlen aus dem folgenden Pitchbook-Text. "
"Analysiere den Text sorgfältig, um **nur exakt benannte und relevante Werte** zu extrahieren.\n\n" "Analysiere den Text sorgfältig, um **nur exakt benannte und relevante Werte** zu extrahieren.\n\n"
"ZU EXTRAHIERENDE KENNZAHLEN (immer exakt wie unten angegeben):\n" "ZU EXTRAHIERENDE KENNZAHLEN (immer exakt wie unten angegeben):\n"
"- FONDSNAME\n" "- FONDSNAME\n"
"- FONDSMANAGER\n" "- FONDSMANAGER\n"
@ -81,14 +74,14 @@ def extract_with_exxeta(pages_json, pitchbook_id):
"- MANAGEMENTGEBÜHREN (ggf. mit Staffelung und Bezug auf NAV/GAV)\n" "- MANAGEMENTGEBÜHREN (ggf. mit Staffelung und Bezug auf NAV/GAV)\n"
"- SEKTORENALLOKATION (z. B. BÜRO, LOGISTIK, WOHNEN... inkl. %-Angaben)\n" "- SEKTORENALLOKATION (z. B. BÜRO, LOGISTIK, WOHNEN... inkl. %-Angaben)\n"
"- LÄNDERALLOKATION (z. B. DEUTSCHLAND, FRANKREICH, etc. inkl. %-Angaben)\n\n" "- LÄNDERALLOKATION (z. B. DEUTSCHLAND, FRANKREICH, etc. inkl. %-Angaben)\n\n"
"WICHTIG:\n" "WICHTIG:\n"
"- Gib **nur eine Entität pro Kennzahl** an - keine Listen oder Interpretationen.\n" "- Gib **nur eine Entität pro Kennzahl** an - keine Listen oder Interpretationen.\n"
"- Wenn mehrere Varianten genannt werden (z. B. \"Core und Core+\"), gib sie im Originalformat als **eine entity** an.\n" "- Wenn mehrere Varianten genannt werden (z. B. \"Core und Core+\"), gib sie im Originalformat als **eine entity** an.\n"
"- **Keine Vermutungen oder Ergänzungen**. Wenn keine Information enthalten ist, gib die Kennzahl **nicht aus**.\n" "- **Keine Vermutungen oder Ergänzungen**. Wenn keine Information enthalten ist, gib die Kennzahl **nicht aus**.\n"
"- Extrahiere **nur wörtlich vorkommende Inhalte** (keine Berechnungen, keine Zusammenfassungen).\n" "- Extrahiere **nur wörtlich vorkommende Inhalte** (keine Berechnungen, keine Zusammenfassungen).\n"
"- Jeder gefundene Wert muss einem der obigen Label **eindeutig zuordenbar** sein.\n\n" "- Jeder gefundene Wert muss einem der obigen Label **eindeutig zuordenbar** sein.\n\n"
"FORMAT:\n" "FORMAT:\n"
"Antworte als **reines JSON-Array** mit folgendem Format:\n" "Antworte als **reines JSON-Array** mit folgendem Format:\n"
"[\n" "[\n"
@ -99,7 +92,7 @@ def extract_with_exxeta(pages_json, pitchbook_id):
" },\n" " },\n"
" ...\n" " ...\n"
"]\n\n" "]\n\n"
f"Falls keine Kennzahl enthalten ist, gib ein leeres Array [] zurück.\n\n" f"Falls keine Kennzahl enthalten ist, gib ein leeres Array [] zurück.\n\n"
f"Nur JSON-Antwort - keine Kommentare, keine Erklärungen, kein Text außerhalb des JSON.\n\n" f"Nur JSON-Antwort - keine Kommentare, keine Erklärungen, kein Text außerhalb des JSON.\n\n"
f"TEXT:\n{text}" f"TEXT:\n{text}"
@ -151,6 +144,4 @@ def extract_with_exxeta(pages_json, pitchbook_id):
if attempt == MAX_RETRIES: if attempt == MAX_RETRIES:
results.extend([]) results.extend([])
return json.dumps(results, indent=2, ensure_ascii=False)
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 95})
return json.dumps(results, indent=2, ensure_ascii=False)

View File

@ -41,7 +41,6 @@ def convert_pdf_async(temp_path, pitchbook_id):
logger.info("Sending payload to EXXETA and SPACY services") logger.info("Sending payload to EXXETA and SPACY services")
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35})
try: try:
exxeta_response = requests.post(EXXETA_URL, json=payload, timeout=600) exxeta_response = requests.post(EXXETA_URL, json=payload, timeout=600)
logger.info(f"EXXETA response: {exxeta_response.status_code}") logger.info(f"EXXETA response: {exxeta_response.status_code}")
@ -60,8 +59,9 @@ def convert_pdf_async(temp_path, pitchbook_id):
headers = {} headers = {}
try: try:
requests.put(f"{COORDINATOR_URL}/api/pitch_book/{pitchbook_id}", files=files, timeout=600, headers=headers) requests.put(f"{COORDINATOR_URL}/api/pitch_book/{pitchbook_id}", files=files, timeout=600, headers=headers)
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 50}, timeout=600)
logger.info("COORDINATOR response: Progress + File updated") logger.info("COORDINATOR response: Progress + File updated")
except Exception as e: except Exception as e:
logger.error(f"Error calling COORDINATOR: {e}") logger.error(f"Error calling COORDINATOR: {e}")

View File

@ -10,7 +10,7 @@ import json
app = Flask(__name__) app = Flask(__name__)
load_dotenv() load_dotenv()
COORDINATOR_URL = os.getenv("COORDINATOR_URL", "http://localhost:5000") coordinator_url = os.getenv("COORDINATOR_URL", "http://localhost:5000")
# todo add persistence layer # todo add persistence layer
data_storage = {} # {id: {spacy_data: [], exxeta_data: []}} data_storage = {} # {id: {spacy_data: [], exxeta_data: []}}
@ -19,7 +19,7 @@ storage_lock = threading.Lock()
def send_to_coordinator_service(processed_data, request_id): def send_to_coordinator_service(processed_data, request_id):
if not COORDINATOR_URL: if not coordinator_url:
print("Not processed, missing url", processed_data) print("Not processed, missing url", processed_data)
return return
@ -28,7 +28,7 @@ def send_to_coordinator_service(processed_data, request_id):
"kpi": json.dumps(processed_data), "kpi": json.dumps(processed_data),
} }
requests.put( requests.put(
COORDINATOR_URL + "/api/pitch_book/" + str(request_id), coordinator_url + "/api/pitch_book/" + str(request_id),
data=payload, data=payload,
) )
print(f"Result PitchBook {request_id} sent to coordinator") print(f"Result PitchBook {request_id} sent to coordinator")
@ -40,7 +40,6 @@ def send_to_coordinator_service(processed_data, request_id):
def process_data_async(request_id, spacy_data, exxeta_data): def process_data_async(request_id, spacy_data, exxeta_data):
try: try:
requests.post(COORDINATOR_URL + "/api/progress", json={"id": request_id, "progress": 95})
print(f"Start asynchronous processing for PitchBook: {request_id}") print(f"Start asynchronous processing for PitchBook: {request_id}")
# Perform merge # Perform merge

View File

@ -47,7 +47,6 @@ services:
environment: environment:
- EXXETA_SERVICE_URL=http://exxeta:5000/extract - EXXETA_SERVICE_URL=http://exxeta:5000/extract
- SPACY_SERVICE_URL=http://spacy:5052/extract - SPACY_SERVICE_URL=http://spacy:5052/extract
- COORDINATOR_URL=http://coordinator:5000
ports: ports:
- 5051:5000 - 5051:5000
@ -67,7 +66,6 @@ services:
- .env - .env
environment: environment:
- VALIDATE_SERVICE_URL=http://validate:5000/validate - VALIDATE_SERVICE_URL=http://validate:5000/validate
- COORDINATOR_URL=http://coordinator:5000
ports: ports:
- 5053:5000 - 5053:5000

View File

@ -42,7 +42,7 @@ export default function KennzahlenTable({
data, data,
pdfId, pdfId,
settings, settings,
from, from
}: KennzahlenTableProps) { }: KennzahlenTableProps) {
const [editingIndex, setEditingIndex] = useState<string>(""); const [editingIndex, setEditingIndex] = useState<string>("");
const [editValue, setEditValue] = useState(""); const [editValue, setEditValue] = useState("");
@ -286,17 +286,12 @@ export default function KennzahlenTable({
</Tooltip> </Tooltip>
) : ( ) : (
<Tooltip <Tooltip
title={ title={hasNoValue ?
hasNoValue ? ( <>
<> <b>Problem</b>
<b>Problem</b> <br />
<br /> Es wurden keine Kennzahlen gefunden. Bitte ergänzen!
Es wurden keine Kennzahlen gefunden. Bitte </> : ""
ergänzen!
</>
) : (
""
)
} }
placement="bottom" placement="bottom"
arrow arrow
@ -323,10 +318,7 @@ export default function KennzahlenTable({
}} }}
> >
{hasNoValue && ( {hasNoValue && (
<ErrorOutlineIcon <ErrorOutlineIcon fontSize="small" color="error" />
fontSize="small"
color="error"
/>
)} )}
{editingIndex === row.setting.name ? ( {editingIndex === row.setting.name ? (
<TextField <TextField

View File

@ -1,388 +1,186 @@
import CheckCircleIcon from "@mui/icons-material/CheckCircle"; import { Box, Paper, Table, TableBody, TableCell, TableContainer, TableHead, TableRow, Typography, CircularProgress, Chip } from "@mui/material";
import HourglassEmptyIcon from "@mui/icons-material/HourglassEmpty"; import { useSuspenseQuery } from "@tanstack/react-query";
import PictureAsPdfIcon from "@mui/icons-material/PictureAsPdf";
import {
Box,
Chip,
CircularProgress,
LinearProgress,
Paper,
Table,
TableBody,
TableCell,
TableContainer,
TableHead,
TableRow,
Typography,
} from "@mui/material";
import { useQueryClient, useSuspenseQuery } from "@tanstack/react-query";
import { useNavigate } from "@tanstack/react-router"; import { useNavigate } from "@tanstack/react-router";
import { useCallback, useEffect, useState } from "react";
import { socket } from "../socket";
import { fetchPitchBooksById } from "../util/api";
import { pitchBooksQueryOptions } from "../util/query"; import { pitchBooksQueryOptions } from "../util/query";
import PictureAsPdfIcon from '@mui/icons-material/PictureAsPdf';
import CheckCircleIcon from '@mui/icons-material/CheckCircle';
import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
interface PitchBook { interface PitchBook {
id: number; id: number;
filename: string; filename: string;
created_at: string; created_at: string;
kpi?: kpi?: string | {
| string [key: string]: {
| { label: string;
[key: string]: { entity: string;
label: string; page: number;
entity: string; status: string;
page: number; source: string;
status: string; }[];
source: string; };
}[]; status?: 'processing' | 'completed';
};
status?: "processing" | "completed";
} }
export function PitchBooksTable() { export function PitchBooksTable() {
const [loadingPitchBooks, setLoadingPitchBooks] = useState< const navigate = useNavigate();
{ const { data: pitchBooks, isLoading } = useSuspenseQuery(pitchBooksQueryOptions());
id: number;
progress: number;
filename?: string;
buffer: number;
intervalId?: number;
}[]
>([]);
const navigate = useNavigate();
const { data: pitchBooks, isLoading } = useSuspenseQuery(
pitchBooksQueryOptions(),
);
const handleRowClick = (pitchBookId: number) => { const handleRowClick = (pitchBookId: number) => {
navigate({ navigate({
to: "/extractedResult/$pitchBook", to: "/extractedResult/$pitchBook",
params: { pitchBook: pitchBookId.toString() }, params: { pitchBook: pitchBookId.toString() },
search: { from: "overview" }, search: { from: "overview" }
}); });
}; };
const onConnection = useCallback(() => { const getKPIValue = (pitchBook: PitchBook, fieldName: string): string => {
console.log("connected"); if (!pitchBook.kpi || typeof pitchBook.kpi === 'string') {
}, []); try {
const parsedKPI = JSON.parse(pitchBook.kpi as string);
// Convert array to object format if needed
const kpiObj = Array.isArray(parsedKPI) ?
parsedKPI.reduce((acc: any, item: any) => {
if (!acc[item.label]) acc[item.label] = [];
acc[item.label].push(item);
return acc;
}, {}) : parsedKPI;
const queryClient = useQueryClient(); return kpiObj[fieldName]?.[0]?.entity || 'N/A';
} catch {
return 'N/A';
}
}
const onProgress = useCallback( return (pitchBook.kpi as any)[fieldName]?.[0]?.entity || 'N/A';
(progress: { id: number; progress: number }) => { };
if (progress.progress === 100) {
setLoadingPitchBooks((prev) => {
const intervalId = prev.find(
(item) => item.id === progress.id,
)?.intervalId;
console.log(intervalId, prev);
intervalId && clearInterval(intervalId);
return [...prev.filter((item) => item.id !== progress.id)]; const getStatus = (pitchBook: PitchBook) => {
}); if (pitchBook.kpi &&
queryClient.invalidateQueries({ ((typeof pitchBook.kpi === 'string' && pitchBook.kpi !== '{}') ||
queryKey: pitchBooksQueryOptions().queryKey, (typeof pitchBook.kpi === 'object' && Object.keys(pitchBook.kpi).length > 0))) {
}); return 'completed';
} else { }
setLoadingPitchBooks((prev) => { return 'processing';
const oldItem = prev.find((item) => item.id === progress.id); };
let intervalId = oldItem?.intervalId;
if (!oldItem) {
intervalId = setInterval(() => {
setLoadingPitchBooks((prev) => {
const oldItem = prev.find((item) => item.id === progress.id);
if (!oldItem) return prev;
return [ if (isLoading) {
...prev.filter((e) => e.id !== progress.id), return (
{ <Box display="flex" justifyContent="center" alignItems="center" height="400px">
id: progress.id, <CircularProgress sx={{ color: "#383838" }} />
progress: oldItem?.progress ?? progress.progress, </Box>
filename: oldItem?.filename, );
buffer: oldItem ? oldItem.buffer + 0.5 : 0, }
intervalId: oldItem.intervalId,
},
];
});
}, 400);
fetchPitchBooksById(progress.id) return (
.then((res) => { <TableContainer
setLoadingPitchBooks((prev) => [ component={Paper}
...prev.filter((item) => item.id !== progress.id), sx={{
{ width: "85%",
id: progress.id, maxWidth: 1200,
progress: progress.progress, boxShadow: "0 2px 8px rgba(0,0,0,0.1)",
filename: res.filename, }}
buffer: 0, >
intervalId, <Table>
}, <TableHead>
]); <TableRow sx={{ backgroundColor: "#f5f5f5" }}>
}) <TableCell sx={{ width: "60px" }}></TableCell>
.catch((err) => { <TableCell sx={{ fontWeight: "bold" }}>Fondsname</TableCell>
console.error(err); <TableCell sx={{ fontWeight: "bold" }}>Fondsmanager</TableCell>
}); <TableCell sx={{ fontWeight: "bold" }}>Dateiname</TableCell>
} <TableCell sx={{ fontWeight: "bold", width: "120px" }}>Status</TableCell>
return [ </TableRow>
...prev.filter((item) => item.id !== progress.id), </TableHead>
{ <TableBody>
id: progress.id, {pitchBooks.map((pitchBook: PitchBook) => {
progress: progress.progress, const status = getStatus(pitchBook);
filename: oldItem?.filename, const fundName = getKPIValue(pitchBook, 'FONDSNAME') ||
buffer: 0, getKPIValue(pitchBook, 'FUND_NAME') ||
intervalId, getKPIValue(pitchBook, 'NAME');
},
];
});
}
},
[queryClient],
);
useEffect(() => { const manager = getKPIValue(pitchBook, 'FONDSMANAGER') ||
socket.on("connect", onConnection); getKPIValue(pitchBook, 'MANAGER') ||
socket.on("progress", onProgress); getKPIValue(pitchBook, 'PORTFOLIO_MANAGER');
return () => {
socket.off("connect", onConnection);
socket.off("progress", onProgress);
};
}, [onConnection, onProgress]);
const getKPIValue = (pitchBook: PitchBook, fieldName: string): string => { return (
if (!pitchBook.kpi || typeof pitchBook.kpi === "string") { <TableRow
try { key={pitchBook.id}
const parsedKPI = JSON.parse(pitchBook.kpi as string); onClick={() => handleRowClick(pitchBook.id)}
// Convert array to object format if needed sx={{
const kpiObj = Array.isArray(parsedKPI) cursor: "pointer",
? parsedKPI.reduce((acc, item) => { "&:hover": {
if (!acc[item.label]) acc[item.label] = []; backgroundColor: "#f9f9f9",
acc[item.label].push(item); },
return acc; }}
}, {}) >
: parsedKPI; <TableCell>
<Box
return kpiObj[fieldName]?.[0]?.entity || "N/A"; sx={{
} catch { width: 40,
return "N/A"; height: 50,
} backgroundColor: "#f0f0f0",
} borderRadius: 1,
display: "flex",
return pitchBook.kpi[fieldName]?.[0]?.entity || "N/A"; alignItems: "center",
}; justifyContent: "center",
border: "1px solid #e0e0e0",
const getStatus = (pitchBook: PitchBook) => { }}
if ( >
pitchBook.kpi && <PictureAsPdfIcon fontSize="small" sx={{ color: "#666" }} />
((typeof pitchBook.kpi === "string" && pitchBook.kpi !== "{}") || </Box>
(typeof pitchBook.kpi === "object" && </TableCell>
Object.keys(pitchBook.kpi).length > 0)) <TableCell>
) { <Typography variant="body2" fontWeight="medium">
return "completed"; {fundName}
} </Typography>
return "processing"; </TableCell>
}; <TableCell>{manager}</TableCell>
<TableCell>
if (isLoading) { <Typography variant="body2" color="text.secondary" fontSize="0.875rem">
return ( {pitchBook.filename}
<Box </Typography>
display="flex" </TableCell>
justifyContent="center" <TableCell>
alignItems="center" {status === 'completed' ? (
height="400px" <Chip
> icon={<CheckCircleIcon />}
<CircularProgress sx={{ color: "#383838" }} /> label="Abgeschlossen"
</Box> size="small"
); sx={{
} backgroundColor: "#e8f5e9",
color: "#2e7d32",
return ( "& .MuiChip-icon": {
<TableContainer color: "#2e7d32",
component={Paper} },
sx={{ }}
width: "85%", />
maxWidth: 1200, ) : (
boxShadow: "0 2px 8px rgba(0,0,0,0.1)", <Chip
}} icon={<HourglassEmptyIcon />}
> label="In Bearbeitung"
<Table> size="small"
<TableHead> sx={{
<TableRow sx={{ backgroundColor: "#f5f5f5" }}> backgroundColor: "#fff3e0",
<TableCell sx={{ width: "60px" }} /> color: "#e65100",
<TableCell sx={{ fontWeight: "bold" }}>Fondsname</TableCell> "& .MuiChip-icon": {
<TableCell sx={{ fontWeight: "bold" }}>Fondsmanager</TableCell> color: "#e65100",
<TableCell sx={{ fontWeight: "bold" }}>Dateiname</TableCell> },
<TableCell sx={{ fontWeight: "bold", width: "120px" }}> }}
Status />
</TableCell> )}
</TableRow> </TableCell>
</TableHead> </TableRow>
<TableBody> );
{pitchBooks })}
.filter( </TableBody>
(pitchbook: PitchBook) => </Table>
!loadingPitchBooks.some((e) => e.id === pitchbook.id), {pitchBooks.length === 0 && (
) <Box p={4} textAlign="center">
.sort( <Typography color="text.secondary">
(a: PitchBook, b: PitchBook) => Keine Pitch Books vorhanden
new Date(a.created_at).getTime() - </Typography>
new Date(b.created_at).getTime(), </Box>
) )}
.map((pitchBook: PitchBook) => { </TableContainer>
const status = getStatus(pitchBook); );
const fundName = }
getKPIValue(pitchBook, "FONDSNAME") ||
getKPIValue(pitchBook, "FUND_NAME") ||
getKPIValue(pitchBook, "NAME");
const manager =
getKPIValue(pitchBook, "FONDSMANAGER") ||
getKPIValue(pitchBook, "MANAGER") ||
getKPIValue(pitchBook, "PORTFOLIO_MANAGER");
return (
<TableRow
key={pitchBook.id}
onClick={() => handleRowClick(pitchBook.id)}
sx={{
cursor: "pointer",
"&:hover": {
backgroundColor: "#f9f9f9",
},
}}
>
<TableCell>
<Box
sx={{
width: 40,
height: 50,
backgroundColor: "#f0f0f0",
borderRadius: 1,
display: "flex",
alignItems: "center",
justifyContent: "center",
border: "1px solid #e0e0e0",
}}
>
<PictureAsPdfIcon
fontSize="small"
sx={{ color: "#666" }}
/>
</Box>
</TableCell>
<TableCell>
<Typography variant="body2" fontWeight="medium">
{fundName}
</Typography>
</TableCell>
<TableCell>{manager}</TableCell>
<TableCell>
<Typography
variant="body2"
color="text.secondary"
fontSize="0.875rem"
>
{pitchBook.filename}
</Typography>
</TableCell>
<TableCell>
{status === "completed" ? (
<Chip
icon={<CheckCircleIcon />}
label="Abgeschlossen"
size="small"
sx={{
backgroundColor: "#e8f5e9",
color: "#2e7d32",
"& .MuiChip-icon": {
color: "#2e7d32",
},
}}
/>
) : (
<Chip
icon={<HourglassEmptyIcon />}
label="In Bearbeitung"
size="small"
sx={{
backgroundColor: "#fff3e0",
color: "#e65100",
"& .MuiChip-icon": {
color: "#e65100",
},
}}
/>
)}
</TableCell>
</TableRow>
);
})}
{loadingPitchBooks
.sort((a, b) => a.id - b.id)
.map((pitchBook) => (
<TableRow key={pitchBook.id}>
<TableCell>
<Box
sx={{
width: 40,
height: 50,
backgroundColor: "#f0f0f0",
borderRadius: 1,
display: "flex",
alignItems: "center",
justifyContent: "center",
border: "1px solid #e0e0e0",
}}
>
<PictureAsPdfIcon fontSize="small" sx={{ color: "#666" }} />
</Box>
</TableCell>
<TableCell colSpan={2}>
<LinearProgress
variant="buffer"
value={pitchBook.progress}
valueBuffer={
pitchBook.buffer
? pitchBook.progress + pitchBook.buffer
: pitchBook.progress
}
/>
</TableCell>
<TableCell>
{" "}
<Typography
variant="body2"
color="text.secondary"
fontSize="0.875rem"
>
{pitchBook.filename}
</Typography>
</TableCell>
<TableCell>
<Chip
icon={<HourglassEmptyIcon />}
label="In Bearbeitung"
size="small"
sx={{
backgroundColor: "#fff3e0",
color: "#e65100",
"& .MuiChip-icon": {
color: "#e65100",
},
}}
/>
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
{pitchBooks.length === 0 && (
<Box p={4} textAlign="center">
<Typography color="text.secondary">
Keine Pitch Books vorhanden
</Typography>
</Box>
)}
</TableContainer>
);
}

View File

@ -1,11 +1,13 @@
import SettingsIcon from "@mui/icons-material/Settings"; import SettingsIcon from "@mui/icons-material/Settings";
import { Backdrop, Box, Button, IconButton, Paper } from "@mui/material"; import { Backdrop, Box, Button, IconButton, Paper } from "@mui/material";
import { useNavigate, useRouter } from "@tanstack/react-router"; import { useNavigate } from "@tanstack/react-router";
import { useCallback, useEffect, useState } from "react"; import { useCallback, useEffect, useState } from "react";
import FileUpload from "react-material-file-upload"; import FileUpload from "react-material-file-upload";
import { socket } from "../socket"; import { socket } from "../socket";
import { API_HOST } from "../util/api";
import { CircularProgressWithLabel } from "./CircularProgressWithLabel"; import { CircularProgressWithLabel } from "./CircularProgressWithLabel";
import { API_HOST } from "../util/api";
const PROGRESS = true;
export default function UploadPage() { export default function UploadPage() {
const [files, setFiles] = useState<File[]>([]); const [files, setFiles] = useState<File[]>([]);
@ -13,7 +15,6 @@ export default function UploadPage() {
const [loadingState, setLoadingState] = useState<number | null>(null); const [loadingState, setLoadingState] = useState<number | null>(null);
const fileTypes = ["pdf"]; const fileTypes = ["pdf"];
const navigate = useNavigate(); const navigate = useNavigate();
const router = useRouter();
const uploadFile = useCallback(async () => { const uploadFile = useCallback(async () => {
const formData = new FormData(); const formData = new FormData();
@ -27,11 +28,17 @@ export default function UploadPage() {
console.log("File uploaded successfully"); console.log("File uploaded successfully");
const data = await response.json(); const data = await response.json();
setPageId(data.id.toString()); setPageId(data.id.toString());
setLoadingState(5); setLoadingState(0);
!PROGRESS &&
navigate({
to: "/extractedResult/$pitchBook",
params: { pitchBook: data.id.toString() },
});
} else { } else {
console.error("Failed to upload file"); console.error("Failed to upload file");
} }
}, [files]); }, [files, navigate]);
const onConnection = useCallback(() => { const onConnection = useCallback(() => {
console.log("connected"); console.log("connected");
@ -73,16 +80,18 @@ export default function UploadPage() {
return ( return (
<> <>
<Backdrop {PROGRESS && (
sx={(theme) => ({ color: "#fff", zIndex: theme.zIndex.drawer + 1 })} <Backdrop
open={pageId !== null && loadingState !== null && loadingState < 100} sx={(theme) => ({ color: "#fff", zIndex: theme.zIndex.drawer + 1 })}
> open={pageId !== null && loadingState !== null && loadingState < 100}
<CircularProgressWithLabel >
color="inherit" <CircularProgressWithLabel
value={loadingState || 0} color="inherit"
size={60} value={loadingState || 0}
/> size={60}
</Backdrop> />
</Backdrop>
)}
<Box <Box
display="flex" display="flex"
flexDirection="column" flexDirection="column"
@ -179,7 +188,6 @@ export default function UploadPage() {
backgroundColor: "#383838", backgroundColor: "#383838",
"&:hover": { backgroundColor: "#2e2e2e" }, "&:hover": { backgroundColor: "#2e2e2e" },
}} }}
onMouseEnter={() => router.preloadRoute({ to: "/pitchbooks" })}
onClick={() => navigate({ to: "/pitchbooks" })} onClick={() => navigate({ to: "/pitchbooks" })}
> >
Alle Pitch Books anzeigen Alle Pitch Books anzeigen
@ -187,4 +195,4 @@ export default function UploadPage() {
</Box> </Box>
</> </>
); );
} }

View File

@ -1,6 +1,6 @@
import type { Kennzahl } from "@/types/kpi"; import type { Kennzahl } from "@/types/kpi";
const API_HOST = import.meta.env.VITE_API_HOST || "http://localhost:5050"; const API_HOST = import.meta.env.VITE_API_HOST || 'http://localhost:5050';
export { API_HOST }; export { API_HOST };
@ -15,7 +15,9 @@ export const fetchKPI = async (
source: string; source: string;
}[]; }[];
}> => { }> => {
const response = await fetch(`${API_HOST}/api/pitch_book/${pitchBookId}`); const response = await fetch(
`${API_HOST}/api/pitch_book/${pitchBookId}`,
);
const data = await response.json(); const data = await response.json();
return data.kpi ? getKPI(data.kpi) : {}; return data.kpi ? getKPI(data.kpi) : {};
@ -44,10 +46,13 @@ export const fetchPutKPI = async (
const formData = new FormData(); const formData = new FormData();
formData.append("kpi", JSON.stringify(flattenKPIArray(kpi))); formData.append("kpi", JSON.stringify(flattenKPIArray(kpi)));
const response = await fetch(`${API_HOST}/api/pitch_book/${pitchBookId}`, { const response = await fetch(
method: "PUT", `${API_HOST}/api/pitch_book/${pitchBookId}`,
body: formData, {
}); method: "PUT",
body: formData,
},
);
if (!response.ok) { if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`); throw new Error(`HTTP error! status: ${response.status}`);
} }
@ -114,11 +119,3 @@ export async function fetchPitchBooks() {
} }
return response.json(); return response.json();
} }
export async function fetchPitchBooksById(id: number) {
const response = await fetch(`${API_HOST}/api/pitch_book/${id}`);
if (!response.ok) {
throw new Error("Failed to fetch pitch books");
}
return response.json();
}