KPI_data angepasst

pull/94/head
Abdulrahman Dabbagh 2025-06-29 04:57:24 +02:00
parent 12783539b3
commit 360da3acb0
11 changed files with 234 additions and 133 deletions

View File

@ -29,13 +29,11 @@ def create_kpi_setting():
required_fields = [
"name",
"description",
"mandatory",
"type",
"translation",
"example",
"position",
"active",
"examples",
]
for field in required_fields:
if field not in data:
@ -55,13 +53,11 @@ def create_kpi_setting():
new_kpi_setting = KPISettingModel(
name=data["name"],
description=data["description"],
mandatory=data["mandatory"],
type=kpi_type,
translation=data["translation"],
example=data["example"],
position=data["position"],
active=data["active"],
examples=data.get("examples", []),
)
db.session.add(new_kpi_setting)
@ -84,9 +80,6 @@ def update_kpi_setting(id):
return jsonify({"error": "KPI Setting with this name already exists"}), 409
kpi_setting.name = data["name"]
if "description" in data:
kpi_setting.description = data["description"]
if "mandatory" in data:
kpi_setting.mandatory = data["mandatory"]
@ -100,18 +93,15 @@ def update_kpi_setting(id):
400,
)
if "translation" in data:
kpi_setting.translation = data["translation"]
if "example" in data:
kpi_setting.example = data["example"]
if "position" in data:
kpi_setting.position = data["position"]
if "active" in data:
kpi_setting.active = data["active"]
if "examples" in data:
kpi_setting.examples = data["examples"]
db.session.commit()
return jsonify(kpi_setting.to_dict()), 200

View File

@ -2,6 +2,8 @@ from model.database import db
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import Enum as SQLAlchemyEnum
from enum import Enum
from sqlalchemy.dialects.postgresql import JSONB
from collections import OrderedDict
class KPISettingType(Enum):
@ -18,37 +20,31 @@ class KPISettingModel(db.Model):
id: Mapped[int] = mapped_column(primary_key=True)
name: Mapped[str] = mapped_column(unique=True)
description: Mapped[str]
mandatory: Mapped[bool]
type: Mapped[KPISettingType] = mapped_column(
SQLAlchemyEnum(KPISettingType, native_enum=True)
)
translation: Mapped[str]
example: Mapped[str]
position: Mapped[int]
active: Mapped[bool]
examples: Mapped[list] = mapped_column(JSONB, default=[])
def to_dict(self):
return {
"id": self.id,
"name": self.name,
"description": self.description,
"mandatory": self.mandatory,
"type": self.type.value,
"translation": self.translation,
"example": self.example,
"position": self.position,
"active": self.active,
}
return OrderedDict(
[
("id", self.id),
("name", self.name),
("mandatory", self.mandatory),
("type", self.type.value),
("position", self.position),
("examples", self.examples),
("active", self.active),
]
)
def __init__(
self, name, description, mandatory, type, translation, example, position, active
):
def __init__(self, name, mandatory, type, position, active, examples=None):
self.name = name
self.description = description
self.mandatory = mandatory
self.type = type
self.translation = translation
self.example = example
self.position = position
self.active = active
self.examples = examples or []

View File

@ -10,153 +10,243 @@ def seed_default_kpi_settings():
default_kpi_settings = [
{
"name": "Fondsname",
"description": "Der vollständige Name des Investmentfonds",
"mandatory": True,
"type": KPISettingType.STRING,
"translation": "Fund Name",
"example": "Alpha Real Estate Fund I",
"position": 1,
"active": True,
"examples": [
{
"sentence": "Der Fonds trägt den Namen Alpha Real Estate Fund I.",
"value": "Alpha Real Estate Fund I",
},
{
"sentence": "Im Pitchbook wird der Fondsname als Alpha Real Estate Fund I angegeben.",
"value": "Alpha Real Estate Fund I",
},
],
},
{
"name": "Fondsmanager",
"description": "Verantwortlicher Manager für die Fondsverwaltung",
"mandatory": True,
"type": KPISettingType.STRING,
"translation": "Fund Manager",
"example": "Max Mustermann",
"position": 2,
"active": True,
"examples": [
{
"sentence": "Fondsmanager des Projekts ist Max Mustermann.",
"value": "Max Mustermann",
},
{
"sentence": "Die Verwaltung liegt bei Max Mustermann.",
"value": "Max Mustermann",
},
],
},
{
"name": "AIFM",
"description": "Alternative Investment Fund Manager",
"mandatory": True,
"type": KPISettingType.STRING,
"translation": "AIFM",
"example": "Alpha Investment Management GmbH",
"position": 3,
"active": True,
"examples": [
{
"sentence": "AIFM ist die Alpha Investment Management GmbH.",
"value": "Alpha Investment Management GmbH",
},
{
"sentence": "Die Alpha Investment Management GmbH fungiert als AIFM.",
"value": "Alpha Investment Management GmbH",
},
],
},
{
"name": "Datum",
"description": "Stichtag der Datenerfassung",
"mandatory": True,
"type": KPISettingType.DATE,
"translation": "Date",
"example": "05.05.2025",
"position": 4,
"active": True,
"examples": [
{
"sentence": "Die Daten basieren auf dem Stand vom 05.05.2025.",
"value": "05.05.2025",
},
{
"sentence": "Stichtag der Angaben ist der 05.05.2025.",
"value": "05.05.2025",
},
],
},
{
"name": "Risikoprofil",
"description": "Klassifizierung des Risikos des Fonds",
"mandatory": True,
"type": KPISettingType.STRING,
"translation": "Risk Profile",
"example": "Core/Core++",
"position": 5,
"active": True,
"examples": [
{
"sentence": "Der Fonds hat das Risikoprofil Core/Core++.",
"value": "Core/Core++",
},
{
"sentence": "Einstufung des Fondsrisikos: Core/Core++.",
"value": "Core/Core++",
},
],
},
{
"name": "Artikel",
"description": "Artikel 8 SFDR-Klassifizierung",
"mandatory": False,
"type": KPISettingType.BOOLEAN,
"translation": "Article",
"example": "Artikel 8",
"position": 6,
"active": True,
"examples": [
{
"sentence": "Der Fonds erfüllt die Anforderungen von Artikel 8.",
"value": "Artikel 8",
},
{
"sentence": "Gemäß SFDR fällt dieser Fonds unter Artikel 8.",
"value": "Artikel 8",
},
],
},
{
"name": "Zielrendite",
"description": "Angestrebte jährliche Rendite in Prozent",
"mandatory": True,
"type": KPISettingType.NUMBER,
"translation": "Target Return",
"example": "6.5",
"position": 7,
"active": True,
"examples": [
{
"sentence": "Die angestrebte Zielrendite liegt bei 6.5%.",
"value": "6.5%",
},
{"sentence": "Zielrendite des Fonds beträgt 6.5%.", "value": "6.5%"},
],
},
{
"name": "Rendite",
"description": "Tatsächlich erzielte Rendite in Prozent",
"mandatory": False,
"type": KPISettingType.NUMBER,
"translation": "Return",
"example": "5.8",
"position": 8,
"active": True,
"examples": [
{
"sentence": "Die Rendite für das Jahr beträgt 5.8%.",
"value": "5.8%",
},
{
"sentence": "Im letzten Jahr wurde eine Rendite von 5.8% erzielt.",
"value": "5.8%",
},
],
},
{
"name": "Zielausschüttung",
"description": "Geplante Ausschüttung in Prozent",
"mandatory": False,
"type": KPISettingType.NUMBER,
"translation": "Target Distribution",
"example": "4.0",
"position": 9,
"active": True,
"examples": [
{"sentence": "Die Zielausschüttung beträgt 4.0%.", "value": "4.0%"},
{
"sentence": "Geplante Ausschüttung: 4.0% pro Jahr.",
"value": "4.0%",
},
],
},
{
"name": "Ausschüttung",
"description": "Tatsächliche Ausschüttung in Prozent",
"mandatory": False,
"type": KPISettingType.NUMBER,
"translation": "Distribution",
"example": "3.8",
"position": 10,
"active": True,
"examples": [
{
"sentence": "Die Ausschüttung im Jahr 2024 lag bei 3.8%.",
"value": "3.8%",
},
{
"sentence": "Es wurde eine Ausschüttung von 3.8% vorgenommen.",
"value": "3.8%",
},
],
},
{
"name": "Laufzeit",
"description": "Geplante Laufzeit des Fonds",
"mandatory": True,
"type": KPISettingType.STRING,
"translation": "Duration",
"example": "7 Jahre, 10, Evergreen",
"position": 11,
"active": True,
"examples": [
{
"sentence": "Die Laufzeit des Fonds beträgt 7 Jahre.",
"value": "7 Jahre",
},
{"sentence": "Geplante Dauer: Evergreen-Modell.", "value": "Evergreen"},
],
},
{
"name": "LTV",
"description": "Loan-to-Value Verhältnis in Prozent",
"mandatory": False,
"type": KPISettingType.NUMBER,
"translation": "LTV",
"example": "65.0",
"position": 12,
"active": True,
"examples": [
{"sentence": "Der LTV beträgt 65.0%.", "value": "65.0%"},
{"sentence": "Loan-to-Value-Ratio: 65.0%.", "value": "65.0%"},
],
},
{
"name": "Managementgebühren",
"description": "Jährliche Verwaltungsgebühren in Prozent",
"mandatory": True,
"type": KPISettingType.NUMBER,
"translation": "Management Fees",
"example": "1.5",
"position": 13,
"active": True,
"examples": [
{
"sentence": "Die Managementgebühren betragen jährlich 1.5%.",
"value": "1.5%",
},
{
"sentence": "Für die Verwaltung wird eine Gebühr von 1.5% erhoben.",
"value": "1.5%",
},
],
},
{
"name": "Sektorenallokation",
"description": "Verteilung der Investments nach Sektoren",
"mandatory": False,
"type": KPISettingType.ARRAY,
"translation": "Sector Allocation",
"example": "Büro, Wohnen, Logistik, Studentenwohnen",
"position": 14,
"active": True,
"examples": [
{
"sentence": "Die Sektorenallokation umfasst Büro, Wohnen und Logistik.",
"value": "Büro, Wohnen, Logistik",
},
{
"sentence": "Investiert wird in Büro, Logistik und Studentenwohnen.",
"value": "Büro, Logistik, Studentenwohnen",
},
],
},
{
"name": "Länderallokation",
"description": "Geografische Verteilung der Investments",
"mandatory": False,
"type": KPISettingType.ARRAY,
"translation": "Country Allocation",
"example": "Deutschland,Frankreich, Österreich, Schweiz",
"position": 15,
"active": True,
"examples": [
{
"sentence": "Investitionen erfolgen in Deutschland, Frankreich und Österreich.",
"value": "Deutschland, Frankreich, Österreich",
},
{
"sentence": "Die Länderallokation umfasst Deutschland, Schweiz und Frankreich.",
"value": "Deutschland, Schweiz, Frankreich",
},
],
},
]
@ -165,13 +255,11 @@ def seed_default_kpi_settings():
for kpi_data in default_kpi_settings:
kpi_setting = KPISettingModel(
name=kpi_data["name"],
description=kpi_data["description"],
mandatory=kpi_data["mandatory"],
type=kpi_data["type"],
translation=kpi_data["translation"],
example=kpi_data["example"],
position=kpi_data["position"],
active=kpi_data["active"],
examples=kpi_data.get("examples", []),
)
db.session.add(kpi_setting)

View File

@ -6,9 +6,12 @@ import json
app = Flask(__name__)
VALIDATE_SERVICE_URL = os.getenv("VALIDATE_SERVICE_URL", "http://localhost:5054/validate")
VALIDATE_SERVICE_URL = os.getenv(
"VALIDATE_SERVICE_URL", "http://localhost:5054/validate"
)
@app.route('/extract', methods=['POST'])
@app.route("/extract", methods=["POST"])
def extract_text_from_ocr_json():
json_data = request.get_json()
@ -16,19 +19,19 @@ def extract_text_from_ocr_json():
pages_data = json_data["extracted_text_per_page"]
entities_json = extract_with_exxeta(pages_data, pitchbook_id)
entities = json.loads(entities_json) if isinstance(entities_json, str) else entities_json
entities = (
json.loads(entities_json) if isinstance(entities_json, str) else entities_json
)
validate_payload = {
"id": pitchbook_id,
"service": "exxeta",
"entities": entities
}
validate_payload = {"id": pitchbook_id, "service": "exxeta", "entities": entities}
print(f"[EXXETA] Sending to validate service: {VALIDATE_SERVICE_URL}")
print(f"[EXXETA] Payload: {validate_payload} entities for pitchbook {pitchbook_id}")
try:
response = requests.post(VALIDATE_SERVICE_URL, json=validate_payload, timeout=600)
response = requests.post(
VALIDATE_SERVICE_URL, json=validate_payload, timeout=600
)
print(f"[EXXETA] Validate service response: {response.status_code}")
if response.status_code != 200:
print(f"[EXXETA] Validate service error: {response.text}")

View File

@ -16,6 +16,7 @@ TIMEOUT = 180
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def get_dynamic_labels():
url = f"{COORDINATOR_URL}/api/kpi_setting/"
try:
@ -28,6 +29,7 @@ def get_dynamic_labels():
logger.warning(f"Konnte dynamische Labels nicht laden: {e}")
return []
def extract_with_exxeta(pages_json, pitchbook_id):
results = []
@ -39,7 +41,10 @@ def extract_with_exxeta(pages_json, pitchbook_id):
for page_data in pages_json:
i += 1
if i % 8 == 0:
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35 + 60/len(pages_json)*i})
requests.post(
COORDINATOR_URL + "/api/progress",
json={"id": pitchbook_id, "progress": 35 + 60 / len(pages_json) * i},
)
page_num = page_data.get("page")
text = page_data.get("text", "")
@ -100,23 +105,28 @@ def extract_with_exxeta(pages_json, pitchbook_id):
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {EXXETA_API_KEY}"
"Authorization": f"Bearer {EXXETA_API_KEY}",
}
payload = {
"model": MODEL,
"messages": [
{"role": "system", "content": "Du bist ein Finanzanalyst. Antworte ausschließlich mit einem validen JSON-Array."},
{"role": "user", "content": prompt}
{
"role": "system",
"content": "Du bist ein Finanzanalyst. Antworte ausschließlich mit einem validen JSON-Array.",
},
{"role": "user", "content": prompt},
],
"temperature": 0.0
"temperature": 0.0,
}
url = f"{EXXETA_BASE_URL}/deployments/{MODEL}/chat/completions"
for attempt in range(1, MAX_RETRIES + 1):
try:
response = requests.post(url, headers=headers, json=payload, timeout=TIMEOUT)
response = requests.post(
url, headers=headers, json=payload, timeout=TIMEOUT
)
response.raise_for_status()
content = response.json()["choices"][0]["message"]["content"].strip()
if content.startswith("```json"):
@ -140,9 +150,12 @@ def extract_with_exxeta(pages_json, pitchbook_id):
if attempt == MAX_RETRIES:
results.extend([])
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 95})
requests.post(
COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 95}
)
return json.dumps(results, indent=2, ensure_ascii=False)
if __name__ == "__main__":
print("📡 Test-Aufruf get_dynamic_labels:")
print(get_dynamic_labels())
print(get_dynamic_labels())

View File

@ -29,19 +29,17 @@ def convert_pdf_async(temp_path, pitchbook_id):
temp_path.unlink() # cleanup
return {"error": "OCR processing failed - all PDFs must be OCR'd"}, 500
with open(ocr_path, 'rb') as ocr_file:
with open(ocr_path, "rb") as ocr_file:
ocr_file.seek(0)
result = pdf_to_json(ocr_file)
payload = {
"id": int(pitchbook_id),
"extracted_text_per_page": result["pages"]
}
payload = {"id": int(pitchbook_id), "extracted_text_per_page": result["pages"]}
logger.info("Sending payload to EXXETA and SPACY services")
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35})
requests.post(
COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35}
)
try:
exxeta_response = requests.post(EXXETA_URL, json=payload, timeout=600)
logger.info(f"EXXETA response: {exxeta_response.status_code}")
@ -54,14 +52,16 @@ def convert_pdf_async(temp_path, pitchbook_id):
except Exception as e:
logger.error(f"Error calling SPACY: {e}")
files=[
('file',('',open(ocr_path,'rb'),'application/pdf'))
]
files = [("file", ("", open(ocr_path, "rb"), "application/pdf"))]
headers = {}
try:
requests.put(f"{COORDINATOR_URL}/api/pitch_book/{pitchbook_id}", files=files, timeout=600, headers=headers)
requests.put(
f"{COORDINATOR_URL}/api/pitch_book/{pitchbook_id}",
files=files,
timeout=600,
headers=headers,
)
logger.info("COORDINATOR response: Progress + File updated")
except Exception as e:
logger.error(f"Error calling COORDINATOR: {e}")
@ -72,7 +72,7 @@ def convert_pdf_async(temp_path, pitchbook_id):
logger.error(f"Exception in OCR processing: {str(e)}", exc_info=True)
@app.route('/ocr', methods=['POST'])
@app.route("/ocr", methods=["POST"])
def convert_extract_text_from_pdf():
if "file" not in request.files:
return {"error": "No file"}, 400
@ -85,7 +85,7 @@ def convert_extract_text_from_pdf():
if not pitchbook_id:
return {"error": "No ID"}, 400
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
file.seek(0)
temp_file.write(file.read())
temp_path = Path(temp_file.name)
@ -93,10 +93,7 @@ def convert_extract_text_from_pdf():
thread = threading.Thread(target=convert_pdf_async, args=(temp_path, pitchbook_id))
thread.start()
return {
"status": "sent",
"message": "PDF successfully OCR'd and processed"
}, 200
return {"status": "sent", "message": "PDF successfully OCR'd and processed"}, 200
if __name__ == "__main__":

View File

@ -17,9 +17,10 @@ log_folder = TEMP_DIR / "logs"
output_folder.mkdir(exist_ok=True)
log_folder.mkdir(exist_ok=True)
def pdf_to_json(pdf_input):
try:
if hasattr(pdf_input, 'read'):
if hasattr(pdf_input, "read"):
pdf_input.seek(0)
with pdfplumber.open(pdf_input) as pdf:
@ -83,7 +84,9 @@ def ocr_pdf(input_file_path: Path):
if result.returncode == 0:
if output_file.exists():
logger.info(f"OCR successful, output file size: {output_file.stat().st_size} bytes")
logger.info(
f"OCR successful, output file size: {output_file.stat().st_size} bytes"
)
return output_file
else:
logger.error(f"OCR completed but output file not found: {output_file}")
@ -119,4 +122,4 @@ def extract_text_to_json(pdf_path: Path):
except Exception as e:
logger.error(f"Failed to extract text to JSON: {e}")
return None
return None

View File

@ -40,7 +40,9 @@ def send_to_coordinator_service(processed_data, request_id):
def process_data_async(request_id, spacy_data, exxeta_data):
try:
requests.post(COORDINATOR_URL + "/api/progress", json={"id": request_id, "progress": 95})
requests.post(
COORDINATOR_URL + "/api/progress", json={"id": request_id, "progress": 95}
)
print(f"Start asynchronous processing for PitchBook: {request_id}")
# Perform merge
@ -96,7 +98,6 @@ def validate():
# If both datasets are present, start asynchronous processing
if spacy_data is not None and exxeta_data is not None:
# Start asynchronous processing in a separate thread
processing_thread = threading.Thread(
target=process_data_async,

View File

@ -27,7 +27,6 @@ def merge_entities(spacy_data, exxeta_data):
and s_entity_norm == e_entity_norm
and s_page == e_page
):
merged.append(
{
"label": s["label"],

View File

@ -5,6 +5,8 @@ import os
# SETTINGS = [{"id": "Rendite", "type": "number"}]
COORDINATOR_URL = os.getenv("COORDINATOR_URL", "http://localhost:5000")
def validate_entities(entities):
try:
response = requests.get(COORDINATOR_URL + "/api/kpi_setting/")
@ -42,7 +44,6 @@ def validate_entities(entities):
result.extend(item[1])
continue
# Filter not validated, if there are valid values
validated = False
for entity in item[1]:
@ -61,11 +62,11 @@ def validate_entities(entities):
def validate_number(entity_list, settings):
filtered_kpi = {}
for label, entity_list in entity_list.items():
setting = next((s for s in settings if s["name"].upper() == label), None)
if setting and setting["type"] == "number":
filtered_entities = [
entity for entity in entity_list
entity
for entity in entity_list
if is_valid_number(str(entity["entity"]))
]
for entity in entity_list:
@ -80,8 +81,12 @@ def validate_number(entity_list, settings):
def is_valid_number(number):
    """Return True if *number* looks like a plain numeric value.

    A string is accepted when all of the following hold:

    * it contains at least one digit,
    * it has no two digit runs separated by a single whitespace
      character (e.g. ``"1 000"`` is rejected),
    * it consists solely of digits, ``-``, whitespace, ``%``, ``,``,
      ``.`` and ``€``.

    Args:
        number: The candidate string to check.

    Returns:
        bool: True when the string passes every check, False otherwise.
    """
    pattern = r"^[0-9\-\s%,.€]+$"
    # bool() normalises the result: the bare ``and`` chain would otherwise
    # leak a ``re.Match`` object (or ``None``) to the caller.
    return bool(
        any(char.isdigit() for char in number)
        and not re.search(r"\d+\s\d+", number)
        and re.fullmatch(pattern, number)
    )
def delete_exxeta_unknown(entity_list):
@ -89,11 +94,16 @@ def delete_exxeta_unknown(entity_list):
for label, entity_list in entity_list.items():
# Filter out entities with "nichtangegeben" or "n/a" (case-insensitive and stripped)
filtered_entities = [
entity for entity in entity_list
if str(entity["entity"]).lower().replace(" ", "") not in {"nichtangegeben", "n/a"}
entity
for entity in entity_list
if str(entity["entity"]).lower().replace(" ", "")
not in {"nichtangegeben", "n/a"}
]
for entity in entity_list:
if str(entity["entity"]).lower().replace(" ", "") in {"nichtangegeben", "n/a"}:
if str(entity["entity"]).lower().replace(" ", "") in {
"nichtangegeben",
"n/a",
}:
print(f"filtered out: {entity}")
if filtered_entities: # Only add the label if there are entities left
filtered_kpi[label] = filtered_entities
@ -115,6 +125,7 @@ def delete_duplicate_entities(entity_list):
unique_entities[label] = filtered_entities
return unique_entities
if __name__ == "__main__":
entities = [
# {"label": "PERSON", "entity": "John Doe", "status": "validated"},

View File

@ -122,7 +122,7 @@ export function KPIForm({ mode, initialData, onSave, onCancel, loading = false,
example: formData.example || '',
position: formData.position ?? 0,
active: formData.active ?? true,
examples: [{ sentence: '', value: '' }]
examples: formData.examples ?? []
});
// Formular zurücksetzen:
setFormData(emptyKPI);