KPI_data angepasst
parent
12783539b3
commit
360da3acb0
|
|
@ -29,13 +29,11 @@ def create_kpi_setting():
|
||||||
|
|
||||||
required_fields = [
|
required_fields = [
|
||||||
"name",
|
"name",
|
||||||
"description",
|
|
||||||
"mandatory",
|
"mandatory",
|
||||||
"type",
|
"type",
|
||||||
"translation",
|
|
||||||
"example",
|
|
||||||
"position",
|
"position",
|
||||||
"active",
|
"active",
|
||||||
|
"examples",
|
||||||
]
|
]
|
||||||
for field in required_fields:
|
for field in required_fields:
|
||||||
if field not in data:
|
if field not in data:
|
||||||
|
|
@ -55,13 +53,11 @@ def create_kpi_setting():
|
||||||
|
|
||||||
new_kpi_setting = KPISettingModel(
|
new_kpi_setting = KPISettingModel(
|
||||||
name=data["name"],
|
name=data["name"],
|
||||||
description=data["description"],
|
|
||||||
mandatory=data["mandatory"],
|
mandatory=data["mandatory"],
|
||||||
type=kpi_type,
|
type=kpi_type,
|
||||||
translation=data["translation"],
|
|
||||||
example=data["example"],
|
|
||||||
position=data["position"],
|
position=data["position"],
|
||||||
active=data["active"],
|
active=data["active"],
|
||||||
|
examples=data.get("examples", []),
|
||||||
)
|
)
|
||||||
|
|
||||||
db.session.add(new_kpi_setting)
|
db.session.add(new_kpi_setting)
|
||||||
|
|
@ -84,9 +80,6 @@ def update_kpi_setting(id):
|
||||||
return jsonify({"error": "KPI Setting with this name already exists"}), 409
|
return jsonify({"error": "KPI Setting with this name already exists"}), 409
|
||||||
kpi_setting.name = data["name"]
|
kpi_setting.name = data["name"]
|
||||||
|
|
||||||
if "description" in data:
|
|
||||||
kpi_setting.description = data["description"]
|
|
||||||
|
|
||||||
if "mandatory" in data:
|
if "mandatory" in data:
|
||||||
kpi_setting.mandatory = data["mandatory"]
|
kpi_setting.mandatory = data["mandatory"]
|
||||||
|
|
||||||
|
|
@ -100,18 +93,15 @@ def update_kpi_setting(id):
|
||||||
400,
|
400,
|
||||||
)
|
)
|
||||||
|
|
||||||
if "translation" in data:
|
|
||||||
kpi_setting.translation = data["translation"]
|
|
||||||
|
|
||||||
if "example" in data:
|
|
||||||
kpi_setting.example = data["example"]
|
|
||||||
|
|
||||||
if "position" in data:
|
if "position" in data:
|
||||||
kpi_setting.position = data["position"]
|
kpi_setting.position = data["position"]
|
||||||
|
|
||||||
if "active" in data:
|
if "active" in data:
|
||||||
kpi_setting.active = data["active"]
|
kpi_setting.active = data["active"]
|
||||||
|
|
||||||
|
if "examples" in data:
|
||||||
|
kpi_setting.examples = data["examples"]
|
||||||
|
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
return jsonify(kpi_setting.to_dict()), 200
|
return jsonify(kpi_setting.to_dict()), 200
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@ from model.database import db
|
||||||
from sqlalchemy.orm import Mapped, mapped_column
|
from sqlalchemy.orm import Mapped, mapped_column
|
||||||
from sqlalchemy import Enum as SQLAlchemyEnum
|
from sqlalchemy import Enum as SQLAlchemyEnum
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
from sqlalchemy.dialects.postgresql import JSONB
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
|
||||||
class KPISettingType(Enum):
|
class KPISettingType(Enum):
|
||||||
|
|
@ -18,37 +20,31 @@ class KPISettingModel(db.Model):
|
||||||
|
|
||||||
id: Mapped[int] = mapped_column(primary_key=True)
|
id: Mapped[int] = mapped_column(primary_key=True)
|
||||||
name: Mapped[str] = mapped_column(unique=True)
|
name: Mapped[str] = mapped_column(unique=True)
|
||||||
description: Mapped[str]
|
|
||||||
mandatory: Mapped[bool]
|
mandatory: Mapped[bool]
|
||||||
type: Mapped[KPISettingType] = mapped_column(
|
type: Mapped[KPISettingType] = mapped_column(
|
||||||
SQLAlchemyEnum(KPISettingType, native_enum=True)
|
SQLAlchemyEnum(KPISettingType, native_enum=True)
|
||||||
)
|
)
|
||||||
translation: Mapped[str]
|
|
||||||
example: Mapped[str]
|
|
||||||
position: Mapped[int]
|
position: Mapped[int]
|
||||||
active: Mapped[bool]
|
active: Mapped[bool]
|
||||||
|
examples: Mapped[list] = mapped_column(JSONB, default=[])
|
||||||
|
|
||||||
def to_dict(self):
|
def to_dict(self):
|
||||||
return {
|
return OrderedDict(
|
||||||
"id": self.id,
|
[
|
||||||
"name": self.name,
|
("id", self.id),
|
||||||
"description": self.description,
|
("name", self.name),
|
||||||
"mandatory": self.mandatory,
|
("mandatory", self.mandatory),
|
||||||
"type": self.type.value,
|
("type", self.type.value),
|
||||||
"translation": self.translation,
|
("position", self.position),
|
||||||
"example": self.example,
|
("examples", self.examples),
|
||||||
"position": self.position,
|
("active", self.active),
|
||||||
"active": self.active,
|
]
|
||||||
}
|
)
|
||||||
|
|
||||||
def __init__(
|
def __init__(self, name, mandatory, type, position, active, examples=None):
|
||||||
self, name, description, mandatory, type, translation, example, position, active
|
|
||||||
):
|
|
||||||
self.name = name
|
self.name = name
|
||||||
self.description = description
|
|
||||||
self.mandatory = mandatory
|
self.mandatory = mandatory
|
||||||
self.type = type
|
self.type = type
|
||||||
self.translation = translation
|
|
||||||
self.example = example
|
|
||||||
self.position = position
|
self.position = position
|
||||||
self.active = active
|
self.active = active
|
||||||
|
self.examples = examples or []
|
||||||
|
|
|
||||||
|
|
@ -10,153 +10,243 @@ def seed_default_kpi_settings():
|
||||||
default_kpi_settings = [
|
default_kpi_settings = [
|
||||||
{
|
{
|
||||||
"name": "Fondsname",
|
"name": "Fondsname",
|
||||||
"description": "Der vollständige Name des Investmentfonds",
|
|
||||||
"mandatory": True,
|
"mandatory": True,
|
||||||
"type": KPISettingType.STRING,
|
"type": KPISettingType.STRING,
|
||||||
"translation": "Fund Name",
|
|
||||||
"example": "Alpha Real Estate Fund I",
|
|
||||||
"position": 1,
|
"position": 1,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{
|
||||||
|
"sentence": "Der Fonds trägt den Namen Alpha Real Estate Fund I.",
|
||||||
|
"value": "Alpha Real Estate Fund I",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sentence": "Im Pitchbook wird der Fondsname als Alpha Real Estate Fund I angegeben.",
|
||||||
|
"value": "Alpha Real Estate Fund I",
|
||||||
|
},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Fondsmanager",
|
"name": "Fondsmanager",
|
||||||
"description": "Verantwortlicher Manager für die Fondsverwaltung",
|
|
||||||
"mandatory": True,
|
"mandatory": True,
|
||||||
"type": KPISettingType.STRING,
|
"type": KPISettingType.STRING,
|
||||||
"translation": "Fund Manager",
|
|
||||||
"example": "Max Mustermann",
|
|
||||||
"position": 2,
|
"position": 2,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{
|
||||||
|
"sentence": "Fondsmanager des Projekts ist Max Mustermann.",
|
||||||
|
"value": "Max Mustermann",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sentence": "Die Verwaltung liegt bei Max Mustermann.",
|
||||||
|
"value": "Max Mustermann",
|
||||||
|
},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "AIFM",
|
"name": "AIFM",
|
||||||
"description": "Alternative Investment Fund Manager",
|
|
||||||
"mandatory": True,
|
"mandatory": True,
|
||||||
"type": KPISettingType.STRING,
|
"type": KPISettingType.STRING,
|
||||||
"translation": "AIFM",
|
|
||||||
"example": "Alpha Investment Management GmbH",
|
|
||||||
"position": 3,
|
"position": 3,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{
|
||||||
|
"sentence": "AIFM ist die Alpha Investment Management GmbH.",
|
||||||
|
"value": "Alpha Investment Management GmbH",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sentence": "Die Alpha Investment Management GmbH fungiert als AIFM.",
|
||||||
|
"value": "Alpha Investment Management GmbH",
|
||||||
|
},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Datum",
|
"name": "Datum",
|
||||||
"description": "Stichtag der Datenerfassung",
|
|
||||||
"mandatory": True,
|
"mandatory": True,
|
||||||
"type": KPISettingType.DATE,
|
"type": KPISettingType.DATE,
|
||||||
"translation": "Date",
|
|
||||||
"example": "05.05.2025",
|
|
||||||
"position": 4,
|
"position": 4,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{
|
||||||
|
"sentence": "Die Daten basieren auf dem Stand vom 05.05.2025.",
|
||||||
|
"value": "05.05.2025",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sentence": "Stichtag der Angaben ist der 05.05.2025.",
|
||||||
|
"value": "05.05.2025",
|
||||||
|
},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Risikoprofil",
|
"name": "Risikoprofil",
|
||||||
"description": "Klassifizierung des Risikos des Fonds",
|
|
||||||
"mandatory": True,
|
"mandatory": True,
|
||||||
"type": KPISettingType.STRING,
|
"type": KPISettingType.STRING,
|
||||||
"translation": "Risk Profile",
|
|
||||||
"example": "Core/Core++",
|
|
||||||
"position": 5,
|
"position": 5,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{
|
||||||
|
"sentence": "Der Fonds hat das Risikoprofil Core/Core++.",
|
||||||
|
"value": "Core/Core++",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sentence": "Einstufung des Fondsrisikos: Core/Core++.",
|
||||||
|
"value": "Core/Core++",
|
||||||
|
},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Artikel",
|
"name": "Artikel",
|
||||||
"description": "Artikel 8 SFDR-Klassifizierung",
|
|
||||||
"mandatory": False,
|
"mandatory": False,
|
||||||
"type": KPISettingType.BOOLEAN,
|
"type": KPISettingType.BOOLEAN,
|
||||||
"translation": "Article",
|
|
||||||
"example": "Artikel 8",
|
|
||||||
"position": 6,
|
"position": 6,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{
|
||||||
|
"sentence": "Der Fonds erfüllt die Anforderungen von Artikel 8.",
|
||||||
|
"value": "Artikel 8",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sentence": "Gemäß SFDR fällt dieser Fonds unter Artikel 8.",
|
||||||
|
"value": "Artikel 8",
|
||||||
|
},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Zielrendite",
|
"name": "Zielrendite",
|
||||||
"description": "Angestrebte jährliche Rendite in Prozent",
|
|
||||||
"mandatory": True,
|
"mandatory": True,
|
||||||
"type": KPISettingType.NUMBER,
|
"type": KPISettingType.NUMBER,
|
||||||
"translation": "Target Return",
|
|
||||||
"example": "6.5",
|
|
||||||
"position": 7,
|
"position": 7,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{
|
||||||
|
"sentence": "Die angestrebte Zielrendite liegt bei 6.5 %.",
|
||||||
|
"value": "6.5 %",
|
||||||
|
},
|
||||||
|
{"sentence": "Zielrendite des Fonds beträgt 6.5 %.", "value": "6.5 %"},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Rendite",
|
"name": "Rendite",
|
||||||
"description": "Tatsächlich erzielte Rendite in Prozent",
|
|
||||||
"mandatory": False,
|
"mandatory": False,
|
||||||
"type": KPISettingType.NUMBER,
|
"type": KPISettingType.NUMBER,
|
||||||
"translation": "Return",
|
|
||||||
"example": "5.8",
|
|
||||||
"position": 8,
|
"position": 8,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{
|
||||||
|
"sentence": "Die Rendite für das Jahr beträgt 5.8 %.",
|
||||||
|
"value": "5.8 %",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sentence": "Im letzten Jahr wurde eine Rendite von 5.8 % erzielt.",
|
||||||
|
"value": "5.8 %",
|
||||||
|
},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Zielausschüttung",
|
"name": "Zielausschüttung",
|
||||||
"description": "Geplante Ausschüttung in Prozent",
|
|
||||||
"mandatory": False,
|
"mandatory": False,
|
||||||
"type": KPISettingType.NUMBER,
|
"type": KPISettingType.NUMBER,
|
||||||
"translation": "Target Distribution",
|
|
||||||
"example": "4.0",
|
|
||||||
"position": 9,
|
"position": 9,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{"sentence": "Die Zielausschüttung beträgt 4.0 %.", "value": "4.0 %"},
|
||||||
|
{
|
||||||
|
"sentence": "Geplante Ausschüttung: 4.0 % pro Jahr.",
|
||||||
|
"value": "4.0 %",
|
||||||
|
},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Ausschüttung",
|
"name": "Ausschüttung",
|
||||||
"description": "Tatsächliche Ausschüttung in Prozent",
|
|
||||||
"mandatory": False,
|
"mandatory": False,
|
||||||
"type": KPISettingType.NUMBER,
|
"type": KPISettingType.NUMBER,
|
||||||
"translation": "Distribution",
|
|
||||||
"example": "3.8",
|
|
||||||
"position": 10,
|
"position": 10,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{
|
||||||
|
"sentence": "Die Ausschüttung im Jahr 2024 lag bei 3.8 %.",
|
||||||
|
"value": "3.8 %",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sentence": "Es wurde eine Ausschüttung von 3.8 % vorgenommen.",
|
||||||
|
"value": "3.8 %",
|
||||||
|
},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Laufzeit",
|
"name": "Laufzeit",
|
||||||
"description": "Geplante Laufzeit des Fonds",
|
|
||||||
"mandatory": True,
|
"mandatory": True,
|
||||||
"type": KPISettingType.STRING,
|
"type": KPISettingType.STRING,
|
||||||
"translation": "Duration",
|
|
||||||
"example": "7 Jahre, 10, Evergreen",
|
|
||||||
"position": 11,
|
"position": 11,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{
|
||||||
|
"sentence": "Die Laufzeit des Fonds beträgt 7 Jahre.",
|
||||||
|
"value": "7 Jahre",
|
||||||
|
},
|
||||||
|
{"sentence": "Geplante Dauer: Evergreen-Modell.", "value": "Evergreen"},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "LTV",
|
"name": "LTV",
|
||||||
"description": "Loan-to-Value Verhältnis in Prozent",
|
|
||||||
"mandatory": False,
|
"mandatory": False,
|
||||||
"type": KPISettingType.NUMBER,
|
"type": KPISettingType.NUMBER,
|
||||||
"translation": "LTV",
|
|
||||||
"example": "65.0",
|
|
||||||
"position": 12,
|
"position": 12,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{"sentence": "Der LTV beträgt 65.0 %.", "value": "65.0 %"},
|
||||||
|
{"sentence": "Loan-to-Value-Ratio: 65.0 %.", "value": "65.0 %"},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Managementgebühren",
|
"name": "Managementgebühren",
|
||||||
"description": "Jährliche Verwaltungsgebühren in Prozent",
|
|
||||||
"mandatory": True,
|
"mandatory": True,
|
||||||
"type": KPISettingType.NUMBER,
|
"type": KPISettingType.NUMBER,
|
||||||
"translation": "Management Fees",
|
|
||||||
"example": "1.5",
|
|
||||||
"position": 13,
|
"position": 13,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{
|
||||||
|
"sentence": "Die Managementgebühren betragen jährlich 1.5 %.",
|
||||||
|
"value": "1.5 %",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sentence": "Für die Verwaltung wird eine Gebühr von 1.5 % erhoben.",
|
||||||
|
"value": "1.5 %",
|
||||||
|
},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Sektorenallokation",
|
"name": "Sektorenallokation",
|
||||||
"description": "Verteilung der Investments nach Sektoren",
|
|
||||||
"mandatory": False,
|
"mandatory": False,
|
||||||
"type": KPISettingType.ARRAY,
|
"type": KPISettingType.ARRAY,
|
||||||
"translation": "Sector Allocation",
|
|
||||||
"example": "Büro, Wohnen, Logistik, Studentenwohnen",
|
|
||||||
"position": 14,
|
"position": 14,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{
|
||||||
|
"sentence": "Die Sektorenallokation umfasst Büro, Wohnen und Logistik.",
|
||||||
|
"value": "Büro, Wohnen, Logistik",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sentence": "Investiert wird in Büro, Logistik und Studentenwohnen.",
|
||||||
|
"value": "Büro, Logistik, Studentenwohnen",
|
||||||
|
},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Länderallokation",
|
"name": "Länderallokation",
|
||||||
"description": "Geografische Verteilung der Investments",
|
|
||||||
"mandatory": False,
|
"mandatory": False,
|
||||||
"type": KPISettingType.ARRAY,
|
"type": KPISettingType.ARRAY,
|
||||||
"translation": "Country Allocation",
|
|
||||||
"example": "Deutschland,Frankreich, Österreich, Schweiz",
|
|
||||||
"position": 15,
|
"position": 15,
|
||||||
"active": True,
|
"active": True,
|
||||||
|
"examples": [
|
||||||
|
{
|
||||||
|
"sentence": "Investitionen erfolgen in Deutschland, Frankreich und Österreich.",
|
||||||
|
"value": "Deutschland, Frankreich, Österreich",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"sentence": "Die Länderallokation umfasst Deutschland, Schweiz und Frankreich.",
|
||||||
|
"value": "Deutschland, Schweiz, Frankreich",
|
||||||
|
},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -165,13 +255,11 @@ def seed_default_kpi_settings():
|
||||||
for kpi_data in default_kpi_settings:
|
for kpi_data in default_kpi_settings:
|
||||||
kpi_setting = KPISettingModel(
|
kpi_setting = KPISettingModel(
|
||||||
name=kpi_data["name"],
|
name=kpi_data["name"],
|
||||||
description=kpi_data["description"],
|
|
||||||
mandatory=kpi_data["mandatory"],
|
mandatory=kpi_data["mandatory"],
|
||||||
type=kpi_data["type"],
|
type=kpi_data["type"],
|
||||||
translation=kpi_data["translation"],
|
|
||||||
example=kpi_data["example"],
|
|
||||||
position=kpi_data["position"],
|
position=kpi_data["position"],
|
||||||
active=kpi_data["active"],
|
active=kpi_data["active"],
|
||||||
|
examples=kpi_data.get("examples", []),
|
||||||
)
|
)
|
||||||
|
|
||||||
db.session.add(kpi_setting)
|
db.session.add(kpi_setting)
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,12 @@ import json
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
VALIDATE_SERVICE_URL = os.getenv("VALIDATE_SERVICE_URL", "http://localhost:5054/validate")
|
VALIDATE_SERVICE_URL = os.getenv(
|
||||||
|
"VALIDATE_SERVICE_URL", "http://localhost:5054/validate"
|
||||||
|
)
|
||||||
|
|
||||||
@app.route('/extract', methods=['POST'])
|
|
||||||
|
@app.route("/extract", methods=["POST"])
|
||||||
def extract_text_from_ocr_json():
|
def extract_text_from_ocr_json():
|
||||||
json_data = request.get_json()
|
json_data = request.get_json()
|
||||||
|
|
||||||
|
|
@ -16,19 +19,19 @@ def extract_text_from_ocr_json():
|
||||||
pages_data = json_data["extracted_text_per_page"]
|
pages_data = json_data["extracted_text_per_page"]
|
||||||
|
|
||||||
entities_json = extract_with_exxeta(pages_data, pitchbook_id)
|
entities_json = extract_with_exxeta(pages_data, pitchbook_id)
|
||||||
entities = json.loads(entities_json) if isinstance(entities_json, str) else entities_json
|
entities = (
|
||||||
|
json.loads(entities_json) if isinstance(entities_json, str) else entities_json
|
||||||
|
)
|
||||||
|
|
||||||
validate_payload = {
|
validate_payload = {"id": pitchbook_id, "service": "exxeta", "entities": entities}
|
||||||
"id": pitchbook_id,
|
|
||||||
"service": "exxeta",
|
|
||||||
"entities": entities
|
|
||||||
}
|
|
||||||
|
|
||||||
print(f"[EXXETA] Sending to validate service: {VALIDATE_SERVICE_URL}")
|
print(f"[EXXETA] Sending to validate service: {VALIDATE_SERVICE_URL}")
|
||||||
print(f"[EXXETA] Payload: {validate_payload} entities for pitchbook {pitchbook_id}")
|
print(f"[EXXETA] Payload: {validate_payload} entities for pitchbook {pitchbook_id}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = requests.post(VALIDATE_SERVICE_URL, json=validate_payload, timeout=600)
|
response = requests.post(
|
||||||
|
VALIDATE_SERVICE_URL, json=validate_payload, timeout=600
|
||||||
|
)
|
||||||
print(f"[EXXETA] Validate service response: {response.status_code}")
|
print(f"[EXXETA] Validate service response: {response.status_code}")
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
print(f"[EXXETA] Validate service error: {response.text}")
|
print(f"[EXXETA] Validate service error: {response.text}")
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ TIMEOUT = 180
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def get_dynamic_labels():
|
def get_dynamic_labels():
|
||||||
url = f"{COORDINATOR_URL}/api/kpi_setting/"
|
url = f"{COORDINATOR_URL}/api/kpi_setting/"
|
||||||
try:
|
try:
|
||||||
|
|
@ -28,6 +29,7 @@ def get_dynamic_labels():
|
||||||
logger.warning(f"Konnte dynamische Labels nicht laden: {e}")
|
logger.warning(f"Konnte dynamische Labels nicht laden: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
def extract_with_exxeta(pages_json, pitchbook_id):
|
def extract_with_exxeta(pages_json, pitchbook_id):
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
|
|
@ -39,7 +41,10 @@ def extract_with_exxeta(pages_json, pitchbook_id):
|
||||||
for page_data in pages_json:
|
for page_data in pages_json:
|
||||||
i += 1
|
i += 1
|
||||||
if i % 8 == 0:
|
if i % 8 == 0:
|
||||||
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35 + 60/len(pages_json)*i})
|
requests.post(
|
||||||
|
COORDINATOR_URL + "/api/progress",
|
||||||
|
json={"id": pitchbook_id, "progress": 35 + 60 / len(pages_json) * i},
|
||||||
|
)
|
||||||
|
|
||||||
page_num = page_data.get("page")
|
page_num = page_data.get("page")
|
||||||
text = page_data.get("text", "")
|
text = page_data.get("text", "")
|
||||||
|
|
@ -100,23 +105,28 @@ def extract_with_exxeta(pages_json, pitchbook_id):
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
"Authorization": f"Bearer {EXXETA_API_KEY}"
|
"Authorization": f"Bearer {EXXETA_API_KEY}",
|
||||||
}
|
}
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": MODEL,
|
"model": MODEL,
|
||||||
"messages": [
|
"messages": [
|
||||||
{"role": "system", "content": "Du bist ein Finanzanalyst. Antworte ausschließlich mit einem validen JSON-Array."},
|
{
|
||||||
{"role": "user", "content": prompt}
|
"role": "system",
|
||||||
|
"content": "Du bist ein Finanzanalyst. Antworte ausschließlich mit einem validen JSON-Array.",
|
||||||
|
},
|
||||||
|
{"role": "user", "content": prompt},
|
||||||
],
|
],
|
||||||
"temperature": 0.0
|
"temperature": 0.0,
|
||||||
}
|
}
|
||||||
|
|
||||||
url = f"{EXXETA_BASE_URL}/deployments/{MODEL}/chat/completions"
|
url = f"{EXXETA_BASE_URL}/deployments/{MODEL}/chat/completions"
|
||||||
|
|
||||||
for attempt in range(1, MAX_RETRIES + 1):
|
for attempt in range(1, MAX_RETRIES + 1):
|
||||||
try:
|
try:
|
||||||
response = requests.post(url, headers=headers, json=payload, timeout=TIMEOUT)
|
response = requests.post(
|
||||||
|
url, headers=headers, json=payload, timeout=TIMEOUT
|
||||||
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
content = response.json()["choices"][0]["message"]["content"].strip()
|
content = response.json()["choices"][0]["message"]["content"].strip()
|
||||||
if content.startswith("```json"):
|
if content.startswith("```json"):
|
||||||
|
|
@ -140,9 +150,12 @@ def extract_with_exxeta(pages_json, pitchbook_id):
|
||||||
if attempt == MAX_RETRIES:
|
if attempt == MAX_RETRIES:
|
||||||
results.extend([])
|
results.extend([])
|
||||||
|
|
||||||
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 95})
|
requests.post(
|
||||||
|
COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 95}
|
||||||
|
)
|
||||||
return json.dumps(results, indent=2, ensure_ascii=False)
|
return json.dumps(results, indent=2, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print("📡 Test-Aufruf get_dynamic_labels:")
|
print("📡 Test-Aufruf get_dynamic_labels:")
|
||||||
print(get_dynamic_labels())
|
print(get_dynamic_labels())
|
||||||
|
|
@ -29,19 +29,17 @@ def convert_pdf_async(temp_path, pitchbook_id):
|
||||||
temp_path.unlink() # cleanup
|
temp_path.unlink() # cleanup
|
||||||
return {"error": "OCR processing failed - all PDFs must be OCR'd"}, 500
|
return {"error": "OCR processing failed - all PDFs must be OCR'd"}, 500
|
||||||
|
|
||||||
with open(ocr_path, 'rb') as ocr_file:
|
with open(ocr_path, "rb") as ocr_file:
|
||||||
ocr_file.seek(0)
|
ocr_file.seek(0)
|
||||||
result = pdf_to_json(ocr_file)
|
result = pdf_to_json(ocr_file)
|
||||||
|
|
||||||
|
payload = {"id": int(pitchbook_id), "extracted_text_per_page": result["pages"]}
|
||||||
payload = {
|
|
||||||
"id": int(pitchbook_id),
|
|
||||||
"extracted_text_per_page": result["pages"]
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info("Sending payload to EXXETA and SPACY services")
|
logger.info("Sending payload to EXXETA and SPACY services")
|
||||||
|
|
||||||
requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35})
|
requests.post(
|
||||||
|
COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35}
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
exxeta_response = requests.post(EXXETA_URL, json=payload, timeout=600)
|
exxeta_response = requests.post(EXXETA_URL, json=payload, timeout=600)
|
||||||
logger.info(f"EXXETA response: {exxeta_response.status_code}")
|
logger.info(f"EXXETA response: {exxeta_response.status_code}")
|
||||||
|
|
@ -54,14 +52,16 @@ def convert_pdf_async(temp_path, pitchbook_id):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error calling SPACY: {e}")
|
logger.error(f"Error calling SPACY: {e}")
|
||||||
|
|
||||||
files=[
|
files = [("file", ("", open(ocr_path, "rb"), "application/pdf"))]
|
||||||
('file',('',open(ocr_path,'rb'),'application/pdf'))
|
|
||||||
]
|
|
||||||
headers = {}
|
headers = {}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
requests.put(
|
||||||
requests.put(f"{COORDINATOR_URL}/api/pitch_book/{pitchbook_id}", files=files, timeout=600, headers=headers)
|
f"{COORDINATOR_URL}/api/pitch_book/{pitchbook_id}",
|
||||||
|
files=files,
|
||||||
|
timeout=600,
|
||||||
|
headers=headers,
|
||||||
|
)
|
||||||
logger.info("COORDINATOR response: Progress + File updated")
|
logger.info("COORDINATOR response: Progress + File updated")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error calling COORDINATOR: {e}")
|
logger.error(f"Error calling COORDINATOR: {e}")
|
||||||
|
|
@ -72,7 +72,7 @@ def convert_pdf_async(temp_path, pitchbook_id):
|
||||||
logger.error(f"Exception in OCR processing: {str(e)}", exc_info=True)
|
logger.error(f"Exception in OCR processing: {str(e)}", exc_info=True)
|
||||||
|
|
||||||
|
|
||||||
@app.route('/ocr', methods=['POST'])
|
@app.route("/ocr", methods=["POST"])
|
||||||
def convert_extract_text_from_pdf():
|
def convert_extract_text_from_pdf():
|
||||||
if "file" not in request.files:
|
if "file" not in request.files:
|
||||||
return {"error": "No file"}, 400
|
return {"error": "No file"}, 400
|
||||||
|
|
@ -85,7 +85,7 @@ def convert_extract_text_from_pdf():
|
||||||
if not pitchbook_id:
|
if not pitchbook_id:
|
||||||
return {"error": "No ID"}, 400
|
return {"error": "No ID"}, 400
|
||||||
|
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
|
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
||||||
file.seek(0)
|
file.seek(0)
|
||||||
temp_file.write(file.read())
|
temp_file.write(file.read())
|
||||||
temp_path = Path(temp_file.name)
|
temp_path = Path(temp_file.name)
|
||||||
|
|
@ -93,10 +93,7 @@ def convert_extract_text_from_pdf():
|
||||||
thread = threading.Thread(target=convert_pdf_async, args=(temp_path, pitchbook_id))
|
thread = threading.Thread(target=convert_pdf_async, args=(temp_path, pitchbook_id))
|
||||||
thread.start()
|
thread.start()
|
||||||
|
|
||||||
return {
|
return {"status": "sent", "message": "PDF successfully OCR'd and processed"}, 200
|
||||||
"status": "sent",
|
|
||||||
"message": "PDF successfully OCR'd and processed"
|
|
||||||
}, 200
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -17,9 +17,10 @@ log_folder = TEMP_DIR / "logs"
|
||||||
output_folder.mkdir(exist_ok=True)
|
output_folder.mkdir(exist_ok=True)
|
||||||
log_folder.mkdir(exist_ok=True)
|
log_folder.mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def pdf_to_json(pdf_input):
|
def pdf_to_json(pdf_input):
|
||||||
try:
|
try:
|
||||||
if hasattr(pdf_input, 'read'):
|
if hasattr(pdf_input, "read"):
|
||||||
pdf_input.seek(0)
|
pdf_input.seek(0)
|
||||||
|
|
||||||
with pdfplumber.open(pdf_input) as pdf:
|
with pdfplumber.open(pdf_input) as pdf:
|
||||||
|
|
@ -83,7 +84,9 @@ def ocr_pdf(input_file_path: Path):
|
||||||
|
|
||||||
if result.returncode == 0:
|
if result.returncode == 0:
|
||||||
if output_file.exists():
|
if output_file.exists():
|
||||||
logger.info(f"OCR successful, output file size: {output_file.stat().st_size} bytes")
|
logger.info(
|
||||||
|
f"OCR successful, output file size: {output_file.stat().st_size} bytes"
|
||||||
|
)
|
||||||
return output_file
|
return output_file
|
||||||
else:
|
else:
|
||||||
logger.error(f"OCR completed but output file not found: {output_file}")
|
logger.error(f"OCR completed but output file not found: {output_file}")
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,9 @@ def send_to_coordinator_service(processed_data, request_id):
|
||||||
|
|
||||||
def process_data_async(request_id, spacy_data, exxeta_data):
|
def process_data_async(request_id, spacy_data, exxeta_data):
|
||||||
try:
|
try:
|
||||||
requests.post(COORDINATOR_URL + "/api/progress", json={"id": request_id, "progress": 95})
|
requests.post(
|
||||||
|
COORDINATOR_URL + "/api/progress", json={"id": request_id, "progress": 95}
|
||||||
|
)
|
||||||
print(f"Start asynchronous processing for PitchBook: {request_id}")
|
print(f"Start asynchronous processing for PitchBook: {request_id}")
|
||||||
|
|
||||||
# Perform merge
|
# Perform merge
|
||||||
|
|
@ -96,7 +98,6 @@ def validate():
|
||||||
|
|
||||||
# If both datasets are present, start asynchronous processing
|
# If both datasets are present, start asynchronous processing
|
||||||
if spacy_data is not None and exxeta_data is not None:
|
if spacy_data is not None and exxeta_data is not None:
|
||||||
|
|
||||||
# Start asynchronous processing in a separate thread
|
# Start asynchronous processing in a separate thread
|
||||||
processing_thread = threading.Thread(
|
processing_thread = threading.Thread(
|
||||||
target=process_data_async,
|
target=process_data_async,
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,6 @@ def merge_entities(spacy_data, exxeta_data):
|
||||||
and s_entity_norm == e_entity_norm
|
and s_entity_norm == e_entity_norm
|
||||||
and s_page == e_page
|
and s_page == e_page
|
||||||
):
|
):
|
||||||
|
|
||||||
merged.append(
|
merged.append(
|
||||||
{
|
{
|
||||||
"label": s["label"],
|
"label": s["label"],
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@ import os
|
||||||
|
|
||||||
# SETTINGS = [{"id": "Rendite", "type": "number"}]
|
# SETTINGS = [{"id": "Rendite", "type": "number"}]
|
||||||
COORDINATOR_URL = os.getenv("COORDINATOR_URL", "http://localhost:5000")
|
COORDINATOR_URL = os.getenv("COORDINATOR_URL", "http://localhost:5000")
|
||||||
|
|
||||||
|
|
||||||
def validate_entities(entities):
|
def validate_entities(entities):
|
||||||
try:
|
try:
|
||||||
response = requests.get(COORDINATOR_URL + "/api/kpi_setting/")
|
response = requests.get(COORDINATOR_URL + "/api/kpi_setting/")
|
||||||
|
|
@ -42,7 +44,6 @@ def validate_entities(entities):
|
||||||
result.extend(item[1])
|
result.extend(item[1])
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
# Filter not validated, if there are valid values
|
# Filter not validated, if there are valid values
|
||||||
validated = False
|
validated = False
|
||||||
for entity in item[1]:
|
for entity in item[1]:
|
||||||
|
|
@ -61,11 +62,11 @@ def validate_entities(entities):
|
||||||
def validate_number(entity_list, settings):
|
def validate_number(entity_list, settings):
|
||||||
filtered_kpi = {}
|
filtered_kpi = {}
|
||||||
for label, entity_list in entity_list.items():
|
for label, entity_list in entity_list.items():
|
||||||
|
|
||||||
setting = next((s for s in settings if s["name"].upper() == label), None)
|
setting = next((s for s in settings if s["name"].upper() == label), None)
|
||||||
if setting and setting["type"] == "number":
|
if setting and setting["type"] == "number":
|
||||||
filtered_entities = [
|
filtered_entities = [
|
||||||
entity for entity in entity_list
|
entity
|
||||||
|
for entity in entity_list
|
||||||
if is_valid_number(str(entity["entity"]))
|
if is_valid_number(str(entity["entity"]))
|
||||||
]
|
]
|
||||||
for entity in entity_list:
|
for entity in entity_list:
|
||||||
|
|
@ -80,8 +81,12 @@ def validate_number(entity_list, settings):
|
||||||
|
|
||||||
|
|
||||||
def is_valid_number(number):
|
def is_valid_number(number):
|
||||||
pattern = r'^[0-9\-\s%,.€]+$'
|
pattern = r"^[0-9\-\s%,.€]+$"
|
||||||
return any(char.isdigit() for char in number) and not re.search(r'\d+\s\d+', number) and re.fullmatch(pattern, number)
|
return (
|
||||||
|
any(char.isdigit() for char in number)
|
||||||
|
and not re.search(r"\d+\s\d+", number)
|
||||||
|
and re.fullmatch(pattern, number)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def delete_exxeta_unknown(entity_list):
|
def delete_exxeta_unknown(entity_list):
|
||||||
|
|
@ -89,11 +94,16 @@ def delete_exxeta_unknown(entity_list):
|
||||||
for label, entity_list in entity_list.items():
|
for label, entity_list in entity_list.items():
|
||||||
# Filter out entities with "nichtangegeben" or "n/a" (case-insensitive and stripped)
|
# Filter out entities with "nichtangegeben" or "n/a" (case-insensitive and stripped)
|
||||||
filtered_entities = [
|
filtered_entities = [
|
||||||
entity for entity in entity_list
|
entity
|
||||||
if str(entity["entity"]).lower().replace(" ", "") not in {"nichtangegeben", "n/a"}
|
for entity in entity_list
|
||||||
|
if str(entity["entity"]).lower().replace(" ", "")
|
||||||
|
not in {"nichtangegeben", "n/a"}
|
||||||
]
|
]
|
||||||
for entity in entity_list:
|
for entity in entity_list:
|
||||||
if str(entity["entity"]).lower().replace(" ", "") in {"nichtangegeben", "n/a"}:
|
if str(entity["entity"]).lower().replace(" ", "") in {
|
||||||
|
"nichtangegeben",
|
||||||
|
"n/a",
|
||||||
|
}:
|
||||||
print(f"filtered out: {entity}")
|
print(f"filtered out: {entity}")
|
||||||
if filtered_entities: # Only add the label if there are entities left
|
if filtered_entities: # Only add the label if there are entities left
|
||||||
filtered_kpi[label] = filtered_entities
|
filtered_kpi[label] = filtered_entities
|
||||||
|
|
@ -115,6 +125,7 @@ def delete_duplicate_entities(entity_list):
|
||||||
unique_entities[label] = filtered_entities
|
unique_entities[label] = filtered_entities
|
||||||
return unique_entities
|
return unique_entities
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
entities = [
|
entities = [
|
||||||
# {"label": "PERSON", "entity": "John Doe", "status": "validated"},
|
# {"label": "PERSON", "entity": "John Doe", "status": "validated"},
|
||||||
|
|
|
||||||
|
|
@ -122,7 +122,7 @@ export function KPIForm({ mode, initialData, onSave, onCancel, loading = false,
|
||||||
example: formData.example || '',
|
example: formData.example || '',
|
||||||
position: formData.position ?? 0,
|
position: formData.position ?? 0,
|
||||||
active: formData.active ?? true,
|
active: formData.active ?? true,
|
||||||
examples: [{ sentence: '', value: '' }]
|
examples: formData.examples ?? []
|
||||||
});
|
});
|
||||||
// Formular zurücksetzen:
|
// Formular zurücksetzen:
|
||||||
setFormData(emptyKPI);
|
setFormData(emptyKPI);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue