KPI_data adjusted
parent 12783539b3
commit 360da3acb0
@@ -29,13 +29,11 @@ def create_kpi_setting():
    required_fields = [
        "name",
        "description",
        "mandatory",
        "type",
        "translation",
        "example",
        "position",
        "active",
        "examples",
    ]
    for field in required_fields:
        if field not in data:

@@ -55,13 +53,11 @@ def create_kpi_setting():
    new_kpi_setting = KPISettingModel(
        name=data["name"],
        description=data["description"],
        mandatory=data["mandatory"],
        type=kpi_type,
        translation=data["translation"],
        example=data["example"],
        position=data["position"],
        active=data["active"],
        examples=data.get("examples", []),
    )

    db.session.add(new_kpi_setting)

@@ -84,9 +80,6 @@ def update_kpi_setting(id):
        return jsonify({"error": "KPI Setting with this name already exists"}), 409
        kpi_setting.name = data["name"]

    if "description" in data:
        kpi_setting.description = data["description"]

    if "mandatory" in data:
        kpi_setting.mandatory = data["mandatory"]

@@ -100,18 +93,15 @@ def update_kpi_setting(id):
            400,
        )

    if "translation" in data:
        kpi_setting.translation = data["translation"]

    if "example" in data:
        kpi_setting.example = data["example"]

    if "position" in data:
        kpi_setting.position = data["position"]

    if "active" in data:
        kpi_setting.active = data["active"]

    if "examples" in data:
        kpi_setting.examples = data["examples"]

    db.session.commit()

    return jsonify(kpi_setting.to_dict()), 200
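As a point of reference, a minimal sketch of a request body that would pass the required_fields check above. The POST URL is an assumption (the /api/kpi_setting/ path and the coordinator default http://localhost:5000 appear elsewhere in this change, but the create route mapping is not shown here), and the "type" string depends on how the route maps values onto KPISettingType.

# Hypothetical client call; URL, route mapping, and the "type" string are assumptions.
import requests

payload = {
    "name": "Fondsname",
    "description": "Der vollständige Name des Investmentfonds",
    "mandatory": True,
    "type": "string",
    "translation": "Fund Name",
    "example": "Alpha Real Estate Fund I",
    "position": 1,
    "active": True,
    "examples": [
        {"sentence": "Der Fonds trägt den Namen Alpha Real Estate Fund I.", "value": "Alpha Real Estate Fund I"},
    ],
}
requests.post("http://localhost:5000/api/kpi_setting/", json=payload)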

@@ -2,6 +2,8 @@ from model.database import db
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import Enum as SQLAlchemyEnum
from enum import Enum
from sqlalchemy.dialects.postgresql import JSONB
from collections import OrderedDict


class KPISettingType(Enum):

@@ -18,37 +20,31 @@ class KPISettingModel(db.Model):
    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(unique=True)
    description: Mapped[str]
    mandatory: Mapped[bool]
    type: Mapped[KPISettingType] = mapped_column(
        SQLAlchemyEnum(KPISettingType, native_enum=True)
    )
    translation: Mapped[str]
    example: Mapped[str]
    position: Mapped[int]
    active: Mapped[bool]
    examples: Mapped[list] = mapped_column(JSONB, default=[])

    def to_dict(self):
        return {
            "id": self.id,
            "name": self.name,
            "description": self.description,
            "mandatory": self.mandatory,
            "type": self.type.value,
            "translation": self.translation,
            "example": self.example,
            "position": self.position,
            "active": self.active,
        }
        return OrderedDict(
            [
                ("id", self.id),
                ("name", self.name),
                ("mandatory", self.mandatory),
                ("type", self.type.value),
                ("position", self.position),
                ("examples", self.examples),
                ("active", self.active),
            ]
        )

    def __init__(
        self, name, description, mandatory, type, translation, example, position, active
    ):
    def __init__(self, name, mandatory, type, position, active, examples=None):
        self.name = name
        self.description = description
        self.mandatory = mandatory
        self.type = type
        self.translation = translation
        self.example = example
        self.position = position
        self.active = active
        self.examples = examples or []
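A minimal sketch of the reworked constructor and serialization, assuming the new __init__ signature and the OrderedDict-based to_dict shown above; the field values are borrowed from the seed data below, the rest is illustrative.

# Hypothetical usage of the updated model API.
kpi = KPISettingModel(
    name="Rendite",
    mandatory=False,
    type=KPISettingType.NUMBER,
    position=8,
    active=True,
    examples=[{"sentence": "Die Rendite für das Jahr beträgt 5.8 %.", "value": "5.8 %"}],
)
# to_dict() now yields an OrderedDict limited to id, name, mandatory, type, position, examples, active.
print(dict(kpi.to_dict()))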

@@ -10,153 +10,243 @@ def seed_default_kpi_settings():
    default_kpi_settings = [
        {
            "name": "Fondsname",
            "description": "Der vollständige Name des Investmentfonds",
            "mandatory": True,
            "type": KPISettingType.STRING,
            "translation": "Fund Name",
            "example": "Alpha Real Estate Fund I",
            "position": 1,
            "active": True,
            "examples": [
                {
                    "sentence": "Der Fonds trägt den Namen Alpha Real Estate Fund I.",
                    "value": "Alpha Real Estate Fund I",
                },
                {
                    "sentence": "Im Pitchbook wird der Fondsname als Alpha Real Estate Fund I angegeben.",
                    "value": "Alpha Real Estate Fund I",
                },
            ],
        },
        {
            "name": "Fondsmanager",
            "description": "Verantwortlicher Manager für die Fondsverwaltung",
            "mandatory": True,
            "type": KPISettingType.STRING,
            "translation": "Fund Manager",
            "example": "Max Mustermann",
            "position": 2,
            "active": True,
            "examples": [
                {
                    "sentence": "Fondsmanager des Projekts ist Max Mustermann.",
                    "value": "Max Mustermann",
                },
                {
                    "sentence": "Die Verwaltung liegt bei Max Mustermann.",
                    "value": "Max Mustermann",
                },
            ],
        },
        {
            "name": "AIFM",
            "description": "Alternative Investment Fund Manager",
            "mandatory": True,
            "type": KPISettingType.STRING,
            "translation": "AIFM",
            "example": "Alpha Investment Management GmbH",
            "position": 3,
            "active": True,
            "examples": [
                {
                    "sentence": "AIFM ist die Alpha Investment Management GmbH.",
                    "value": "Alpha Investment Management GmbH",
                },
                {
                    "sentence": "Die Alpha Investment Management GmbH fungiert als AIFM.",
                    "value": "Alpha Investment Management GmbH",
                },
            ],
        },
        {
            "name": "Datum",
            "description": "Stichtag der Datenerfassung",
            "mandatory": True,
            "type": KPISettingType.DATE,
            "translation": "Date",
            "example": "05.05.2025",
            "position": 4,
            "active": True,
            "examples": [
                {
                    "sentence": "Die Daten basieren auf dem Stand vom 05.05.2025.",
                    "value": "05.05.2025",
                },
                {
                    "sentence": "Stichtag der Angaben ist der 05.05.2025.",
                    "value": "05.05.2025",
                },
            ],
        },
        {
            "name": "Risikoprofil",
            "description": "Klassifizierung des Risikos des Fonds",
            "mandatory": True,
            "type": KPISettingType.STRING,
            "translation": "Risk Profile",
            "example": "Core/Core++",
            "position": 5,
            "active": True,
            "examples": [
                {
                    "sentence": "Der Fonds hat das Risikoprofil Core/Core++.",
                    "value": "Core/Core++",
                },
                {
                    "sentence": "Einstufung des Fondsrisikos: Core/Core++.",
                    "value": "Core/Core++",
                },
            ],
        },
        {
            "name": "Artikel",
            "description": "Artikel 8 SFDR-Klassifizierung",
            "mandatory": False,
            "type": KPISettingType.BOOLEAN,
            "translation": "Article",
            "example": "Artikel 8",
            "position": 6,
            "active": True,
            "examples": [
                {
                    "sentence": "Der Fonds erfüllt die Anforderungen von Artikel 8.",
                    "value": "Artikel 8",
                },
                {
                    "sentence": "Gemäß SFDR fällt dieser Fonds unter Artikel 8.",
                    "value": "Artikel 8",
                },
            ],
        },
        {
            "name": "Zielrendite",
            "description": "Angestrebte jährliche Rendite in Prozent",
            "mandatory": True,
            "type": KPISettingType.NUMBER,
            "translation": "Target Return",
            "example": "6.5",
            "position": 7,
            "active": True,
            "examples": [
                {
                    "sentence": "Die angestrebte Zielrendite liegt bei 6.5 %.",
                    "value": "6.5 %",
                },
                {"sentence": "Zielrendite des Fonds beträgt 6.5 %.", "value": "6.5 %"},
            ],
        },
        {
            "name": "Rendite",
            "description": "Tatsächlich erzielte Rendite in Prozent",
            "mandatory": False,
            "type": KPISettingType.NUMBER,
            "translation": "Return",
            "example": "5.8",
            "position": 8,
            "active": True,
            "examples": [
                {
                    "sentence": "Die Rendite für das Jahr beträgt 5.8 %.",
                    "value": "5.8 %",
                },
                {
                    "sentence": "Im letzten Jahr wurde eine Rendite von 5.8 % erzielt.",
                    "value": "5.8 %",
                },
            ],
        },
        {
            "name": "Zielausschüttung",
            "description": "Geplante Ausschüttung in Prozent",
            "mandatory": False,
            "type": KPISettingType.NUMBER,
            "translation": "Target Distribution",
            "example": "4.0",
            "position": 9,
            "active": True,
            "examples": [
                {"sentence": "Die Zielausschüttung beträgt 4.0 %.", "value": "4.0 %"},
                {
                    "sentence": "Geplante Ausschüttung: 4.0 % pro Jahr.",
                    "value": "4.0 %",
                },
            ],
        },
        {
            "name": "Ausschüttung",
            "description": "Tatsächliche Ausschüttung in Prozent",
            "mandatory": False,
            "type": KPISettingType.NUMBER,
            "translation": "Distribution",
            "example": "3.8",
            "position": 10,
            "active": True,
            "examples": [
                {
                    "sentence": "Die Ausschüttung im Jahr 2024 lag bei 3.8 %.",
                    "value": "3.8 %",
                },
                {
                    "sentence": "Es wurde eine Ausschüttung von 3.8 % vorgenommen.",
                    "value": "3.8 %",
                },
            ],
        },
        {
            "name": "Laufzeit",
            "description": "Geplante Laufzeit des Fonds",
            "mandatory": True,
            "type": KPISettingType.STRING,
            "translation": "Duration",
            "example": "7 Jahre, 10, Evergreen",
            "position": 11,
            "active": True,
            "examples": [
                {
                    "sentence": "Die Laufzeit des Fonds beträgt 7 Jahre.",
                    "value": "7 Jahre",
                },
                {"sentence": "Geplante Dauer: Evergreen-Modell.", "value": "Evergreen"},
            ],
        },
        {
            "name": "LTV",
            "description": "Loan-to-Value Verhältnis in Prozent",
            "mandatory": False,
            "type": KPISettingType.NUMBER,
            "translation": "LTV",
            "example": "65.0",
            "position": 12,
            "active": True,
            "examples": [
                {"sentence": "Der LTV beträgt 65.0 %.", "value": "65.0 %"},
                {"sentence": "Loan-to-Value-Ratio: 65.0 %.", "value": "65.0 %"},
            ],
        },
        {
            "name": "Managementgebühren",
            "description": "Jährliche Verwaltungsgebühren in Prozent",
            "mandatory": True,
            "type": KPISettingType.NUMBER,
            "translation": "Management Fees",
            "example": "1.5",
            "position": 13,
            "active": True,
            "examples": [
                {
                    "sentence": "Die Managementgebühren betragen jährlich 1.5 %.",
                    "value": "1.5 %",
                },
                {
                    "sentence": "Für die Verwaltung wird eine Gebühr von 1.5 % erhoben.",
                    "value": "1.5 %",
                },
            ],
        },
        {
            "name": "Sektorenallokation",
            "description": "Verteilung der Investments nach Sektoren",
            "mandatory": False,
            "type": KPISettingType.ARRAY,
            "translation": "Sector Allocation",
            "example": "Büro, Wohnen, Logistik, Studentenwohnen",
            "position": 14,
            "active": True,
            "examples": [
                {
                    "sentence": "Die Sektorenallokation umfasst Büro, Wohnen und Logistik.",
                    "value": "Büro, Wohnen, Logistik",
                },
                {
                    "sentence": "Investiert wird in Büro, Logistik und Studentenwohnen.",
                    "value": "Büro, Logistik, Studentenwohnen",
                },
            ],
        },
        {
            "name": "Länderallokation",
            "description": "Geografische Verteilung der Investments",
            "mandatory": False,
            "type": KPISettingType.ARRAY,
            "translation": "Country Allocation",
            "example": "Deutschland,Frankreich, Österreich, Schweiz",
            "position": 15,
            "active": True,
            "examples": [
                {
                    "sentence": "Investitionen erfolgen in Deutschland, Frankreich und Österreich.",
                    "value": "Deutschland, Frankreich, Österreich",
                },
                {
                    "sentence": "Die Länderallokation umfasst Deutschland, Schweiz und Frankreich.",
                    "value": "Deutschland, Schweiz, Frankreich",
                },
            ],
        },
    ]

@@ -165,13 +255,11 @@ def seed_default_kpi_settings():
    for kpi_data in default_kpi_settings:
        kpi_setting = KPISettingModel(
            name=kpi_data["name"],
            description=kpi_data["description"],
            mandatory=kpi_data["mandatory"],
            type=kpi_data["type"],
            translation=kpi_data["translation"],
            example=kpi_data["example"],
            position=kpi_data["position"],
            active=kpi_data["active"],
            examples=kpi_data.get("examples", []),
        )

        db.session.add(kpi_setting)

@@ -6,9 +6,12 @@ import json
app = Flask(__name__)

VALIDATE_SERVICE_URL = os.getenv("VALIDATE_SERVICE_URL", "http://localhost:5054/validate")
VALIDATE_SERVICE_URL = os.getenv(
    "VALIDATE_SERVICE_URL", "http://localhost:5054/validate"
)

@app.route('/extract', methods=['POST'])

@app.route("/extract", methods=["POST"])
def extract_text_from_ocr_json():
    json_data = request.get_json()

@@ -16,19 +19,19 @@ def extract_text_from_ocr_json():
    pages_data = json_data["extracted_text_per_page"]

    entities_json = extract_with_exxeta(pages_data, pitchbook_id)
    entities = json.loads(entities_json) if isinstance(entities_json, str) else entities_json
    entities = (
        json.loads(entities_json) if isinstance(entities_json, str) else entities_json
    )

    validate_payload = {
        "id": pitchbook_id,
        "service": "exxeta",
        "entities": entities
    }
    validate_payload = {"id": pitchbook_id, "service": "exxeta", "entities": entities}

    print(f"[EXXETA] Sending to validate service: {VALIDATE_SERVICE_URL}")
    print(f"[EXXETA] Payload: {validate_payload} entities for pitchbook {pitchbook_id}")

    try:
        response = requests.post(VALIDATE_SERVICE_URL, json=validate_payload, timeout=600)
        response = requests.post(
            VALIDATE_SERVICE_URL, json=validate_payload, timeout=600
        )
        print(f"[EXXETA] Validate service response: {response.status_code}")
        if response.status_code != 200:
            print(f"[EXXETA] Validate service error: {response.text}")
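For orientation, a sketch of the payload shape this route forwards to the validate service; the entity keys (label, entity, page) are inferred from the merge and validation code further down, not spelled out in this file.

# Hypothetical payload; all values are made up.
validate_payload = {
    "id": 42,
    "service": "exxeta",
    "entities": [
        {"label": "RENDITE", "entity": "5.8 %", "page": 3},
    ],
}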

@@ -16,6 +16,7 @@ TIMEOUT = 180
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def get_dynamic_labels():
    url = f"{COORDINATOR_URL}/api/kpi_setting/"
    try:

@@ -28,6 +29,7 @@ def get_dynamic_labels():
        logger.warning(f"Konnte dynamische Labels nicht laden: {e}")
        return []


def extract_with_exxeta(pages_json, pitchbook_id):
    results = []

@@ -39,7 +41,10 @@ def extract_with_exxeta(pages_json, pitchbook_id):
    for page_data in pages_json:
        i += 1
        if i % 8 == 0:
            requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35 + 60/len(pages_json)*i})
            requests.post(
                COORDINATOR_URL + "/api/progress",
                json={"id": pitchbook_id, "progress": 35 + 60 / len(pages_json) * i},
            )

        page_num = page_data.get("page")
        text = page_data.get("text", "")

@@ -100,23 +105,28 @@ def extract_with_exxeta(pages_json, pitchbook_id):
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {EXXETA_API_KEY}"
            "Authorization": f"Bearer {EXXETA_API_KEY}",
        }

        payload = {
            "model": MODEL,
            "messages": [
                {"role": "system", "content": "Du bist ein Finanzanalyst. Antworte ausschließlich mit einem validen JSON-Array."},
                {"role": "user", "content": prompt}
                {
                    "role": "system",
                    "content": "Du bist ein Finanzanalyst. Antworte ausschließlich mit einem validen JSON-Array.",
                },
                {"role": "user", "content": prompt},
            ],
            "temperature": 0.0
            "temperature": 0.0,
        }

        url = f"{EXXETA_BASE_URL}/deployments/{MODEL}/chat/completions"

        for attempt in range(1, MAX_RETRIES + 1):
            try:
                response = requests.post(url, headers=headers, json=payload, timeout=TIMEOUT)
                response = requests.post(
                    url, headers=headers, json=payload, timeout=TIMEOUT
                )
                response.raise_for_status()
                content = response.json()["choices"][0]["message"]["content"].strip()
                if content.startswith("```json"):

@@ -140,9 +150,12 @@ def extract_with_exxeta(pages_json, pitchbook_id):
                if attempt == MAX_RETRIES:
                    results.extend([])

    requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 95})
    requests.post(
        COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 95}
    )
    return json.dumps(results, indent=2, ensure_ascii=False)


if __name__ == "__main__":
    print("📡 Test-Aufruf get_dynamic_labels:")
    print(get_dynamic_labels())
    print(get_dynamic_labels())
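A rough illustration of the progress values the page loop above reports, assuming progress is a percentage and using a made-up page count.

# Every eighth page posts 35 + 60 / len(pages_json) * i, so progress climbs from 35 toward 95;
# the final post after the loop pins it at 95.
pages = 16  # hypothetical document length
for i in range(1, pages + 1):
    if i % 8 == 0:
        print(i, 35 + 60 / pages * i)  # 8 -> 65.0, 16 -> 95.0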

@@ -29,19 +29,17 @@ def convert_pdf_async(temp_path, pitchbook_id):
        temp_path.unlink()  # cleanup
        return {"error": "OCR processing failed - all PDFs must be OCR'd"}, 500

    with open(ocr_path, 'rb') as ocr_file:
    with open(ocr_path, "rb") as ocr_file:
        ocr_file.seek(0)
        result = pdf_to_json(ocr_file)

    payload = {
        "id": int(pitchbook_id),
        "extracted_text_per_page": result["pages"]
    }
    payload = {"id": int(pitchbook_id), "extracted_text_per_page": result["pages"]}

    logger.info("Sending payload to EXXETA and SPACY services")

    requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35})
    requests.post(
        COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35}
    )
    try:
        exxeta_response = requests.post(EXXETA_URL, json=payload, timeout=600)
        logger.info(f"EXXETA response: {exxeta_response.status_code}")

@@ -54,14 +52,16 @@ def convert_pdf_async(temp_path, pitchbook_id):
    except Exception as e:
        logger.error(f"Error calling SPACY: {e}")

    files=[
        ('file',('',open(ocr_path,'rb'),'application/pdf'))
    ]
    files = [("file", ("", open(ocr_path, "rb"), "application/pdf"))]
    headers = {}

    try:

        requests.put(f"{COORDINATOR_URL}/api/pitch_book/{pitchbook_id}", files=files, timeout=600, headers=headers)
        requests.put(
            f"{COORDINATOR_URL}/api/pitch_book/{pitchbook_id}",
            files=files,
            timeout=600,
            headers=headers,
        )
        logger.info("COORDINATOR response: Progress + File updated")
    except Exception as e:
        logger.error(f"Error calling COORDINATOR: {e}")

@@ -72,7 +72,7 @@ def convert_pdf_async(temp_path, pitchbook_id):
        logger.error(f"Exception in OCR processing: {str(e)}", exc_info=True)


@app.route('/ocr', methods=['POST'])
@app.route("/ocr", methods=["POST"])
def convert_extract_text_from_pdf():
    if "file" not in request.files:
        return {"error": "No file"}, 400

@@ -85,7 +85,7 @@ def convert_extract_text_from_pdf():
    if not pitchbook_id:
        return {"error": "No ID"}, 400

    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        file.seek(0)
        temp_file.write(file.read())
        temp_path = Path(temp_file.name)

@@ -93,10 +93,7 @@ def convert_extract_text_from_pdf():
    thread = threading.Thread(target=convert_pdf_async, args=(temp_path, pitchbook_id))
    thread.start()

    return {
        "status": "sent",
        "message": "PDF successfully OCR'd and processed"
    }, 200
    return {"status": "sent", "message": "PDF successfully OCR'd and processed"}, 200


if __name__ == "__main__":

@@ -17,9 +17,10 @@ log_folder = TEMP_DIR / "logs"
output_folder.mkdir(exist_ok=True)
log_folder.mkdir(exist_ok=True)


def pdf_to_json(pdf_input):
    try:
        if hasattr(pdf_input, 'read'):
        if hasattr(pdf_input, "read"):
            pdf_input.seek(0)

        with pdfplumber.open(pdf_input) as pdf:

@@ -83,7 +84,9 @@ def ocr_pdf(input_file_path: Path):
    if result.returncode == 0:
        if output_file.exists():
            logger.info(f"OCR successful, output file size: {output_file.stat().st_size} bytes")
            logger.info(
                f"OCR successful, output file size: {output_file.stat().st_size} bytes"
            )
            return output_file
        else:
            logger.error(f"OCR completed but output file not found: {output_file}")

@@ -119,4 +122,4 @@ def extract_text_to_json(pdf_path: Path):
    except Exception as e:
        logger.error(f"Failed to extract text to JSON: {e}")
        return None
        return None

@@ -40,7 +40,9 @@ def send_to_coordinator_service(processed_data, request_id):

def process_data_async(request_id, spacy_data, exxeta_data):
    try:
        requests.post(COORDINATOR_URL + "/api/progress", json={"id": request_id, "progress": 95})
        requests.post(
            COORDINATOR_URL + "/api/progress", json={"id": request_id, "progress": 95}
        )
        print(f"Start asynchronous processing for PitchBook: {request_id}")

        # Perform merge

@@ -96,7 +98,6 @@ def validate():
    # If both datasets are present, start asynchronous processing
    if spacy_data is not None and exxeta_data is not None:

        # Start asynchronous processing in a separate thread
        processing_thread = threading.Thread(
            target=process_data_async,

@@ -27,7 +27,6 @@ def merge_entities(spacy_data, exxeta_data):
                and s_entity_norm == e_entity_norm
                and s_page == e_page
            ):

                merged.append(
                    {
                        "label": s["label"],

@@ -5,6 +5,8 @@ import os
# SETTINGS = [{"id": "Rendite", "type": "number"}]
COORDINATOR_URL = os.getenv("COORDINATOR_URL", "http://localhost:5000")


def validate_entities(entities):
    try:
        response = requests.get(COORDINATOR_URL + "/api/kpi_setting/")

@@ -42,7 +44,6 @@ def validate_entities(entities):
            result.extend(item[1])
            continue

        # Filter not validated, if there are valid values
        validated = False
        for entity in item[1]:

@@ -61,11 +62,11 @@ def validate_entities(entities):
def validate_number(entity_list, settings):
    filtered_kpi = {}
    for label, entity_list in entity_list.items():

        setting = next((s for s in settings if s["name"].upper() == label), None)
        if setting and setting["type"] == "number":
            filtered_entities = [
                entity for entity in entity_list
                entity
                for entity in entity_list
                if is_valid_number(str(entity["entity"]))
            ]
            for entity in entity_list:

@@ -80,8 +81,12 @@ def validate_number(entity_list, settings):


def is_valid_number(number):
    pattern = r'^[0-9\-\s%,.€]+$'
    return any(char.isdigit() for char in number) and not re.search(r'\d+\s\d+', number) and re.fullmatch(pattern, number)
    pattern = r"^[0-9\-\s%,.€]+$"
    return (
        any(char.isdigit() for char in number)
        and not re.search(r"\d+\s\d+", number)
        and re.fullmatch(pattern, number)
    )
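A short sketch of how is_valid_number behaves on the kinds of values that show up in the seed data; the sample strings are illustrative.

# A value needs at least one digit, may only contain digits, '-', whitespace, '%', ',', '.', '€',
# and two digit groups separated by whitespace are rejected.
print(bool(is_valid_number("6.5 %")))        # True
print(bool(is_valid_number("65.0%")))        # True
print(bool(is_valid_number("1 000")))        # False, matches \d+\s\d+
print(bool(is_valid_number("Core/Core++")))  # False, no digit and '/' not allowed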
def delete_exxeta_unknown(entity_list):

@@ -89,11 +94,16 @@ def delete_exxeta_unknown(entity_list):
    for label, entity_list in entity_list.items():
        # Filter out entities with "nichtangegeben" or "n/a" (case-insensitive and stripped)
        filtered_entities = [
            entity for entity in entity_list
            if str(entity["entity"]).lower().replace(" ", "") not in {"nichtangegeben", "n/a"}
            entity
            for entity in entity_list
            if str(entity["entity"]).lower().replace(" ", "")
            not in {"nichtangegeben", "n/a"}
        ]
        for entity in entity_list:
            if str(entity["entity"]).lower().replace(" ", "") in {"nichtangegeben", "n/a"}:
            if str(entity["entity"]).lower().replace(" ", "") in {
                "nichtangegeben",
                "n/a",
            }:
                print(f"filtered out: {entity}")
        if filtered_entities:  # Only add the label if there are entities left
            filtered_kpi[label] = filtered_entities

@@ -115,6 +125,7 @@ def delete_duplicate_entities(entity_list):
        unique_entities[label] = filtered_entities
    return unique_entities


if __name__ == "__main__":
    entities = [
        # {"label": "PERSON", "entity": "John Doe", "status": "validated"},

@@ -122,7 +122,7 @@ export function KPIForm({ mode, initialData, onSave, onCancel, loading = false,
      example: formData.example || '',
      position: formData.position ?? 0,
      active: formData.active ?? true,
      examples: [{ sentence: '', value: '' }]
      examples: formData.examples ?? []
    });
    // Formular zurücksetzen:
    setFormData(emptyKPI);