From 360da3acb071cf4b27bf3cb4b16277b50b6c6547 Mon Sep 17 00:00:00 2001 From: Abdulraahman Dabbagh <1924466@stud.hs-mannheim.de> Date: Sun, 29 Jun 2025 04:57:24 +0200 Subject: [PATCH] KPI_data angepasst --- .../controller/kpi_setting_controller.py | 20 +- .../coordinator/model/kpi_setting_model.py | 36 ++-- .../backend/coordinator/model/seed_data.py | 184 +++++++++++++----- project/backend/exxetaGPT-service/app.py | 21 +- .../exxetaGPT-service/extractExxeta.py | 29 ++- project/backend/ocr-service/app.py | 33 ++-- project/backend/ocr-service/ocr_runner.py | 9 +- project/backend/validate-service/app.py | 5 +- .../backend/validate-service/merge_logic.py | 1 - .../validate-service/validate_logic.py | 27 ++- project/frontend/src/components/KPIForm.tsx | 2 +- 11 files changed, 234 insertions(+), 133 deletions(-) diff --git a/project/backend/coordinator/controller/kpi_setting_controller.py b/project/backend/coordinator/controller/kpi_setting_controller.py index d6328cf..92b51b3 100644 --- a/project/backend/coordinator/controller/kpi_setting_controller.py +++ b/project/backend/coordinator/controller/kpi_setting_controller.py @@ -29,13 +29,11 @@ def create_kpi_setting(): required_fields = [ "name", - "description", "mandatory", "type", - "translation", - "example", "position", "active", + "examples", ] for field in required_fields: if field not in data: @@ -55,13 +53,11 @@ def create_kpi_setting(): new_kpi_setting = KPISettingModel( name=data["name"], - description=data["description"], mandatory=data["mandatory"], type=kpi_type, - translation=data["translation"], - example=data["example"], position=data["position"], active=data["active"], + examples=data.get("examples", []), ) db.session.add(new_kpi_setting) @@ -84,9 +80,6 @@ def update_kpi_setting(id): return jsonify({"error": "KPI Setting with this name already exists"}), 409 kpi_setting.name = data["name"] - if "description" in data: - kpi_setting.description = data["description"] - if "mandatory" in data: kpi_setting.mandatory = data["mandatory"] @@ -100,18 +93,15 @@ def update_kpi_setting(id): 400, ) - if "translation" in data: - kpi_setting.translation = data["translation"] - - if "example" in data: - kpi_setting.example = data["example"] - if "position" in data: kpi_setting.position = data["position"] if "active" in data: kpi_setting.active = data["active"] + if "examples" in data: + kpi_setting.examples = data["examples"] + db.session.commit() return jsonify(kpi_setting.to_dict()), 200 diff --git a/project/backend/coordinator/model/kpi_setting_model.py b/project/backend/coordinator/model/kpi_setting_model.py index 11ff33d..4accfbb 100644 --- a/project/backend/coordinator/model/kpi_setting_model.py +++ b/project/backend/coordinator/model/kpi_setting_model.py @@ -2,6 +2,8 @@ from model.database import db from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy import Enum as SQLAlchemyEnum from enum import Enum +from sqlalchemy.dialects.postgresql import JSONB +from collections import OrderedDict class KPISettingType(Enum): @@ -18,37 +20,31 @@ class KPISettingModel(db.Model): id: Mapped[int] = mapped_column(primary_key=True) name: Mapped[str] = mapped_column(unique=True) - description: Mapped[str] mandatory: Mapped[bool] type: Mapped[KPISettingType] = mapped_column( SQLAlchemyEnum(KPISettingType, native_enum=True) ) - translation: Mapped[str] - example: Mapped[str] position: Mapped[int] active: Mapped[bool] + examples: Mapped[list] = mapped_column(JSONB, default=[]) def to_dict(self): - return { - "id": self.id, - "name": self.name, - "description": self.description, - "mandatory": self.mandatory, - "type": self.type.value, - "translation": self.translation, - "example": self.example, - "position": self.position, - "active": self.active, - } + return OrderedDict( + [ + ("id", self.id), + ("name", self.name), + ("mandatory", self.mandatory), + ("type", self.type.value), + ("position", self.position), + ("examples", self.examples), + ("active", self.active), + ] + ) - def __init__( - self, name, description, mandatory, type, translation, example, position, active - ): + def __init__(self, name, mandatory, type, position, active, examples=None): self.name = name - self.description = description self.mandatory = mandatory self.type = type - self.translation = translation - self.example = example self.position = position self.active = active + self.examples = examples or [] diff --git a/project/backend/coordinator/model/seed_data.py b/project/backend/coordinator/model/seed_data.py index 28c65c0..6e34664 100644 --- a/project/backend/coordinator/model/seed_data.py +++ b/project/backend/coordinator/model/seed_data.py @@ -10,153 +10,243 @@ def seed_default_kpi_settings(): default_kpi_settings = [ { "name": "Fondsname", - "description": "Der vollständige Name des Investmentfonds", "mandatory": True, "type": KPISettingType.STRING, - "translation": "Fund Name", - "example": "Alpha Real Estate Fund I", "position": 1, "active": True, + "examples": [ + { + "sentence": "Der Fonds trägt den Namen Alpha Real Estate Fund I.", + "value": "Alpha Real Estate Fund I", + }, + { + "sentence": "Im Pitchbook wird der Fondsname als Alpha Real Estate Fund I angegeben.", + "value": "Alpha Real Estate Fund I", + }, + ], }, { "name": "Fondsmanager", - "description": "Verantwortlicher Manager für die Fondsverwaltung", "mandatory": True, "type": KPISettingType.STRING, - "translation": "Fund Manager", - "example": "Max Mustermann", "position": 2, "active": True, + "examples": [ + { + "sentence": "Fondsmanager des Projekts ist Max Mustermann.", + "value": "Max Mustermann", + }, + { + "sentence": "Die Verwaltung liegt bei Max Mustermann.", + "value": "Max Mustermann", + }, + ], }, { "name": "AIFM", - "description": "Alternative Investment Fund Manager", "mandatory": True, "type": KPISettingType.STRING, - "translation": "AIFM", - "example": "Alpha Investment Management GmbH", "position": 3, "active": True, + "examples": [ + { + "sentence": "AIFM ist die Alpha Investment Management GmbH.", + "value": "Alpha Investment Management GmbH", + }, + { + "sentence": "Die Alpha Investment Management GmbH fungiert als AIFM.", + "value": "Alpha Investment Management GmbH", + }, + ], }, { "name": "Datum", - "description": "Stichtag der Datenerfassung", "mandatory": True, "type": KPISettingType.DATE, - "translation": "Date", - "example": "05.05.2025", "position": 4, "active": True, + "examples": [ + { + "sentence": "Die Daten basieren auf dem Stand vom 05.05.2025.", + "value": "05.05.2025", + }, + { + "sentence": "Stichtag der Angaben ist der 05.05.2025.", + "value": "05.05.2025", + }, + ], }, { "name": "Risikoprofil", - "description": "Klassifizierung des Risikos des Fonds", "mandatory": True, "type": KPISettingType.STRING, - "translation": "Risk Profile", - "example": "Core/Core++", "position": 5, "active": True, + "examples": [ + { + "sentence": "Der Fonds hat das Risikoprofil Core/Core++.", + "value": "Core/Core++", + }, + { + "sentence": "Einstufung des Fondsrisikos: Core/Core++.", + "value": "Core/Core++", + }, + ], }, { "name": "Artikel", - "description": "Artikel 8 SFDR-Klassifizierung", "mandatory": False, "type": KPISettingType.BOOLEAN, - "translation": "Article", - "example": "Artikel 8", "position": 6, "active": True, + "examples": [ + { + "sentence": "Der Fonds erfüllt die Anforderungen von Artikel 8.", + "value": "Artikel 8", + }, + { + "sentence": "Gemäß SFDR fällt dieser Fonds unter Artikel 8.", + "value": "Artikel 8", + }, + ], }, { "name": "Zielrendite", - "description": "Angestrebte jährliche Rendite in Prozent", "mandatory": True, "type": KPISettingType.NUMBER, - "translation": "Target Return", - "example": "6.5", "position": 7, "active": True, + "examples": [ + { + "sentence": "Die angestrebte Zielrendite liegt bei 6.5 %.", + "value": "6.5 %", + }, + {"sentence": "Zielrendite des Fonds beträgt 6.5 %.", "value": "6.5 %"}, + ], }, { "name": "Rendite", - "description": "Tatsächlich erzielte Rendite in Prozent", "mandatory": False, "type": KPISettingType.NUMBER, - "translation": "Return", - "example": "5.8", "position": 8, "active": True, + "examples": [ + { + "sentence": "Die Rendite für das Jahr beträgt 5.8 %.", + "value": "5.8 %", + }, + { + "sentence": "Im letzten Jahr wurde eine Rendite von 5.8 % erzielt.", + "value": "5.8 %", + }, + ], }, { "name": "Zielausschüttung", - "description": "Geplante Ausschüttung in Prozent", "mandatory": False, "type": KPISettingType.NUMBER, - "translation": "Target Distribution", - "example": "4.0", "position": 9, "active": True, + "examples": [ + {"sentence": "Die Zielausschüttung beträgt 4.0 %.", "value": "4.0 %"}, + { + "sentence": "Geplante Ausschüttung: 4.0 % pro Jahr.", + "value": "4.0 %", + }, + ], }, { "name": "Ausschüttung", - "description": "Tatsächliche Ausschüttung in Prozent", "mandatory": False, "type": KPISettingType.NUMBER, - "translation": "Distribution", - "example": "3.8", "position": 10, "active": True, + "examples": [ + { + "sentence": "Die Ausschüttung im Jahr 2024 lag bei 3.8 %.", + "value": "3.8 %", + }, + { + "sentence": "Es wurde eine Ausschüttung von 3.8 % vorgenommen.", + "value": "3.8 %", + }, + ], }, { "name": "Laufzeit", - "description": "Geplante Laufzeit des Fonds", "mandatory": True, "type": KPISettingType.STRING, - "translation": "Duration", - "example": "7 Jahre, 10, Evergreen", "position": 11, "active": True, + "examples": [ + { + "sentence": "Die Laufzeit des Fonds beträgt 7 Jahre.", + "value": "7 Jahre", + }, + {"sentence": "Geplante Dauer: Evergreen-Modell.", "value": "Evergreen"}, + ], }, { "name": "LTV", - "description": "Loan-to-Value Verhältnis in Prozent", "mandatory": False, "type": KPISettingType.NUMBER, - "translation": "LTV", - "example": "65.0", "position": 12, "active": True, + "examples": [ + {"sentence": "Der LTV beträgt 65.0 %.", "value": "65.0 %"}, + {"sentence": "Loan-to-Value-Ratio: 65.0 %.", "value": "65.0 %"}, + ], }, { "name": "Managementgebühren", - "description": "Jährliche Verwaltungsgebühren in Prozent", "mandatory": True, "type": KPISettingType.NUMBER, - "translation": "Management Fees", - "example": "1.5", "position": 13, "active": True, + "examples": [ + { + "sentence": "Die Managementgebühren betragen jährlich 1.5 %.", + "value": "1.5 %", + }, + { + "sentence": "Für die Verwaltung wird eine Gebühr von 1.5 % erhoben.", + "value": "1.5 %", + }, + ], }, { "name": "Sektorenallokation", - "description": "Verteilung der Investments nach Sektoren", "mandatory": False, "type": KPISettingType.ARRAY, - "translation": "Sector Allocation", - "example": "Büro, Wohnen, Logistik, Studentenwohnen", "position": 14, "active": True, + "examples": [ + { + "sentence": "Die Sektorenallokation umfasst Büro, Wohnen und Logistik.", + "value": "Büro, Wohnen, Logistik", + }, + { + "sentence": "Investiert wird in Büro, Logistik und Studentenwohnen.", + "value": "Büro, Logistik, Studentenwohnen", + }, + ], }, { "name": "Länderallokation", - "description": "Geografische Verteilung der Investments", "mandatory": False, "type": KPISettingType.ARRAY, - "translation": "Country Allocation", - "example": "Deutschland,Frankreich, Österreich, Schweiz", "position": 15, "active": True, + "examples": [ + { + "sentence": "Investitionen erfolgen in Deutschland, Frankreich und Österreich.", + "value": "Deutschland, Frankreich, Österreich", + }, + { + "sentence": "Die Länderallokation umfasst Deutschland, Schweiz und Frankreich.", + "value": "Deutschland, Schweiz, Frankreich", + }, + ], }, ] @@ -165,13 +255,11 @@ def seed_default_kpi_settings(): for kpi_data in default_kpi_settings: kpi_setting = KPISettingModel( name=kpi_data["name"], - description=kpi_data["description"], mandatory=kpi_data["mandatory"], type=kpi_data["type"], - translation=kpi_data["translation"], - example=kpi_data["example"], position=kpi_data["position"], active=kpi_data["active"], + examples=kpi_data.get("examples", []), ) db.session.add(kpi_setting) diff --git a/project/backend/exxetaGPT-service/app.py b/project/backend/exxetaGPT-service/app.py index 9b1597b..4c97749 100644 --- a/project/backend/exxetaGPT-service/app.py +++ b/project/backend/exxetaGPT-service/app.py @@ -6,9 +6,12 @@ import json app = Flask(__name__) -VALIDATE_SERVICE_URL = os.getenv("VALIDATE_SERVICE_URL", "http://localhost:5054/validate") +VALIDATE_SERVICE_URL = os.getenv( + "VALIDATE_SERVICE_URL", "http://localhost:5054/validate" +) -@app.route('/extract', methods=['POST']) + +@app.route("/extract", methods=["POST"]) def extract_text_from_ocr_json(): json_data = request.get_json() @@ -16,19 +19,19 @@ def extract_text_from_ocr_json(): pages_data = json_data["extracted_text_per_page"] entities_json = extract_with_exxeta(pages_data, pitchbook_id) - entities = json.loads(entities_json) if isinstance(entities_json, str) else entities_json + entities = ( + json.loads(entities_json) if isinstance(entities_json, str) else entities_json + ) - validate_payload = { - "id": pitchbook_id, - "service": "exxeta", - "entities": entities - } + validate_payload = {"id": pitchbook_id, "service": "exxeta", "entities": entities} print(f"[EXXETA] Sending to validate service: {VALIDATE_SERVICE_URL}") print(f"[EXXETA] Payload: {validate_payload} entities for pitchbook {pitchbook_id}") try: - response = requests.post(VALIDATE_SERVICE_URL, json=validate_payload, timeout=600) + response = requests.post( + VALIDATE_SERVICE_URL, json=validate_payload, timeout=600 + ) print(f"[EXXETA] Validate service response: {response.status_code}") if response.status_code != 200: print(f"[EXXETA] Validate service error: {response.text}") diff --git a/project/backend/exxetaGPT-service/extractExxeta.py b/project/backend/exxetaGPT-service/extractExxeta.py index 3948c8b..94cc746 100644 --- a/project/backend/exxetaGPT-service/extractExxeta.py +++ b/project/backend/exxetaGPT-service/extractExxeta.py @@ -16,6 +16,7 @@ TIMEOUT = 180 logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) + def get_dynamic_labels(): url = f"{COORDINATOR_URL}/api/kpi_setting/" try: @@ -28,6 +29,7 @@ def get_dynamic_labels(): logger.warning(f"Konnte dynamische Labels nicht laden: {e}") return [] + def extract_with_exxeta(pages_json, pitchbook_id): results = [] @@ -39,7 +41,10 @@ def extract_with_exxeta(pages_json, pitchbook_id): for page_data in pages_json: i += 1 if i % 8 == 0: - requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35 + 60/len(pages_json)*i}) + requests.post( + COORDINATOR_URL + "/api/progress", + json={"id": pitchbook_id, "progress": 35 + 60 / len(pages_json) * i}, + ) page_num = page_data.get("page") text = page_data.get("text", "") @@ -100,23 +105,28 @@ def extract_with_exxeta(pages_json, pitchbook_id): headers = { "Content-Type": "application/json", - "Authorization": f"Bearer {EXXETA_API_KEY}" + "Authorization": f"Bearer {EXXETA_API_KEY}", } payload = { "model": MODEL, "messages": [ - {"role": "system", "content": "Du bist ein Finanzanalyst. Antworte ausschließlich mit einem validen JSON-Array."}, - {"role": "user", "content": prompt} + { + "role": "system", + "content": "Du bist ein Finanzanalyst. Antworte ausschließlich mit einem validen JSON-Array.", + }, + {"role": "user", "content": prompt}, ], - "temperature": 0.0 + "temperature": 0.0, } url = f"{EXXETA_BASE_URL}/deployments/{MODEL}/chat/completions" for attempt in range(1, MAX_RETRIES + 1): try: - response = requests.post(url, headers=headers, json=payload, timeout=TIMEOUT) + response = requests.post( + url, headers=headers, json=payload, timeout=TIMEOUT + ) response.raise_for_status() content = response.json()["choices"][0]["message"]["content"].strip() if content.startswith("```json"): @@ -140,9 +150,12 @@ def extract_with_exxeta(pages_json, pitchbook_id): if attempt == MAX_RETRIES: results.extend([]) - requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 95}) + requests.post( + COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 95} + ) return json.dumps(results, indent=2, ensure_ascii=False) + if __name__ == "__main__": print("📡 Test-Aufruf get_dynamic_labels:") - print(get_dynamic_labels()) \ No newline at end of file + print(get_dynamic_labels()) diff --git a/project/backend/ocr-service/app.py b/project/backend/ocr-service/app.py index ba6c0ae..472a3d3 100644 --- a/project/backend/ocr-service/app.py +++ b/project/backend/ocr-service/app.py @@ -29,19 +29,17 @@ def convert_pdf_async(temp_path, pitchbook_id): temp_path.unlink() # cleanup return {"error": "OCR processing failed - all PDFs must be OCR'd"}, 500 - with open(ocr_path, 'rb') as ocr_file: + with open(ocr_path, "rb") as ocr_file: ocr_file.seek(0) result = pdf_to_json(ocr_file) - - payload = { - "id": int(pitchbook_id), - "extracted_text_per_page": result["pages"] - } + payload = {"id": int(pitchbook_id), "extracted_text_per_page": result["pages"]} logger.info("Sending payload to EXXETA and SPACY services") - requests.post(COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35}) + requests.post( + COORDINATOR_URL + "/api/progress", json={"id": pitchbook_id, "progress": 35} + ) try: exxeta_response = requests.post(EXXETA_URL, json=payload, timeout=600) logger.info(f"EXXETA response: {exxeta_response.status_code}") @@ -54,14 +52,16 @@ def convert_pdf_async(temp_path, pitchbook_id): except Exception as e: logger.error(f"Error calling SPACY: {e}") - files=[ - ('file',('',open(ocr_path,'rb'),'application/pdf')) - ] + files = [("file", ("", open(ocr_path, "rb"), "application/pdf"))] headers = {} try: - - requests.put(f"{COORDINATOR_URL}/api/pitch_book/{pitchbook_id}", files=files, timeout=600, headers=headers) + requests.put( + f"{COORDINATOR_URL}/api/pitch_book/{pitchbook_id}", + files=files, + timeout=600, + headers=headers, + ) logger.info("COORDINATOR response: Progress + File updated") except Exception as e: logger.error(f"Error calling COORDINATOR: {e}") @@ -72,7 +72,7 @@ def convert_pdf_async(temp_path, pitchbook_id): logger.error(f"Exception in OCR processing: {str(e)}", exc_info=True) -@app.route('/ocr', methods=['POST']) +@app.route("/ocr", methods=["POST"]) def convert_extract_text_from_pdf(): if "file" not in request.files: return {"error": "No file"}, 400 @@ -85,7 +85,7 @@ def convert_extract_text_from_pdf(): if not pitchbook_id: return {"error": "No ID"}, 400 - with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file: + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: file.seek(0) temp_file.write(file.read()) temp_path = Path(temp_file.name) @@ -93,10 +93,7 @@ def convert_extract_text_from_pdf(): thread = threading.Thread(target=convert_pdf_async, args=(temp_path, pitchbook_id)) thread.start() - return { - "status": "sent", - "message": "PDF successfully OCR'd and processed" - }, 200 + return {"status": "sent", "message": "PDF successfully OCR'd and processed"}, 200 if __name__ == "__main__": diff --git a/project/backend/ocr-service/ocr_runner.py b/project/backend/ocr-service/ocr_runner.py index 1ce9599..0b8abfa 100644 --- a/project/backend/ocr-service/ocr_runner.py +++ b/project/backend/ocr-service/ocr_runner.py @@ -17,9 +17,10 @@ log_folder = TEMP_DIR / "logs" output_folder.mkdir(exist_ok=True) log_folder.mkdir(exist_ok=True) + def pdf_to_json(pdf_input): try: - if hasattr(pdf_input, 'read'): + if hasattr(pdf_input, "read"): pdf_input.seek(0) with pdfplumber.open(pdf_input) as pdf: @@ -83,7 +84,9 @@ def ocr_pdf(input_file_path: Path): if result.returncode == 0: if output_file.exists(): - logger.info(f"OCR successful, output file size: {output_file.stat().st_size} bytes") + logger.info( + f"OCR successful, output file size: {output_file.stat().st_size} bytes" + ) return output_file else: logger.error(f"OCR completed but output file not found: {output_file}") @@ -119,4 +122,4 @@ def extract_text_to_json(pdf_path: Path): except Exception as e: logger.error(f"Failed to extract text to JSON: {e}") - return None \ No newline at end of file + return None diff --git a/project/backend/validate-service/app.py b/project/backend/validate-service/app.py index 693c032..ccd6b53 100644 --- a/project/backend/validate-service/app.py +++ b/project/backend/validate-service/app.py @@ -40,7 +40,9 @@ def send_to_coordinator_service(processed_data, request_id): def process_data_async(request_id, spacy_data, exxeta_data): try: - requests.post(COORDINATOR_URL + "/api/progress", json={"id": request_id, "progress": 95}) + requests.post( + COORDINATOR_URL + "/api/progress", json={"id": request_id, "progress": 95} + ) print(f"Start asynchronous processing for PitchBook: {request_id}") # Perform merge @@ -96,7 +98,6 @@ def validate(): # If both datasets are present, start asynchronous processing if spacy_data is not None and exxeta_data is not None: - # Start asynchronous processing in a separate thread processing_thread = threading.Thread( target=process_data_async, diff --git a/project/backend/validate-service/merge_logic.py b/project/backend/validate-service/merge_logic.py index 1bc404c..5f4f7b7 100644 --- a/project/backend/validate-service/merge_logic.py +++ b/project/backend/validate-service/merge_logic.py @@ -27,7 +27,6 @@ def merge_entities(spacy_data, exxeta_data): and s_entity_norm == e_entity_norm and s_page == e_page ): - merged.append( { "label": s["label"], diff --git a/project/backend/validate-service/validate_logic.py b/project/backend/validate-service/validate_logic.py index 1e90d84..416e82c 100644 --- a/project/backend/validate-service/validate_logic.py +++ b/project/backend/validate-service/validate_logic.py @@ -5,6 +5,8 @@ import os # SETTINGS = [{"id": "Rendite", "type": "number"}] COORDINATOR_URL = os.getenv("COORDINATOR_URL", "http://localhost:5000") + + def validate_entities(entities): try: response = requests.get(COORDINATOR_URL + "/api/kpi_setting/") @@ -42,7 +44,6 @@ def validate_entities(entities): result.extend(item[1]) continue - # Filter not validated, if there are valid values validated = False for entity in item[1]: @@ -61,11 +62,11 @@ def validate_entities(entities): def validate_number(entity_list, settings): filtered_kpi = {} for label, entity_list in entity_list.items(): - setting = next((s for s in settings if s["name"].upper() == label), None) if setting and setting["type"] == "number": filtered_entities = [ - entity for entity in entity_list + entity + for entity in entity_list if is_valid_number(str(entity["entity"])) ] for entity in entity_list: @@ -80,8 +81,12 @@ def validate_number(entity_list, settings): def is_valid_number(number): - pattern = r'^[0-9\-\s%,.€]+$' - return any(char.isdigit() for char in number) and not re.search(r'\d+\s\d+', number) and re.fullmatch(pattern, number) + pattern = r"^[0-9\-\s%,.€]+$" + return ( + any(char.isdigit() for char in number) + and not re.search(r"\d+\s\d+", number) + and re.fullmatch(pattern, number) + ) def delete_exxeta_unknown(entity_list): @@ -89,11 +94,16 @@ def delete_exxeta_unknown(entity_list): for label, entity_list in entity_list.items(): # Filter out entities with "nichtangegeben" or "n/a" (case-insensitive and stripped) filtered_entities = [ - entity for entity in entity_list - if str(entity["entity"]).lower().replace(" ", "") not in {"nichtangegeben", "n/a"} + entity + for entity in entity_list + if str(entity["entity"]).lower().replace(" ", "") + not in {"nichtangegeben", "n/a"} ] for entity in entity_list: - if str(entity["entity"]).lower().replace(" ", "") in {"nichtangegeben", "n/a"}: + if str(entity["entity"]).lower().replace(" ", "") in { + "nichtangegeben", + "n/a", + }: print(f"filtered out: {entity}") if filtered_entities: # Only add the label if there are entities left filtered_kpi[label] = filtered_entities @@ -115,6 +125,7 @@ def delete_duplicate_entities(entity_list): unique_entities[label] = filtered_entities return unique_entities + if __name__ == "__main__": entities = [ # {"label": "PERSON", "entity": "John Doe", "status": "validated"}, diff --git a/project/frontend/src/components/KPIForm.tsx b/project/frontend/src/components/KPIForm.tsx index 452dfd8..13128b4 100644 --- a/project/frontend/src/components/KPIForm.tsx +++ b/project/frontend/src/components/KPIForm.tsx @@ -122,7 +122,7 @@ export function KPIForm({ mode, initialData, onSave, onCancel, loading = false, example: formData.example || '', position: formData.position ?? 0, active: formData.active ?? true, - examples: [{ sentence: '', value: '' }] + examples: formData.examples ?? [] }); // Formular zurücksetzen: setFormData(emptyKPI);