"""Validation of extracted KPI entities.

Entities are dicts with at least the keys ``label``, ``entity`` and
``status``.  They are grouped by label, cleaned by three filters
(placeholder values, non-numeric values for numeric KPIs, duplicates) and
finally reduced to the validated candidates per label.
"""
import logging
import os
import re
from typing import Any, Dict, List

logger = logging.getLogger(__name__)

COORDINATOR_URL = os.getenv("COORDINATOR_URL", "http://localhost:5000")

# Upper bound for the settings request so a hung coordinator cannot block
# validation indefinitely.
_SETTINGS_TIMEOUT_SECONDS = 10

# Placeholder texts (compared after lower-casing and removing spaces) that
# mean "no value was given".
_UNKNOWN_VALUES = frozenset({"nichtangegeben", "n/a"})

# Characters allowed in a numeric KPI value.
_NUMBER_CHARS_RE = re.compile(r"[0-9\-\s%,.€]+")
# Two digit groups separated by a single whitespace character, e.g. "8 8".
_SPLIT_DIGITS_RE = re.compile(r"\d+\s\d+")

EntityGroups = Dict[str, List[Dict[str, Any]]]


def _normalise(value: Any) -> str:
    """Return *value* as lower-cased text with all spaces removed."""
    return str(value).lower().replace(" ", "")


def _fetch_kpi_settings() -> List[Dict[str, Any]]:
    """Fetch the KPI settings from the coordinator; return [] on any failure."""
    # Imported lazily so the pure validation helpers stay importable
    # without the HTTP client installed.
    import requests

    url = COORDINATOR_URL + "/api/kpi_setting/"
    try:
        response = requests.get(url, timeout=_SETTINGS_TIMEOUT_SECONDS)
    except requests.exceptions.RequestException as exc:
        logger.warning("Error fetching settings: %s", exc)
        return []

    if response.status_code != 200:
        logger.warning(
            "Fetching settings returned HTTP %s", response.status_code
        )
        return []

    try:
        settings = response.json()
    except ValueError as exc:  # body is not valid JSON
        logger.warning("Error decoding settings: %s", exc)
        return []

    # The filters iterate over a list of setting dicts; anything else is
    # treated like "no settings".
    return settings if isinstance(settings, list) else []


def validate_entities(entities):
    """Clean *entities* and return the entries to keep.

    Steps:
      1. Load the KPI settings from the coordinator (best effort).
      2. Group the entities by their ``label``.
      3. Drop placeholder values, invalid numbers and duplicates.
      4. Per label, keep all ``FONDSNAME`` entries; for other labels keep
         only the ``validated`` entries when there are any.

    Args:
        entities: Iterable of dicts with the keys ``label``, ``entity``
            and ``status``.

    Returns:
        A flat list of the entity dicts that survived validation.
    """
    settings = _fetch_kpi_settings()

    # Group by label, e.g.
    # {"PERSON": [{"label": "PERSON", "entity": "John Doe", ...}]}
    reduced_kpi: EntityGroups = {}
    for item in entities:
        reduced_kpi.setdefault(item["label"], []).append(item)

    reduced_kpi = delete_exxeta_unknown(reduced_kpi)
    reduced_kpi = validate_number(reduced_kpi, settings)
    reduced_kpi = delete_duplicate_entities(reduced_kpi)

    result = []
    for label, group in reduced_kpi.items():
        if label == "FONDSNAME":
            result.extend(group)
            continue

        # NOTE(review): the patch this module was recovered from elides
        # lines 37-41 and 50-56 of the patched file.  They contain one more
        # "keep every entry" branch whose condition is not visible, and the
        # tail of the selection below.  The selection is reconstructed from
        # the visible statements and the visible comment ("Filter not
        # validated, if there are valid values") - confirm against the
        # real file before merging.
        validated = [
            entity for entity in group if entity["status"] == "validated"
        ]
        result.extend(validated if validated else group)

    return result


def validate_number(entity_list, settings):
    """Drop non-numeric values from labels configured as numeric KPIs.

    Args:
        entity_list: Mapping of label to the list of entity dicts.
        settings: List of setting dicts.  A setting marks a label as
            numeric when its upper-cased ``name`` equals the label and its
            ``type`` is ``"number"``.  Entries without these keys are
            ignored instead of raising ``KeyError``.

    Returns:
        A new mapping.  Labels that are not numeric are passed through
        unchanged; numeric labels keep only valid numbers and are omitted
        when nothing is left.
    """
    # The first setting with a given name wins.
    type_by_label: Dict[str, Any] = {}
    for setting in settings or []:
        if isinstance(setting, dict):
            name = str(setting.get("name", "")).upper()
            type_by_label.setdefault(name, setting.get("type"))

    filtered_kpi = {}
    for label, group in entity_list.items():
        if type_by_label.get(label) != "number":
            filtered_kpi[label] = group
            continue

        valid = []
        for entity in group:
            if is_valid_number(entity["entity"]):
                valid.append(entity)
            else:
                logger.info("Invalid number: %s", entity)
        if valid:  # only keep the label if there are entities left
            filtered_kpi[label] = valid

    return filtered_kpi


def is_valid_number(number) -> bool:
    """Return True if *number* looks like a numeric KPI value.

    A value is accepted when it contains at least one digit, consists only
    of digits, ``-``, whitespace, ``%``, ``,``, ``.`` and ``€``, and does
    not contain two digit groups separated by a single whitespace
    character (``"8 8"``).  Non-string input is converted with ``str()``.
    """
    text = str(number)
    return (
        any(char.isdigit() for char in text)
        and _SPLIT_DIGITS_RE.search(text) is None
        and _NUMBER_CHARS_RE.fullmatch(text) is not None
    )


def delete_exxeta_unknown(entity_list):
    """Remove placeholder values such as "nicht angegeben" or "N/A".

    Values are compared case-insensitively with spaces removed.

    Args:
        entity_list: Mapping of label to the list of entity dicts.

    Returns:
        A new mapping without the placeholder entries; labels whose list
        becomes empty are omitted.
    """
    filtered_kpi = {}
    for label, group in entity_list.items():
        kept = []
        for entity in group:
            if _normalise(entity["entity"]) in _UNKNOWN_VALUES:
                logger.info("filtered out: %s", entity)
            else:
                kept.append(entity)
        if kept:  # only keep the label if there are entities left
            filtered_kpi[label] = kept
    return filtered_kpi


def delete_duplicate_entities(entity_list):
    """Keep only the first occurrence of each value per label.

    Two values are duplicates when they are equal after lower-casing and
    removing spaces (``"3,5"`` and ``"3 , 5"``).

    Args:
        entity_list: Mapping of label to the list of entity dicts.

    Returns:
        A new mapping with duplicates removed; labels whose list becomes
        empty are omitted.
    """
    unique_entities = {}
    for label, group in entity_list.items():
        seen = set()
        kept = []
        for entity in group:
            key = _normalise(entity["entity"])
            if key in seen:
                logger.info("Duplicate entity: %s", entity)
                continue
            seen.add(key)
            kept.append(entity)
        if kept:
            unique_entities[label] = kept
    return unique_entities


if __name__ == "__main__":
    # Show the per-entity diagnostics when run as a demo script.
    logging.basicConfig(level=logging.INFO)

    entities = [
        {"label": "RENDITE", "entity": "8 8 8 8 8", "status": "validated"},
        {"label": "RENDITE", "entity": "N/A", "status": "validated"},
        {"label": "RENDITE", "entity": "nicht angegeben", "status": "validated"},
        {"label": "RENDITE", "entity": "uaieluae--t>", "status": "validated"},
        {"label": "RENDITE", "entity": "3,5", "status": "validated"},
        {"label": "RENDITE", "entity": "3,5", "status": "validated"},
        {"label": "RENDITE", "entity": "3 , 5", "status": "validated"},
        {"label": "RENDITE", "entity": "3%", "status": "validated"},
        {"label": "RENDITE", "entity": "", "status": "invalid"},
        {"label": "RENDITE", "entity": "2 mehr als 6", "status": "invalid"},
        {"label": "RENDITE", "entity": 2, "status": "invalid"},
    ]
    print(validate_entities(entities))