add validation in service #93

Merged
3023730 merged 3 commits from #91-validate into main 2025-06-27 10:06:00 +02:00
1 changed file with 100 additions and 8 deletions

View File

@ -1,15 +1,36 @@
from typing import Dict, List
import re
import requests
import os
# SETTINGS = [{"id": "Rendite", "type": "number"}]
COORDINATOR_URL = os.getenv("COORDINATOR_URL", "http://localhost:5000")
def validate_entities(entities):
try:
response = requests.get(COORDINATOR_URL + "/api/kpi_setting/")
if response.status_code == 200:
settings = response.json()
else:
settings = []
except requests.exceptions.RequestException as e:
print(f"Error fetching settings: {e}")
settings = []
# settings = SETTINGS
result = []
reduced_kpi: Dict[str, List[Dict[str, str | int]]] = {}
reduced_kpi: Dict[str, List[Dict[str, str]]] = {}
# reduce entities by label. Example: {"PERSON": [{"label": "PERSON", "entity": "John Doe", "status": "validated"}]}
for item in entities:
label = item["label"]
if label not in reduced_kpi:
reduced_kpi[label] = []
reduced_kpi[label].append(item)
reduced_kpi = delete_exxeta_unknown(reduced_kpi)
reduced_kpi = validate_number(reduced_kpi, settings)
reduced_kpi = delete_duplicate_entities(reduced_kpi)
for item in reduced_kpi.items():
if item[0] == "FONDSNAME":
result.extend(item[1])
@ -21,6 +42,8 @@ def validate_entities(entities):
result.extend(item[1])
continue
# Filter not validated, if there are valid values
validated = False
for entity in item[1]:
if entity["status"] == "validated":
@ -34,13 +57,82 @@ def validate_entities(entities):
return result
def validate_number(entity_list, settings):
    """Drop non-numeric values from labels whose KPI setting has type "number".

    Args:
        entity_list: Mapping of label -> list of entity dicts. Despite the
            parameter name this is a dict; each entity dict is read through
            its "entity" key.
        settings: Iterable of setting dicts read through their "name" and
            "type" keys. A setting applies to a label when its upper-cased
            name equals the label. ``None``, non-dict entries and entries
            without a string "name" are ignored instead of raising.

    Returns:
        A new dict. Labels without a matching "number" setting are passed
        through unchanged. For "number" labels only the entities accepted by
        ``is_valid_number`` are kept, and the label is omitted entirely when
        none remain.
    """
    # Index settings once. ``setdefault`` keeps the first setting per name,
    # which matches a linear first-match search over the settings.
    type_by_label = {}
    for setting in settings or ():
        if not isinstance(setting, dict):
            continue
        name = setting.get("name")
        if isinstance(name, str):
            type_by_label.setdefault(name.upper(), setting.get("type"))

    filtered_kpi = {}
    for label, entities in entity_list.items():
        if type_by_label.get(label) != "number":
            filtered_kpi[label] = entities
            continue

        # Single pass: each value is validated exactly once.
        valid_entities = []
        for entity in entities:
            if is_valid_number(str(entity["entity"])):
                valid_entities.append(entity)
            else:
                print(f"Invalid number: {entity}")

        if valid_entities:  # Only add the label if there are entities left
            filtered_kpi[label] = valid_entities
    return filtered_kpi
# Characters allowed in a numeric value: ASCII digits, minus, whitespace,
# percent sign, comma, dot and the euro sign.
_NUMBER_CHARS_RE = re.compile(r'^[0-9\-\s%,.€]+$')
# Two digit groups separated by one whitespace character, e.g. "8 8".
_SPLIT_DIGITS_RE = re.compile(r'\d+\s\d+')


def is_valid_number(number):
    """Return True if *number* looks like a single numeric value.

    A value is accepted when it contains at least one digit, consists only of
    the characters allowed by ``_NUMBER_CHARS_RE``, and does not contain two
    digit groups separated by a single whitespace character (so "8 8 8" is
    rejected while "3 , 5" is accepted).

    Args:
        number: The value to check. Non-string input is converted with
            ``str()`` first.

    Returns:
        A real ``bool`` (never a match object or ``None``).
    """
    text = str(number)
    if not any(char.isdigit() for char in text):
        return False
    if _SPLIT_DIGITS_RE.search(text):
        return False
    return _NUMBER_CHARS_RE.fullmatch(text) is not None
def delete_exxeta_unknown(entity_list):
    """Remove placeholder values ("nicht angegeben", "n/a") from every label.

    Each entity's "entity" value is converted to a string, lower-cased and
    has its space characters removed before it is compared against the
    placeholders. Removed entities are reported on stdout.

    Args:
        entity_list: Mapping of label -> list of entity dicts.

    Returns:
        A new dict containing only the entities that are not placeholders;
        labels left without any entity are omitted.
    """
    placeholders = ("nichtangegeben", "n/a")
    cleaned = {}
    for label, candidates in entity_list.items():
        survivors = []
        for candidate in candidates:
            normalized = str(candidate["entity"]).lower().replace(" ", "")
            if normalized in placeholders:
                print(f"filtered out: {candidate}")
                continue
            survivors.append(candidate)
        # A label whose entities were all placeholders disappears entirely.
        if survivors:
            cleaned[label] = survivors
    return cleaned
def delete_duplicate_entities(entity_list):
    """Keep only the first occurrence of each value within a label.

    Two entities count as duplicates when their "entity" values are equal
    after conversion to a string, lower-casing and removal of space
    characters. Later duplicates are dropped and reported on stdout; other
    fields of the entity (such as "status") play no part in the comparison.

    Args:
        entity_list: Mapping of label -> list of entity dicts.

    Returns:
        A new dict with the de-duplicated lists; labels whose list is empty
        are omitted.
    """
    deduplicated = {}
    for label, candidates in entity_list.items():
        seen_keys = set()
        kept = []
        for candidate in candidates:
            key = str(candidate["entity"]).lower().replace(" ", "")
            if key in seen_keys:
                print(f"Duplicate entity: {candidate}")
            else:
                seen_keys.add(key)
                kept.append(candidate)
        if kept:
            deduplicated[label] = kept
    return deduplicated
if __name__ == "__main__":
    # Manual smoke test for validate_entities. The sample covers plain labels
    # as well as RENDITE values that exercise the placeholder, number and
    # duplicate filters. Note that validate_entities issues an HTTP request
    # to COORDINATOR_URL to fetch the KPI settings.
    entities = [
        {"label": "PERSON", "entity": "John Doe", "status": "validated"},
        {"label": "PERSON", "entity": "Exxeta", "status": "invalid"},
        {"label": "ORG", "entity": "Google", "status": "invalid"},
        {"label": "FONDSNAME", "entity": "Microsoft", "status": "validated"},
        {"label": "FONDSNAME", "entity": "Amazon", "status": "invalid"},
        {"label": "FONDSNAME", "entity": "Apple", "status": "invalid"},
        {"label": "RENDITE", "entity": "8 8 8 8 8", "status": "validated"},
        {"label": "RENDITE", "entity": "N/A", "status": "validated"},
        {"label": "RENDITE", "entity": "nicht angegeben", "status": "validated"},
        {"label": "RENDITE", "entity": "uaieluae--t>", "status": "validated"},
        {"label": "RENDITE", "entity": "3,5", "status": "validated"},
        {"label": "RENDITE", "entity": "3,5", "status": "validated"},
        {"label": "RENDITE", "entity": "3 , 5", "status": "validated"},
        {"label": "RENDITE", "entity": "3%", "status": "validated"},
        {"label": "RENDITE", "entity": "", "status": "invalid"},
        {"label": "RENDITE", "entity": "2 mehr als 6", "status": "invalid"},
        {"label": "RENDITE", "entity": 2, "status": "invalid"},
    ]
    print(validate_entities(entities))