# pse2_ff/project/backend/validate-service/validate_logic.py

# 150 lines
# 5.4 KiB
# Python

from typing import Dict, List
import re
import requests
import os
# Sample settings payload, currently disabled; presumably a local stand-in for
# the settings fetched from the coordinator.
# NOTE(review): the sample uses the key "id" while validate_number() looks up
# "name" - confirm which key the coordinator actually returns.
# SETTINGS = [{"id": "Rendite", "type": "number"}]
# Base URL of the coordinator service. Read from the COORDINATOR_URL environment
# variable, falling back to http://localhost:5000 when it is not set.
COORDINATOR_URL = os.getenv("COORDINATOR_URL", "http://localhost:5000")
# Labels whose entities are always returned, whatever their "status" is.
_PASS_THROUGH_LABELS = frozenset({"FONDSNAME", "DATUM", "FONDSMANAGER"})


def _fetch_kpi_settings(timeout=10):
    """Fetch the KPI settings list from the coordinator, best-effort.

    Any failure (network error, timeout, non-200 status, undecodable body or a
    body that is not a JSON list) is reported on stdout and yields ``[]`` so
    that validation can continue without number checks.

    Args:
        timeout: Seconds to wait for the coordinator before giving up. Without
            a timeout ``requests`` would wait indefinitely on a stalled peer.

    Returns:
        The decoded list of setting dicts, or an empty list on failure.
    """
    try:
        response = requests.get(
            COORDINATOR_URL + "/api/kpi_setting/", timeout=timeout
        )
        if response.status_code != 200:
            print(f"Error fetching settings: HTTP {response.status_code}")
            return []
        # Older requests versions raise a plain ValueError for invalid JSON,
        # hence the second exception type below.
        settings = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error fetching settings: {e}")
        return []
    if not isinstance(settings, list):
        print(f"Error fetching settings: unexpected payload {settings!r}")
        return []
    return settings


def validate_entities(entities):
    """Clean up extracted entities and return the ones worth keeping.

    The entities are grouped by label and then run through three filters, in
    this order: placeholder removal (``delete_exxeta_unknown``), numeric
    validation against the coordinator's KPI settings (``validate_number``)
    and de-duplication (``delete_duplicate_entities``).

    For the labels FONDSNAME, DATUM and FONDSMANAGER every remaining entity is
    returned. For all other labels only the entities with status
    ``"validated"`` are returned when at least one exists; otherwise all
    remaining entities of that label are returned.

    Args:
        entities: Iterable of dicts with at least the keys ``"label"``,
            ``"entity"`` and ``"status"``.

    Returns:
        A flat list of the surviving entity dicts, ordered by first appearance
        of their label.
    """
    settings = _fetch_kpi_settings()

    # Group by label, e.g.
    # {"PERSON": [{"label": "PERSON", "entity": "John Doe", "status": "validated"}]}
    reduced_kpi: Dict[str, List[Dict[str, str]]] = {}
    for item in entities:
        reduced_kpi.setdefault(item["label"], []).append(item)

    reduced_kpi = delete_exxeta_unknown(reduced_kpi)
    reduced_kpi = validate_number(reduced_kpi, settings)
    reduced_kpi = delete_duplicate_entities(reduced_kpi)

    result = []
    for label, items in reduced_kpi.items():
        if label in _PASS_THROUGH_LABELS:
            result.extend(items)
            continue
        # Prefer validated values; fall back to everything if none is validated.
        validated_items = [x for x in items if x["status"] == "validated"]
        result.extend(validated_items or items)
    return result
def validate_number(entity_list, settings):
    """Drop non-numeric values from every label configured as a number KPI.

    A label is treated as numeric when the first setting whose upper-cased
    ``"name"`` equals the label has ``"type" == "number"``. Entities of such a
    label are kept only if ``is_valid_number`` accepts their ``"entity"``
    value; rejected ones are reported on stdout. A numeric label with no valid
    entity left is omitted from the result. All other labels are passed
    through unchanged.

    Args:
        entity_list: Mapping of label to a list of entity dicts (a dict,
            despite the parameter name). Each entity dict needs an
            ``"entity"`` key.
        settings: Iterable of setting dicts. Entries without a string
            ``"name"`` are ignored; a missing ``"type"`` means "not a number".

    Returns:
        A new dict with the same shape as ``entity_list``.
    """
    # Index the settings once. setdefault keeps the first entry per name, which
    # matches a first-match linear search over the settings.
    type_by_label = {}
    for setting in settings:
        name = setting.get("name")
        if isinstance(name, str):
            type_by_label.setdefault(name.upper(), setting.get("type"))

    filtered_kpi = {}
    for label, entities in entity_list.items():
        if type_by_label.get(label) != "number":
            filtered_kpi[label] = entities
            continue

        valid_entities = []
        for entity in entities:
            if is_valid_number(str(entity["entity"])):
                valid_entities.append(entity)
            else:
                print(f"Invalid number: {entity}")
        if valid_entities:  # Only add the label if there are entities left
            filtered_kpi[label] = valid_entities
    return filtered_kpi
def is_valid_number(number):
    """Return True if *number* looks like a single numeric value.

    The value is accepted when it contains at least one digit, consists only
    of digits, ``-``, whitespace, ``%``, ``,``, ``.`` and ``€``, and does not
    contain two digit groups separated by a single whitespace character
    (e.g. ``"8 8 8"``).

    Args:
        number: The value to check. Non-string input is converted with
            ``str()`` first.

    Returns:
        A strict ``bool`` rather than a match object or ``None``.
    """
    text = str(number)
    pattern = r"^[0-9\-\s%,.€]+$"
    return bool(
        any(char.isdigit() for char in text)
        and not re.search(r"\d+\s\d+", text)
        and re.fullmatch(pattern, text)
    )
def delete_exxeta_unknown(entity_list):
    """Remove placeholder entities such as "nicht angegeben" or "N/A".

    Each entity's ``"entity"`` value is converted to a string, lower-cased and
    stripped of space characters before being compared with the placeholders.
    Every removed entity is reported on stdout. Labels that end up without any
    entity are left out of the result.

    Args:
        entity_list: Mapping of label to a list of entity dicts.

    Returns:
        A new mapping containing only the non-placeholder entities.
    """
    placeholders = {"nichtangegeben", "n/a"}
    kept_by_label = {}
    for label, candidates in entity_list.items():
        survivors = []
        for candidate in candidates:
            normalized = str(candidate["entity"]).lower().replace(" ", "")
            if normalized in placeholders:
                print(f"filtered out: {candidate}")
            else:
                survivors.append(candidate)
        if survivors:  # labels with nothing left are dropped entirely
            kept_by_label[label] = survivors
    return kept_by_label
def delete_duplicate_entities(entity_list):
    """Keep only the first occurrence of each entity value per label.

    Two entities count as duplicates when their ``"entity"`` values are equal
    after string conversion, lower-casing and removal of space characters.
    Every dropped duplicate is reported on stdout. Labels without any entity
    are left out of the result.

    Args:
        entity_list: Mapping of label to a list of entity dicts.

    Returns:
        A new mapping with duplicates removed, preserving input order.
    """
    deduplicated = {}
    for label, candidates in entity_list.items():
        seen_keys = set()
        first_occurrences = []
        for candidate in candidates:
            key = str(candidate["entity"]).lower().replace(" ", "")
            if key in seen_keys:
                print(f"Duplicate entity: {candidate}")
                continue
            seen_keys.add(key)
            first_occurrences.append(candidate)
        if first_occurrences:
            deduplicated[label] = first_occurrences
    return deduplicated
if __name__ == "__main__":
    # Manual smoke run: push a handful of RENDITE samples (placeholders,
    # duplicates, malformed and well-formed numbers) through the pipeline and
    # print what survives. Entries for other labels can be added the same way.
    entities = [
        {"label": "RENDITE", "entity": value, "status": status}
        for value, status in (
            ("8 8 8 8 8", "validated"),
            ("N/A", "validated"),
            ("nicht angegeben", "validated"),
            ("uaieluae--t>", "validated"),
            ("3,5", "validated"),
            ("3,5", "validated"),
            ("3 , 5", "validated"),
            ("3%", "validated"),
            ("", "invalid"),
            ("2 mehr als 6", "invalid"),
            (2, "invalid"),
        )
    ]
    print(validate_entities(entities))