diff --git a/project/backend/exxetaGPT-service/app.py b/project/backend/exxetaGPT-service/app.py index 76c714e..1326b24 100644 --- a/project/backend/exxetaGPT-service/app.py +++ b/project/backend/exxetaGPT-service/app.py @@ -25,7 +25,7 @@ def extract_text_from_ocr_json(): } requests.post(VALIDATE_SERVICE_URL, json=validate_payload, timeout=600) - return jsonify({"Sent to validate-service"}), 200 + return jsonify("Sent to validate-service"), 200 if __name__ == "__main__": diff --git a/project/backend/exxetaGPT-service/extractExxeta.py b/project/backend/exxetaGPT-service/extractExxeta.py index 6ae0cc3..8bd979d 100644 --- a/project/backend/exxetaGPT-service/extractExxeta.py +++ b/project/backend/exxetaGPT-service/extractExxeta.py @@ -3,11 +3,10 @@ import json import os import time import logging - from dotenv import load_dotenv MODEL = "gpt-4o-mini" -EXXETA_BASE_URL= "https://ai.exxeta.com/api/v2/azure/openai" +EXXETA_BASE_URL = "https://ai.exxeta.com/api/v2/azure/openai" load_dotenv() EXXETA_API_KEY = os.getenv("API_KEY") @@ -19,13 +18,15 @@ logger = logging.getLogger(__name__) def extract_with_exxeta(pages_json): results = [] + if not EXXETA_API_KEY: logger.warning("EXXETA_API_KEY nicht gesetzt. Rückgabe eines leeren JSON.") return json.dumps(results, indent=2, ensure_ascii=False) for page_data in pages_json: page_num = page_data.get("page") - text = page_data.get("text", "").strip() + page_data.get("page") + text = page_data.get("text", "") if not text: continue @@ -105,7 +106,7 @@ def extract_with_exxeta(pages_json): payload = { "model": MODEL, "messages": [ - {"role": "system", "content": "Du bist ein Finanzanalyst, der Fondsprofile auswertet. Antworte nur mit validen JSON-Arrays."}, + {"role": "system", "content": "Du bist ein Finanzanalyst. Antworte ausschließlich mit einem validen JSON-Array."}, {"role": "user", "content": prompt} ], "temperature": 0.0 @@ -143,5 +144,4 @@ def extract_with_exxeta(pages_json): if attempt == MAX_RETRIES: results.extend([]) - json_result = json.dumps(results, indent=2, ensure_ascii=False) - return json_result \ No newline at end of file + return json.dumps(results, indent=2, ensure_ascii=False) \ No newline at end of file diff --git a/project/backend/ocr-service/app.py b/project/backend/ocr-service/app.py index ac21052..a83e9b8 100644 --- a/project/backend/ocr-service/app.py +++ b/project/backend/ocr-service/app.py @@ -16,7 +16,7 @@ def convert_extract_text_from_pdf(): return {"error": "No file"}, 400 file = request.files["file"] - pitchbook_id = request.form.get("id") + pitchbook_id = request.form.get("pitchbook_id") if not pitchbook_id: return {"error": "No ID"}, 400 diff --git a/project/backend/spacy-service/app.py b/project/backend/spacy-service/app.py index 6ae877d..9a81cdc 100644 --- a/project/backend/spacy-service/app.py +++ b/project/backend/spacy-service/app.py @@ -25,7 +25,7 @@ def extract_pdf(): } requests.post(VALIDATE_SERVICE_URL, json=validate_payload, timeout=600) - return jsonify({"Sent to validate-service"}), 200 + return jsonify("Sent to validate-service"), 200 if __name__ == "__main__": diff --git a/project/backend/spacy-service/extractSpacy.py b/project/backend/spacy-service/extractSpacy.py index 8218345..8ae4e31 100644 --- a/project/backend/spacy-service/extractSpacy.py +++ b/project/backend/spacy-service/extractSpacy.py @@ -6,15 +6,14 @@ current_dir = os.path.dirname(os.path.abspath(__file__)) model_path = os.path.join(current_dir, "spacy_training/output/model-last") nlp = spacy.load(model_path) - def extract(pages_json): - results = [] for page in pages_json: - text = page.get("text", "").strip() - page_num = page.get("page") + text = page.get("text", "") + text = text.strip() + page_num = page.get("page") if not text: continue @@ -26,5 +25,4 @@ def extract(pages_json): "page": page_num }) - json_result = json.dumps(results, indent=2, ensure_ascii=False) - return json_result + return json.dumps(results, indent=2, ensure_ascii=False) \ No newline at end of file diff --git a/project/docker-compose.yml b/project/docker-compose.yml index 5edb929..7feb933 100644 --- a/project/docker-compose.yml +++ b/project/docker-compose.yml @@ -29,9 +29,9 @@ services: condition: service_healthy healthcheck: test: wget --spider --no-verbose http://127.0.0.1:5000/health || exit 1 - interval: 10s - timeout: 5s - retries: 5 + interval: 20s + timeout: 10s + retries: 10 ports: - 5050:5000