85 lines
2.5 KiB
Python
85 lines
2.5 KiB
Python
from flask import Flask, request, jsonify
|
||
from extractSpacy import extract
|
||
import requests
|
||
import os
|
||
import json
|
||
from flask_cors import CORS
|
||
|
||
|
||
# Flask application object for this service; CORS(app) is applied to the
# whole application with no extra options.
app = Flask(__name__)
CORS(app)

# URL the extracted entities are POSTed to. Can be overridden through the
# VALIDATE_SERVICE_URL environment variable; falls back to a local default.
VALIDATE_SERVICE_URL = os.environ.get(
    "VALIDATE_SERVICE_URL",
    "http://localhost:5054/validate",
)
|
||
|
||
|
||
@app.route("/extract", methods=["POST"])
def extract_pdf():
    """Extract entities from per-page text and forward them to the validate service.

    Expects a JSON object in the request body with the keys ``id`` and
    ``extracted_text_per_page``. The page data is handed to ``extract``; its
    result (decoded first when it is a JSON string) is POSTed together with
    the id to ``VALIDATE_SERVICE_URL``.

    Returns:
        A 400 response with an ``error`` field when the body is not a JSON
        object or a required key is missing. Otherwise a 200 response with a
        fixed message.

    Note:
        Forwarding is best-effort: a failed request or a non-200 answer from
        the validate service is only logged, and this endpoint still answers
        200. NOTE(review): confirm whether callers should be told about
        forwarding failures.
    """
    # silent=True yields None for a missing/malformed body so we can answer
    # with a JSON 400 instead of failing on the subscript below.
    json_data = request.get_json(silent=True)
    if not isinstance(json_data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    required_keys = ("id", "extracted_text_per_page")
    missing = [key for key in required_keys if key not in json_data]
    if missing:
        return (
            jsonify({"error": f"Missing required field(s): {', '.join(missing)}"}),
            400,
        )

    pitchbook_id = json_data["id"]
    pages_data = json_data["extracted_text_per_page"]

    # extract() may hand back either a JSON string or an already-decoded object.
    entities_json = extract(pages_data)
    entities = (
        json.loads(entities_json) if isinstance(entities_json, str) else entities_json
    )

    validate_payload = {"id": pitchbook_id, "service": "spacy", "entities": entities}

    # Log the number of entities rather than dumping the whole payload.
    entity_count = len(entities) if hasattr(entities, "__len__") else "unknown"
    print(f"[SPACY] Sending to validate service: {VALIDATE_SERVICE_URL}")
    print(f"[SPACY] Payload: {entity_count} entities for pitchbook {pitchbook_id}")

    try:
        response = requests.post(
            VALIDATE_SERVICE_URL, json=validate_payload, timeout=600
        )
        print(f"[SPACY] Validate service response: {response.status_code}")
        if response.status_code != 200:
            print(f"[SPACY] Validate service error: {response.text}")
    except Exception as e:
        # Deliberately broad: forwarding must never crash the request handler.
        print(f"[SPACY] Error sending to validate service: {e}")

    return jsonify("Sent to validate-service"), 200
|
||
|
||
|
||
@app.route("/append-training-entry", methods=["POST"])
def append_training_entry():
    """Append one annotation entry to the spaCy training data file.

    Expects a JSON object in the request body containing at least the keys
    ``text`` and ``entities``. The entry is added to the JSON list stored in
    ``spacy_training/annotation_data.json`` unless an identical entry is
    already present.

    Returns:
        400 with an ``error`` field when the body is not a JSON object or a
        required key is missing; 200 with a ``message`` field when the entry
        was stored or already existed; 500 with an ``error`` field when
        reading or writing the file fails.
    """
    entry = request.get_json(silent=True)

    # Require a JSON object: a list such as ["text", "entities"] would
    # otherwise pass the membership checks below.
    if not isinstance(entry, dict) or "text" not in entry or "entities" not in entry:
        return (
            jsonify(
                {"error": "Ungültiges Format – 'text' und 'entities' erforderlich."}
            ),
            400,
        )

    path = os.path.join("spacy_training", "annotation_data.json")
    tmp_path = f"{path}.tmp"

    try:
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            # First entry ever: start with an empty list.
            data = []

        # Skip exact duplicates so repeated submissions do not grow the file.
        if entry in data:
            return jsonify({"message": "Eintrag existiert bereits."}), 200

        data.append(entry)

        # Create the target directory on first use, then write to a temporary
        # file and swap it in, so an interrupted write cannot leave a
        # truncated annotation file behind.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, path)

        return jsonify({"message": "Eintrag erfolgreich gespeichert."}), 200
    except Exception as e:
        # Top-level boundary of the handler: log and report a generic error.
        print(f"[ERROR] Fehler beim Schreiben der Datei: {e}")
        return jsonify({"error": "Interner Fehler beim Schreiben."}), 500
|
||
|
||
|
||
if __name__ == "__main__":
    # The server listens on all interfaces, and Flask's debug mode enables the
    # Werkzeug interactive debugger, which permits arbitrary code execution.
    # Debug mode is therefore opt-in via FLASK_DEBUG (1/true/yes/on) instead
    # of being always enabled.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(host="0.0.0.0", port=5052, debug=debug_enabled)
|