32 lines
864 B
Python
32 lines
864 B
Python
from flask import Flask, request, jsonify
|
|
from extractSpacy import extract
|
|
import requests
|
|
import os
|
|
import json
|
|
|
|
# Flask application object; the route below is registered on it and the
# __main__ guard at the bottom of the file runs it.
app = Flask(__name__)
|
|
|
|
# Endpoint that receives the extracted entities. Read from the
# VALIDATE_SERVICE_URL environment variable; falls back to a local instance
# on port 5054 when the variable is unset.
VALIDATE_SERVICE_URL = os.getenv("VALIDATE_SERVICE_URL", "http://localhost:5054/validate")
|
|
|
|
@app.route('/extract', methods=['POST'])
def extract_pdf():
    """Extract entities from per-page text and forward them for validation.

    Expects a JSON object body with the keys ``id`` and
    ``extracted_text_per_page``. The page data is handed to ``extract``; its
    result (decoded first when it is a JSON string) is posted to
    ``VALIDATE_SERVICE_URL`` together with the id and the service tag
    ``"spacy"``.

    Returns:
        A ``(response, status)`` tuple:

        * 200 -- the validate service accepted the payload.
        * 400 -- the body is not a JSON object or lacks a required key.
        * 502 -- the validate service could not be reached or answered with
          an error status.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # raising, so malformed requests get the explicit 400 below.
    json_data = request.get_json(silent=True)
    if not isinstance(json_data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    required_keys = ("id", "extracted_text_per_page")
    missing = [key for key in required_keys if key not in json_data]
    if missing:
        return jsonify({"error": f"Missing required field(s): {', '.join(missing)}"}), 400

    pitchbook_id = json_data["id"]
    pages_data = json_data["extracted_text_per_page"]

    # extract() may hand back either a JSON string or an already-decoded
    # object; normalise to the decoded form before forwarding.
    entities_json = extract(pages_data)
    entities = json.loads(entities_json) if isinstance(entities_json, str) else entities_json

    validate_payload = {
        "id": pitchbook_id,
        "service": "spacy",
        "entities": entities,
    }

    try:
        response = requests.post(VALIDATE_SERVICE_URL, json=validate_payload, timeout=600)
        # Treat a 4xx/5xx answer as a failure rather than reporting success.
        response.raise_for_status()
    except requests.RequestException:
        app.logger.exception("Forwarding entities to validate-service failed")
        return jsonify({"error": "validate-service request failed"}), 502

    # The payload must be a dict: a set literal is not JSON-serialisable.
    return jsonify({"message": "Sent to validate-service"}), 200
|
|
|
|
|
|
if __name__ == "__main__":
    # Direct-execution entry point: listen on every interface, port 5052.
    app.run(host="0.0.0.0", port=5052)