Refactor coordinator/app.py and add new controllers and models

closes #29
add persistence for pitch-books, spacy model and settings
pull/40/head
Jaronim Pracht 2025-05-27 13:10:21 +02:00
parent c5f3224c68
commit 141abc725f
12 changed files with 474 additions and 58 deletions

8
.gitignore vendored
View File

@ -2,4 +2,10 @@
.DS_Store .DS_Store
# Python virtual environments # Python virtual environments
.venv/ **/.venv/
**/venv
**/__pycache__
**/.env
**/node_modules
**/build
**/dist

View File

@ -1,66 +1,28 @@
from flask import Flask, jsonify from flask import Flask
from flask import request
import os import os
from dotenv import load_dotenv
from controller import register_routes
from model.database import init_db
app = Flask(__name__) app = Flask(__name__)
load_dotenv()
DATABASE_URL = os.getenv("DATABASE_URL")
app.config["SQLALCHEMY_DATABASE_URI"] = DATABASE_URL
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = True
app.config["MAX_CONTENT_LENGTH"] = 100 * 1024 * 1024 # 100 MB
init_db(app)
register_routes(app)
@app.route("/health") @app.route("/health")
def health_check(): def health_check():
return "OK" return "OK"
# Returns a sample KPI configuration (consumed by the UI).
@app.route("/config", methods=["GET"])
def get_config():
    """Return a hard-coded example list of KPI field definitions as JSON."""
    fund_name_field = {"name": "Fondname", "format": "Text", "required": True}
    irr_field = {"name": "IRR", "format": "Prozent", "required": False}
    return jsonify([fund_name_field, irr_field])
# Serves sample extraction results.
@app.route("/extraction_results", methods=["GET"])
def get_extraction_results():
    """Return two hard-coded example extraction results as JSON."""
    validated_hit = dict(
        label="Fondname",
        entity="ABC Fonds",
        page=1,
        status="validated",
    )
    single_source_hit = dict(
        label="IRR",
        entity="6,0%",
        page=3,
        status="single-source",
        source="spaCy",
    )
    return jsonify([validated_hit, single_source_hit])
# Create the upload folder if it does not exist yet.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)


# Accepts a PDF file via POST and stores it in UPLOAD_FOLDER.
@app.route("/upload", methods=["POST"])
def upload_pdf():
    """Store the uploaded PDF from the ``file`` form part in UPLOAD_FOLDER.

    Returns:
        A ``(body, status)`` tuple: status 400 with an ``error`` message when
        the part is missing, has no usable filename, or is not named
        ``*.pdf``; status 200 with a ``message`` once the file is saved.
    """
    if "file" not in request.files:
        return {"error": "Keine Datei hochgeladen."}, 400

    file = request.files["file"]

    # The filename is client-controlled. Keep only its last path component so
    # an absolute path or "../" segments cannot make os.path.join() point
    # outside UPLOAD_FOLDER.
    safe_name = os.path.basename(file.filename or "")
    if safe_name == "":
        return {"error": "Dateiname fehlt."}, 400

    # Compare case-insensitively so "REPORT.PDF" is accepted as well.
    if not safe_name.lower().endswith(".pdf"):
        return {"error": "Nur PDF-Dateien erlaubt."}, 400

    file.save(os.path.join(UPLOAD_FOLDER, safe_name))
    return {"message": f"Datei {safe_name} erfolgreich gespeichert!"}, 200
# für Docker wichtig: host='0.0.0.0' # für Docker wichtig: host='0.0.0.0'
if __name__ == "__main__": if __name__ == "__main__":
app.run(debug=True, host="0.0.0.0") app.run(debug=True, host="0.0.0.0")

View File

@ -0,0 +1,9 @@
from controller.spacy_contoller import spacy_controller
from controller.kpi_setting_controller import kpi_setting_controller
from controller.pitch_book_controller import pitch_book_controller
def register_routes(app):
    """Register the KPI-setting, pitch-book and spaCy blueprints on ``app``."""
    blueprints = (
        kpi_setting_controller,
        pitch_book_controller,
        spacy_controller,
    )
    for blueprint in blueprints:
        app.register_blueprint(blueprint)

View File

@ -0,0 +1,116 @@
from flask import Blueprint, request, jsonify
from model.database import db
from model.kpi_setting_model import KPISettingModel, KPISettingType
# Blueprint grouping all KPI-setting endpoints under /api/kpi_setting.
kpi_setting_controller = Blueprint(
    name="kpi_settings",
    import_name=__name__,
    url_prefix="/api/kpi_setting",
)
@kpi_setting_controller.route("/", methods=["GET"])
def get_all_kpi_settings():
    """Return every stored KPI setting, serialized via to_dict(), with HTTP 200."""
    payload = [row.to_dict() for row in KPISettingModel.query.all()]
    return jsonify(payload), 200
@kpi_setting_controller.route("/<int:id>", methods=["GET"])
def get_kpi_setting(id):
    """Return the KPI setting with the given id as JSON (404 if it is missing)."""
    found = KPISettingModel.query.get_or_404(id)
    serialized = found.to_dict()
    return jsonify(serialized), 200
@kpi_setting_controller.route("/", methods=["POST"])
def create_kpi_setting():
    """Create a KPI setting from the JSON request body.

    The body must be a JSON object containing ``name``, ``description``,
    ``mandatory``, ``type``, ``translation`` and ``example``.

    Returns:
        400 if the body is empty, not an object, lacks a required field or
        carries an unknown ``type``; 409 if the name is already taken;
        201 with the serialized setting on success.
    """
    data = request.json
    if not data:
        return jsonify({"error": "No data provided"}), 400
    # A JSON list, number or string can be truthy but does not support the
    # key lookups below; reject it here instead of failing with a TypeError.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    required_fields = (
        "name",
        "description",
        "mandatory",
        "type",
        "translation",
        "example",
    )
    for field in required_fields:
        if field not in data:
            return jsonify({"error": f"Missing required field: {field}"}), 400

    # Names are unique; report a conflict before attempting the insert.
    existing_kpi = KPISettingModel.query.filter_by(name=data["name"]).first()
    if existing_kpi:
        return jsonify({"error": "KPI Setting with this name already exists"}), 409

    # Convert the raw string into the enum; unknown values raise ValueError.
    try:
        kpi_type = KPISettingType(data["type"])
    except ValueError:
        valid_types = [t.value for t in KPISettingType]
        return jsonify({"error": f"Invalid type. Must be one of: {valid_types}"}), 400

    new_kpi_setting = KPISettingModel(
        name=data["name"],
        description=data["description"],
        mandatory=data["mandatory"],
        type=kpi_type,
        translation=data["translation"],
        example=data["example"],
    )
    db.session.add(new_kpi_setting)
    db.session.commit()

    return jsonify(new_kpi_setting.to_dict()), 201
@kpi_setting_controller.route("/<int:id>", methods=["PUT"])
def update_kpi_setting(id):
    """Partially update the KPI setting with the given id from the JSON body.

    Only the keys present in the body are changed.

    Returns:
        404 if the id is unknown; 400 if the body is empty, not an object or
        carries an unknown ``type``; 409 if the new name is already taken;
        200 with the serialized setting on success.
    """
    kpi_setting = KPISettingModel.query.get_or_404(id)

    data = request.json
    if not data:
        return jsonify({"error": "No data provided"}), 400
    # A JSON list, number or string can be truthy but does not support the
    # key lookups below; reject it here instead of failing with a TypeError.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    # Validate everything first so a rejected request never leaves the loaded
    # row half-modified in the session.
    renaming = "name" in data and data["name"] != kpi_setting.name
    if renaming:
        existing_kpi = KPISettingModel.query.filter_by(name=data["name"]).first()
        if existing_kpi:
            return (
                jsonify({"error": "KPI Setting with this name already exists"}),
                409,
            )

    new_type = None
    if "type" in data:
        try:
            new_type = KPISettingType(data["type"])
        except ValueError:
            valid_types = [t.value for t in KPISettingType]
            return (
                jsonify({"error": f"Invalid type. Must be one of: {valid_types}"}),
                400,
            )

    # All checks passed: apply the requested changes.
    if renaming:
        kpi_setting.name = data["name"]
    for field in ("description", "mandatory", "translation", "example"):
        if field in data:
            setattr(kpi_setting, field, data[field])
    if new_type is not None:
        kpi_setting.type = new_type

    db.session.commit()

    return jsonify(kpi_setting.to_dict()), 200
@kpi_setting_controller.route("/<int:id>", methods=["DELETE"])
def delete_kpi_setting(id):
    """Delete the KPI setting with the given id (404 if it is missing)."""
    doomed = KPISettingModel.query.get_or_404(id)

    session = db.session
    session.delete(doomed)
    session.commit()

    confirmation = {"message": f"KPI Setting {id} deleted successfully"}
    return jsonify(confirmation), 200

View File

@ -0,0 +1,97 @@
from flask import Blueprint, request, jsonify, send_file
from model.database import db
from model.pitch_book_model import PitchBookModel
from io import BytesIO
from werkzeug.utils import secure_filename
import puremagic
# Blueprint grouping all pitch-book endpoints under /api/pitch_book.
pitch_book_controller = Blueprint(
    name="pitch_books",
    import_name=__name__,
    url_prefix="/api/pitch_book",
)
@pitch_book_controller.route("/", methods=["GET"])
def get_all_files():
    """Return every stored pitch book, serialized via to_dict(), with HTTP 200."""
    serialized = [pitch_book.to_dict() for pitch_book in PitchBookModel.query.all()]
    return jsonify(serialized), 200
@pitch_book_controller.route("/<int:id>", methods=["GET"])
def get_file(id):
    """Return the pitch book with the given id as JSON (404 if it is missing)."""
    pitch_book = PitchBookModel.query.get_or_404(id)
    serialized = pitch_book.to_dict()
    return jsonify(serialized), 200
@pitch_book_controller.route("/<int:id>/download", methods=["GET"])
def download_file(id):
    """Send the stored bytes of the pitch book as an attachment (404 if missing)."""
    pitch_book = PitchBookModel.query.get_or_404(id)
    # Wrap the stored bytes in a file-like object for send_file.
    stream = BytesIO(pitch_book.file)
    return send_file(stream, download_name=pitch_book.filename, as_attachment=True)
@pitch_book_controller.route("/", methods=["POST"])
def upload_file():
    """Store an uploaded PDF from the ``file`` form part as a new pitch book.

    Returns:
        400 if the part is missing, has no filename, or its content is not
        detected as a PDF; 201 with the serialized record on success.
        Database errors are not caught here and propagate to Flask.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file part in the request"}), 400

    uploaded_file = request.files["file"]
    if not uploaded_file.filename:
        return jsonify({"error": "No selected file"}), 400

    # Read file data once
    file_data = uploaded_file.read()

    # Only the content sniffing is guarded: puremagic raises when the input is
    # empty or matches no known signature. Database failures must not be
    # reported to the client as an invalid file format.
    try:
        is_pdf = puremagic.from_string(file_data, mime=True) == "application/pdf"
    except (puremagic.PureError, ValueError):
        is_pdf = False

    if not is_pdf:
        return (
            jsonify({"error": "Invalid file format. Only PDF files are accepted"}),
            400,
        )

    new_file = PitchBookModel(
        filename=secure_filename(uploaded_file.filename), file=file_data
    )
    db.session.add(new_file)
    db.session.commit()

    return jsonify(new_file.to_dict()), 201
@pitch_book_controller.route("/<int:id>", methods=["PUT"])
def update_file(id):
    """Update the file content and/or the ``kpi`` value of a pitch book.

    A ``file`` form part with a filename replaces the stored PDF and its name;
    a ``kpi`` form field replaces the stored kpi value. Both are optional.

    Returns:
        404 if the id is unknown; 400 if a file is supplied but its content
        is not detected as a PDF; 200 with the serialized record on success.
    """
    file = PitchBookModel.query.get_or_404(id)

    uploaded_file = request.files.get("file")
    # A part without a filename is treated as "no file supplied", matching the
    # upload endpoint, which refuses such parts.
    if uploaded_file is not None and uploaded_file.filename:
        # Read file data once
        file_data = uploaded_file.read()

        # puremagic raises when the input is empty or matches no signature.
        try:
            is_pdf = (
                puremagic.from_string(file_data, mime=True) == "application/pdf"
            )
        except (puremagic.PureError, ValueError):
            is_pdf = False

        if not is_pdf:
            # Tell the client instead of silently keeping the old content.
            return (
                jsonify(
                    {"error": "Invalid file format. Only PDF files are accepted"}
                ),
                400,
            )

        # Name and content are replaced together, and the name is sanitized
        # exactly like in the upload endpoint.
        file.filename = secure_filename(uploaded_file.filename)
        file.file = file_data

    if "kpi" in request.form:
        file.kpi = request.form.get("kpi")

    db.session.commit()

    return jsonify(file.to_dict()), 200
@pitch_book_controller.route("/<int:id>", methods=["DELETE"])
def delete_file(id):
    """Delete the pitch book with the given id (404 if it is missing)."""
    doomed = PitchBookModel.query.get_or_404(id)

    session = db.session
    session.delete(doomed)
    session.commit()

    confirmation = {"message": f"File {id} deleted successfully"}
    return jsonify(confirmation), 200

View File

@ -0,0 +1,93 @@
from flask import Blueprint, request, jsonify, send_file
from io import BytesIO
from model.spacy_model import SpacyModel
import puremagic
from werkzeug.utils import secure_filename
from model.database import db
# Blueprint grouping all spaCy model-file endpoints under /api/spacy.
spacy_controller = Blueprint(
    name="spacy",
    import_name=__name__,
    url_prefix="/api/spacy",
)
@spacy_controller.route("/", methods=["GET"])
def get_all_files():
    """Return every stored spaCy file, serialized via to_dict(), with HTTP 200."""
    serialized = [stored.to_dict() for stored in SpacyModel.query.all()]
    return jsonify(serialized), 200
@spacy_controller.route("/<int:id>", methods=["GET"])
def get_file(id):
    """Return the spaCy file record with the given id as JSON (404 if missing)."""
    stored = SpacyModel.query.get_or_404(id)
    serialized = stored.to_dict()
    return jsonify(serialized), 200
@spacy_controller.route("/<int:id>/download", methods=["GET"])
def download_file(id):
    """Send the stored bytes of the spaCy file as an attachment (404 if missing)."""
    stored = SpacyModel.query.get_or_404(id)
    # Wrap the stored bytes in a file-like object for send_file.
    stream = BytesIO(stored.file)
    return send_file(stream, download_name=stored.filename, as_attachment=True)
@spacy_controller.route("/", methods=["POST"])
def upload_file():
    """Store the uploaded ``file`` form part as a new spaCy model record.

    The content is stored as-is; no file-type check is performed.

    Returns:
        400 if the part is missing or has no filename; 201 with the
        serialized record on success. Database errors are not caught here
        and propagate to Flask rather than being reported as a client error.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file part in the request"}), 400

    uploaded_file = request.files["file"]
    if not uploaded_file.filename:
        return jsonify({"error": "No selected file"}), 400

    # Read file data once
    file_data = uploaded_file.read()

    new_file = SpacyModel(
        filename=secure_filename(uploaded_file.filename), file=file_data
    )
    db.session.add(new_file)
    db.session.commit()

    return jsonify(new_file.to_dict()), 201
@spacy_controller.route("/<int:id>", methods=["PUT"])
def update_file(id):
    """Replace the stored file (name and content) of a spaCy model record.

    A ``file`` form part with a filename replaces the stored content and
    name; without one the record is left unchanged.

    Returns:
        404 if the id is unknown; 200 with the serialized record otherwise.
    """
    file = SpacyModel.query.get_or_404(id)

    uploaded_file = request.files.get("file")
    # A part without a filename is treated as "no file supplied", matching the
    # upload endpoint, which refuses such parts.
    if uploaded_file is not None and uploaded_file.filename:
        # Like the upload endpoint, accept any content and sanitize the name.
        # The previous PDF-only check made it impossible to replace a file
        # that the upload endpoint had accepted.
        file.filename = secure_filename(uploaded_file.filename)
        file.file = uploaded_file.read()

    # NOTE(review): the former "kpi" form handling was dropped because
    # SpacyModel declares no kpi column, so the value was never persisted.

    db.session.commit()

    return jsonify(file.to_dict()), 200
@spacy_controller.route("/<int:id>", methods=["DELETE"])
def delete_file(id):
    """Delete the spaCy file record with the given id (404 if it is missing)."""
    doomed = SpacyModel.query.get_or_404(id)

    session = db.session
    session.delete(doomed)
    session.commit()

    confirmation = {"message": f"File {id} deleted successfully"}
    return jsonify(confirmation), 200

View File

@ -0,0 +1,9 @@
services:
db:
image: postgres
environment:
POSTGRES_PASSWORD: admin
POSTGRES_USER: admin
ports:
- "5432:5432"

View File

@ -0,0 +1,15 @@
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.orm import DeclarativeBase
class Base(DeclarativeBase):
    """Declarative base class passed to Flask-SQLAlchemy as its model class."""


# Shared Flask-SQLAlchemy extension instance, built on top of Base.
db = SQLAlchemy(model_class=Base)


def init_db(app):
    """Bind ``db`` to ``app`` and run ``db.create_all()`` in an app context."""
    db.init_app(app)

    app_context = app.app_context()
    with app_context:
        db.create_all()

View File

@ -0,0 +1,43 @@
from model.database import db
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import Enum as SQLAlchemyEnum
from enum import Enum
class KPISettingType(Enum):
    """Closed set of value types a KPI setting can have.

    The member values are the lowercase strings used to look a member up,
    e.g. ``KPISettingType("number")``.
    """

    NUMBER = "number"
    STRING = "string"
    RANGE = "range"
    BOOLEAN = "boolean"
    ARRAY = "array"
class KPISettingModel(db.Model):
    """Database model for one KPI setting with a unique name and a typed value."""

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(unique=True)
    description: Mapped[str]
    mandatory: Mapped[bool]
    type: Mapped[KPISettingType] = mapped_column(
        SQLAlchemyEnum(KPISettingType, native_enum=True)
    )
    translation: Mapped[str]
    example: Mapped[str]

    def __init__(self, name, description, mandatory, type, translation, example):
        """Populate every column except the primary key ``id``."""
        self.name = name
        self.description = description
        self.mandatory = mandatory
        self.type = type
        self.translation = translation
        self.example = example

    def to_dict(self):
        """Return all columns as a plain dict; ``type`` is given as its enum value."""
        columns = (
            "id",
            "name",
            "description",
            "mandatory",
            "type",
            "translation",
            "example",
        )
        serialized = {column: getattr(self, column) for column in columns}
        # Swap the enum member for its string value; the key keeps its position.
        serialized["type"] = self.type.value
        return serialized

View File

@ -0,0 +1,17 @@
from model.database import db
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import LargeBinary
class PitchBookModel(db.Model):
    """Database model for an uploaded pitch book stored as binary content."""

    id: Mapped[int] = mapped_column(primary_key=True)
    filename: Mapped[str] = mapped_column()
    file: Mapped[bytes] = mapped_column(LargeBinary)
    kpi: Mapped[str | None]

    def __init__(self, filename, file):
        """Set the filename and binary content; ``kpi`` is not set here."""
        self.filename = filename
        self.file = file

    def to_dict(self):
        """Return id, filename and kpi as a plain dict (the binary is omitted)."""
        exposed = ("id", "filename", "kpi")
        return {column: getattr(self, column) for column in exposed}

View File

@ -0,0 +1,22 @@
from model.database import db
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import LargeBinary
from datetime import datetime
def _utc_now():
    """Return the current UTC time as a naive ``datetime``.

    Produces the same kind of value as the deprecated ``datetime.utcnow()``
    (UTC wall-clock time without tzinfo), so stored timestamps are unchanged.
    """
    # Local import: this module only imports ``datetime`` at the top.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class SpacyModel(db.Model):
    """Database model for an uploaded spaCy model file stored as binary content."""

    id: Mapped[int] = mapped_column(primary_key=True)
    filename: Mapped[str] = mapped_column()
    file: Mapped[bytes] = mapped_column(LargeBinary)
    # Set when the row is inserted without an explicit value.
    created_at: Mapped[datetime] = mapped_column(default=_utc_now)

    def to_dict(self):
        """Return id, filename and ISO-formatted creation time (binary omitted)."""
        return {
            "id": self.id,
            "filename": self.filename,
            "created_at": self.created_at.isoformat(),
        }

    def __init__(self, filename, file):
        """Set the filename and binary content of the record."""
        self.filename = filename
        self.file = file

View File

@ -1,4 +1,31 @@
Flask black==25.1.0
black blinker==1.9.0
flake8 cfgv==3.4.0
pre-commit click==8.2.1
distlib==0.3.9
filelock==3.18.0
flake8==7.2.0
Flask==3.1.1
Flask-SQLAlchemy==3.1.1
greenlet==3.2.2
identify==2.6.12
itsdangerous==2.2.0
Jinja2==3.1.6
MarkupSafe==3.0.2
mccabe==0.7.0
mypy_extensions==1.1.0
nodeenv==1.9.1
packaging==25.0
pathspec==0.12.1
platformdirs==4.3.8
pre_commit==4.2.0
psycopg2-binary==2.9.10
puremagic==1.29
pycodestyle==2.13.0
pyflakes==3.3.2
python-dotenv==1.1.0
PyYAML==6.0.2
SQLAlchemy==2.0.41
typing_extensions==4.13.2
virtualenv==20.31.2
Werkzeug==3.1.3