From ed7e01a39578226b912e1dc49acfdb6a278026f0 Mon Sep 17 00:00:00 2001 From: Abdulraahman Dabbagh <1924466@stud.hs-mannheim.de> Date: Sun, 25 May 2025 10:43:44 +0200 Subject: [PATCH] Backend Flask aufsetzen (Ticket #4) --- .flake8 | 3 + .pre-commit-config.yaml | 11 +++ docker-compose.yml | 14 +++ project/backend/Dockerfile | 15 ++++ project/backend/README.md | 85 +++++++++++++++++++ project/backend/app.py | 67 +++++++++++++++ project/backend/requirements.txt | 4 + .../fine_tuning_spaCy/annotated_data.json | 2 + prototypes/fine_tuning_spaCy/neue_kennzahl.py | 17 ++++ 9 files changed, 218 insertions(+) create mode 100644 .flake8 create mode 100644 .pre-commit-config.yaml create mode 100644 docker-compose.yml create mode 100644 project/backend/Dockerfile create mode 100644 project/backend/README.md create mode 100644 project/backend/app.py create mode 100644 project/backend/requirements.txt create mode 100644 prototypes/fine_tuning_spaCy/annotated_data.json create mode 100644 prototypes/fine_tuning_spaCy/neue_kennzahl.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..e2f249a --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +# .flake8 +[flake8] +max-line-length = 88 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..9fcd752 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: + - repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + language_version: python3 + + - repo: https://github.com/pycqa/flake8 + rev: 6.1.0 + hooks: + - id: flake8 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..ae56aab --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,14 @@ +services: + backend: + build: ./project/backend + container_name: fundfuechse-backend + ports: + - "5000:5000" + restart: always + + # frontend: + # build: ./project/frontend + # container_name: fundfuechse-frontend + # ports: + # - "3000:80" + # restart: always diff --git 
a/project/backend/Dockerfile b/project/backend/Dockerfile
new file mode 100644
index 0000000..196f4bc
--- /dev/null
+++ b/project/backend/Dockerfile
@@ -0,0 +1,15 @@
+# 1. Python-Image verwenden
+FROM python:3.11-slim
+
+# 2. Arbeitsverzeichnis im Container setzen
+WORKDIR /app
+
+# 3. requirements.txt kopieren und Pakete installieren
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# 4. Quellcode kopieren (z. B. app.py)
+COPY . .
+
+# 5. Flask-App starten
+CMD ["python", "app.py"]
diff --git a/project/backend/README.md b/project/backend/README.md
new file mode 100644
index 0000000..67e6f69
--- /dev/null
+++ b/project/backend/README.md
@@ -0,0 +1,85 @@
+## Setup
+
+### Voraussetzungen
+
+- Python 3.11+
+- pip
+- Docker (Desktop)
+- Optional: `pre-commit`
+
+### Abhängigkeiten installieren
+
+```bash
+pip install -r requirements.txt
+
+# Codequalität (lokal prüfen)
+black app.py
+flake8 app.py
+```
+
+## Anwendung starten
+
+### Option 1: Lokal
+
+1. Abhängigkeiten installieren:
+
+```bash
+pip install -r requirements.txt
+```
+
+2. Flask-App starten:
+
+```bash
+python app.py
+```
+
+3. Aufrufen im Browser:
+
+```
+http://localhost:5000/
+```
+
+---
+
+### Option 2: Mit Docker
+
+1. Image bauen:
+
+```bash
+docker build -t fundfuechse-backend .
+```
+
+2. 
Container starten:
+
+```bash
+docker run -p 5000:5000 fundfuechse-backend
+```
+
+Die API läuft dann unter:
+
+```
+http://localhost:5000/
+```
+
+---
+
+### Option 3: Mit docker-compose
+
+```bash
+docker-compose up --build
+```
+
+Danach ist der Service erreichbar unter:
+
+```
+http://localhost:5000/
+```
+
+---
+
+### Testaufruf per curl (PDF hochladen)
+
+```bash
+curl.exe -X POST -F "file=@Pitchbook 1.pdf" http://localhost:5000/upload
+```
+
diff --git a/project/backend/app.py b/project/backend/app.py
new file mode 100644
index 0000000..ecb21ef
--- /dev/null
+++ b/project/backend/app.py
@@ -0,0 +1,81 @@
+"""Flask backend prototype: sample endpoints for the UI and a PDF upload."""
+
+import os
+
+from flask import Flask, jsonify, request
+
+app = Flask(__name__)
+
+
+@app.route("/")
+def hello():
+    """Return the plain-text landing page."""
+    return "Startseite"
+
+
+@app.route("/config", methods=["GET"])
+def get_config():
+    """Return a sample key-figure configuration (for the UI) as JSON."""
+    config = [
+        {"name": "Fondname", "format": "Text", "required": True},
+        {"name": "IRR", "format": "Prozent", "required": False},
+    ]
+    return jsonify(config)
+
+
+@app.route("/dummy_results", methods=["GET"])
+def get_dummy_results():
+    """Return sample extraction results as JSON."""
+    results = [
+        {"label": "Fondname", "entity": "ABC Fonds", "page": 1, "status": "validated"},
+        {
+            "label": "IRR",
+            "entity": "6,0%",
+            "page": 3,
+            "status": "single-source",
+            "source": "spaCy",
+        },
+    ]
+    return jsonify(results)
+
+
+# Create the upload folder if it does not exist yet
+UPLOAD_FOLDER = "uploads"
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+
+
+@app.route("/upload", methods=["POST"])
+def upload_pdf():
+    """Accept a PDF via multipart POST (field "file") and store it.
+
+    Returns a JSON body with HTTP 200 on success, or an "error" message with
+    HTTP 400 if the file part is missing, unnamed, or not a ".pdf" file.
+    """
+    if "file" not in request.files:
+        return {"error": "Keine Datei hochgeladen."}, 400
+
+    file = request.files["file"]
+
+    # The client controls the filename: keep only its last path component
+    # (forward slash and backslash separators) so "../" cannot escape UPLOAD_FOLDER.
+    filename = os.path.basename((file.filename or "").replace("\\", "/"))
+
+    if filename == "":
+        return {"error": "Dateiname fehlt."}, 400
+
+    # Compare case-insensitively so "REPORT.PDF" is accepted as well
+    if not filename.lower().endswith(".pdf"):
+        return {"error": "Nur PDF-Dateien erlaubt."}, 400
+
+    file_path = os.path.join(UPLOAD_FOLDER, filename)
+    file.save(file_path)
+
+    return {"message": f"Datei {filename} erfolgreich gespeichert!"}, 200
+
+
+# host="0.0.0.0" is required inside Docker. Debug mode is opt-in via
+# FLASK_DEBUG=1 because the Werkzeug debugger allows arbitrary code execution
+# and must not be exposed on all interfaces by default.
+if __name__ == "__main__":
+    debug = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true"}
+    app.run(debug=debug, host="0.0.0.0")
diff --git a/project/backend/requirements.txt b/project/backend/requirements.txt
new file mode 100644
index 0000000..8822904
--- /dev/null
+++ b/project/backend/requirements.txt
@@ -0,0 +1,4 @@
+Flask
+black
+flake8
+pre-commit
diff --git a/prototypes/fine_tuning_spaCy/annotated_data.json b/prototypes/fine_tuning_spaCy/annotated_data.json
new file mode 100644
index 0000000..728d74a
--- /dev/null
+++ b/prototypes/fine_tuning_spaCy/annotated_data.json
@@ -0,0 +1,2 @@
+{"text": "Das geplante Projektvolumen beträgt 120 Mio. €.", "entities": [[28, 44, "KENNZAHL"]]}
+{"text": "Das geplante Projektvolumen beträgt 120 Mio. €.", "entities": [[28, 44, "KENNZAHL"]]}
diff --git a/prototypes/fine_tuning_spaCy/neue_kennzahl.py b/prototypes/fine_tuning_spaCy/neue_kennzahl.py
new file mode 100644
index 0000000..696b851
--- /dev/null
+++ b/prototypes/fine_tuning_spaCy/neue_kennzahl.py
@@ -0,0 +1,28 @@
+import streamlit as st
+import json
+
+st.title("Neue Kennzahl annotieren")
+
+text = st.text_area("Text", "Das geplante Projektvolumen beträgt 120 Mio. €.")
+# Clamp the defaults to the text length: number_input raises an error when
+# `value` exceeds `max_value`, e.g. after a shorter text has been entered.
+start = st.number_input(
+    "Start-Position", min_value=0, max_value=len(text), value=min(28, len(text))
+)
+end = st.number_input(
+    "End-Position", min_value=0, max_value=len(text), value=min(44, len(text))
+)
+label = st.text_input("Label (z. B. KENNZAHL)", "KENNZAHL")
+
+if st.button("Speichern"):
+    if start >= end:
+        # An empty or inverted span is not a usable entity annotation
+        st.error("Start-Position muss kleiner als End-Position sein.")
+    else:
+        example = {"text": text, "entities": [[start, end, label]]}
+
+        # Append one JSON object per line to the annotation file
+        with open("annotated_data.json", "a", encoding="utf-8") as f:
+            f.write(json.dumps(example, ensure_ascii=False) + "\n")
+
+        st.success("✅ Annotation gespeichert!")