#26-init-db #40
|
|
@ -2,4 +2,10 @@
|
|||
.DS_Store
|
||||
|
||||
# Python virtual environments
|
||||
.venv/
|
||||
**/.venv/
|
||||
**/venv
|
||||
**/__pycache__
|
||||
**/.env
|
||||
**/node_modules
|
||||
**/build
|
||||
**/dist
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -0,0 +1,4 @@
|
|||
API_KEY=
|
||||
DATABASE_URL=postgresql://admin:admin@db:5432
|
||||
POSTGRES_PASSWORD=admin
|
||||
POSTGRES_USER=admin
|
||||
|
|
@ -4,16 +4,18 @@ FROM python:3.11-alpine
|
|||
# 2. Arbeitsverzeichnis im Container setzen
|
||||
WORKDIR /app
|
||||
|
||||
# 3. requirements.txt kopieren und Pakete installieren
|
||||
# 3. production-style server mit gunicorn
|
||||
RUN pip install gunicorn
|
||||
|
||||
# 4. requirements.txt kopieren und Pakete installieren
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# 4. Quellcode kopieren (z. B. app.py)
|
||||
|
||||
# 5. Quellcode kopieren (z.B. app.py)
|
||||
COPY . .
|
||||
|
||||
# 5. Flask-App starten
|
||||
# production-style server mit gunicorn
|
||||
RUN pip install gunicorn
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
EXPOSE 5000
|
||||
|
||||
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "app:app"]
|
||||
|
||||
|
|
@ -1,66 +1,28 @@
|
|||
from flask import Flask, jsonify
|
||||
from flask import request
|
||||
from flask import Flask
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from controller import register_routes
|
||||
from model.database import init_db
|
||||
|
||||
# Application object shared by the blueprints and by the route below.
app = Flask(__name__)

# Load variables from a .env file (if any) before the settings are read.
load_dotenv()
DATABASE_URL = os.getenv("DATABASE_URL")

app.config["SQLALCHEMY_DATABASE_URI"] = DATABASE_URL
# NOTE(review): Flask-SQLAlchemy recommends leaving modification tracking off
# unless its signals are consumed — confirm whether True is intentional.
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = True
app.config["MAX_CONTENT_LENGTH"] = 100 * 1024 * 1024 # 100 MB

# Bind the database extension to the app and create the tables.
init_db(app)

# Attach all API blueprints.
register_routes(app)
|
||||
|
||||
|
||||
@app.route("/health")
def health_check():
    """Liveness probe: always answer with the plain-text body ``OK``."""
    status_text = "OK"
    return status_text
|
||||
|
||||
|
||||
# Returns a sample KPI configuration (for the UI).
@app.route("/config", methods=["GET"])
def get_config():
    """Serve a hard-coded example list of KPI field definitions as JSON."""
    # (name, format, required) triples, expanded into dicts below.
    field_specs = (
        ("Fondname", "Text", True),
        ("IRR", "Prozent", False),
    )
    return jsonify(
        [
            {"name": name, "format": fmt, "required": required}
            for name, fmt, required in field_specs
        ]
    )
|
||||
|
||||
|
||||
# Delivers sample extraction results.
@app.route("/extraction_results", methods=["GET"])
def get_extraction_results():
    """Serve two hard-coded example extraction hits as a JSON array."""
    validated_hit = {
        "label": "Fondname",
        "entity": "ABC Fonds",
        "page": 1,
        "status": "validated",
    }
    single_source_hit = {
        "label": "IRR",
        "entity": "6,0%",
        "page": 3,
        "status": "single-source",
        "source": "spaCy",
    }
    return jsonify([validated_hit, single_source_hit])
|
||||
|
||||
|
||||
# Create the upload folder if it does not exist yet.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)


# Accepts a PDF file via POST and stores it on disk.
@app.route("/upload", methods=["POST"])
def upload_pdf():
    """Store an uploaded PDF inside ``UPLOAD_FOLDER``.

    Expects a multipart form field named ``file``.

    Returns:
        A ``(body, status)`` tuple: 200 with a success message, or 400 with
        an ``error`` entry when the part is missing, has no usable file name,
        or does not carry a ``.pdf`` extension.
    """
    if "file" not in request.files:
        return {"error": "Keine Datei hochgeladen."}, 400

    file = request.files["file"]

    # The file name is client-controlled. Keep only its last path component
    # so values like "../../etc/x.pdf" cannot escape UPLOAD_FOLDER.
    raw_name = file.filename or ""
    safe_name = os.path.basename(raw_name.replace("\\", "/"))

    if safe_name in ("", ".", ".."):
        return {"error": "Dateiname fehlt."}, 400

    # Compare case-insensitively so "REPORT.PDF" is accepted as well.
    if not safe_name.lower().endswith(".pdf"):
        return {"error": "Nur PDF-Dateien erlaubt."}, 400

    file.save(os.path.join(UPLOAD_FOLDER, safe_name))

    return {"message": f"Datei {safe_name} erfolgreich gespeichert!"}, 200
|
||||
|
||||
|
||||
# Important for Docker: bind to host "0.0.0.0" so the port is reachable
# from outside the container.
if __name__ == "__main__":
    # The Werkzeug debugger lets anyone who can reach the port execute code,
    # so debug mode is opt-in via FLASK_DEBUG instead of always on.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in (
        "1",
        "true",
        "yes",
        "on",
    )
    app.run(debug=debug_enabled, host="0.0.0.0")
|
||||
|
|
|
|||
|
|
@ -0,0 +1,9 @@
|
|||
from controller.spacy_contoller import spacy_controller
|
||||
from controller.kpi_setting_controller import kpi_setting_controller
|
||||
from controller.pitch_book_controller import pitch_book_controller
|
||||
|
||||
|
||||
def register_routes(app):
    """Attach every API blueprint to the given Flask application.

    Registration order: KPI settings, pitch books, spaCy.
    """
    blueprints = (
        kpi_setting_controller,
        pitch_book_controller,
        spacy_controller,
    )
    for blueprint in blueprints:
        app.register_blueprint(blueprint)
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
from flask import Blueprint, request, jsonify
|
||||
from model.database import db
|
||||
from model.kpi_setting_model import KPISettingModel, KPISettingType
|
||||
|
||||
|
||||
kpi_setting_controller = Blueprint(
|
||||
"kpi_settings", __name__, url_prefix="/api/kpi_setting"
|
||||
)
|
||||
|
||||
|
||||
@kpi_setting_controller.route("/", methods=["GET"])
def get_all_kpi_settings():
    """Return every stored KPI setting as a JSON array with status 200."""
    payload = []
    for setting in KPISettingModel.query.all():
        payload.append(setting.to_dict())
    return jsonify(payload), 200
|
||||
|
||||
|
||||
@kpi_setting_controller.route("/<int:id>", methods=["GET"])
def get_kpi_setting(id):
    """Return the KPI setting with primary key ``id`` as JSON.

    Aborts with 404 when no such row exists.
    """
    # db.get_or_404 goes through Session.get instead of the legacy Query.get.
    kpi_setting = db.get_or_404(KPISettingModel, id)
    return jsonify(kpi_setting.to_dict()), 200
|
||||
|
||||
|
||||
@kpi_setting_controller.route("/", methods=["POST"])
def create_kpi_setting():
    """Create a KPI setting from the JSON request body.

    The body must be a JSON object containing ``name``, ``description``,
    ``mandatory``, ``type``, ``translation`` and ``example``.

    Returns:
        201 with the created setting; 400 for an empty or non-object body,
        a missing field, or an unknown ``type``; 409 when the name is
        already taken.
    """
    data = request.json

    if not data:
        return jsonify({"error": "No data provided"}), 400

    # A JSON array or scalar is valid JSON but not a valid payload; without
    # this guard the key lookups below raise TypeError and produce a 500.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    required_fields = (
        "name",
        "description",
        "mandatory",
        "type",
        "translation",
        "example",
    )
    missing_field = next((f for f in required_fields if f not in data), None)
    if missing_field is not None:
        return (
            jsonify({"error": f"Missing required field: {missing_field}"}),
            400,
        )

    # Check if name already exists
    existing_kpi = KPISettingModel.query.filter_by(name=data["name"]).first()
    if existing_kpi:
        return jsonify({"error": "KPI Setting with this name already exists"}), 409

    # Validate type enum
    try:
        kpi_type = KPISettingType(data["type"])
    except ValueError:
        valid_types = [t.value for t in KPISettingType]
        return jsonify({"error": f"Invalid type. Must be one of: {valid_types}"}), 400

    new_kpi_setting = KPISettingModel(
        name=data["name"],
        description=data["description"],
        mandatory=data["mandatory"],
        type=kpi_type,
        translation=data["translation"],
        example=data["example"],
    )

    db.session.add(new_kpi_setting)
    db.session.commit()

    return jsonify(new_kpi_setting.to_dict()), 201
|
||||
|
||||
|
||||
@kpi_setting_controller.route("/<int:id>", methods=["PUT"])
def update_kpi_setting(id):
    """Partially update the KPI setting with primary key ``id``.

    Only the keys present in the JSON body are changed.

    Returns:
        200 with the updated setting; 400 for an empty or non-object body
        or an unknown ``type``; 409 when the new name is already taken.
        Aborts with 404 when the setting does not exist.
    """
    kpi_setting = db.get_or_404(KPISettingModel, id)
    data = request.json

    if not data:
        return jsonify({"error": "No data provided"}), 400

    # A JSON array or scalar would break the key lookups below.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    # Validate everything first so a rejected request never leaves the
    # entity half-modified in the session.
    rename_requested = "name" in data and data["name"] != kpi_setting.name
    if rename_requested:
        existing_kpi = KPISettingModel.query.filter_by(name=data["name"]).first()
        if existing_kpi:
            return jsonify({"error": "KPI Setting with this name already exists"}), 409

    new_type = None
    if "type" in data:
        try:
            new_type = KPISettingType(data["type"])
        except ValueError:
            valid_types = [t.value for t in KPISettingType]
            return (
                jsonify({"error": f"Invalid type. Must be one of: {valid_types}"}),
                400,
            )

    # All checks passed: apply the changes.
    if rename_requested:
        kpi_setting.name = data["name"]

    for field in ("description", "mandatory", "translation", "example"):
        if field in data:
            setattr(kpi_setting, field, data[field])

    if new_type is not None:
        kpi_setting.type = new_type

    db.session.commit()

    return jsonify(kpi_setting.to_dict()), 200
|
||||
|
||||
|
||||
@kpi_setting_controller.route("/<int:id>", methods=["DELETE"])
def delete_kpi_setting(id):
    """Delete the KPI setting with primary key ``id``.

    Returns 200 with a confirmation message; aborts with 404 when the
    setting does not exist.
    """
    # db.get_or_404 goes through Session.get instead of the legacy Query.get.
    kpi_setting = db.get_or_404(KPISettingModel, id)
    db.session.delete(kpi_setting)
    db.session.commit()

    return jsonify({"message": f"KPI Setting {id} deleted successfully"}), 200
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
from flask import Blueprint, request, jsonify, send_file
|
||||
from model.database import db
|
||||
from model.pitch_book_model import PitchBookModel
|
||||
from io import BytesIO
|
||||
from werkzeug.utils import secure_filename
|
||||
import puremagic
|
||||
|
||||
|
||||
pitch_book_controller = Blueprint("pitch_books", __name__, url_prefix="/api/pitch_book")
|
||||
|
||||
|
||||
@pitch_book_controller.route("/", methods=["GET"])
def get_all_files():
    """Return the ``to_dict()`` form of every stored pitch book as a JSON array."""
    listing = []
    for stored in PitchBookModel.query.all():
        listing.append(stored.to_dict())
    return jsonify(listing), 200
|
||||
|
||||
|
||||
@pitch_book_controller.route("/<int:id>", methods=["GET"])
def get_file(id):
    """Return the ``to_dict()`` form of pitch book ``id`` as JSON.

    Aborts with 404 when no such row exists.
    """
    # db.get_or_404 goes through Session.get instead of the legacy Query.get.
    file = db.get_or_404(PitchBookModel, id)
    return jsonify(file.to_dict()), 200
|
||||
|
||||
|
||||
@pitch_book_controller.route("/<int:id>/download", methods=["GET"])
def download_file(id):
    """Send the stored bytes of pitch book ``id`` as an attachment.

    The attachment is named after the stored filename. Aborts with 404
    when no such row exists.
    """
    # db.get_or_404 goes through Session.get instead of the legacy Query.get.
    file = db.get_or_404(PitchBookModel, id)
    return send_file(
        BytesIO(file.file), download_name=file.filename, as_attachment=True
    )
|
||||
|
||||
|
||||
@pitch_book_controller.route("/", methods=["POST"])
def upload_file():
    """Store an uploaded PDF as a new pitch book.

    Expects a multipart form field named ``file``. The content type is
    detected from the file's bytes, not from its name.

    Returns:
        201 with the new record; 400 when the part is missing, no file was
        selected, or the content is not a PDF.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file part in the request"}), 400

    uploaded_file = request.files["file"]
    if not uploaded_file.filename:
        return jsonify({"error": "No selected file"}), 400

    # Read file data once
    file_data = uploaded_file.read()

    # Only the type detection is guarded. Database errors must not be
    # reported to the client as "invalid file format".
    try:
        is_pdf = puremagic.from_string(file_data, mime=True) == "application/pdf"
    except (puremagic.PureError, ValueError):
        # Unknown signature or empty upload.
        is_pdf = False

    if not is_pdf:
        return (
            jsonify({"error": "Invalid file format. Only PDF files are accepted"}),
            400,
        )

    new_file = PitchBookModel(
        filename=secure_filename(uploaded_file.filename), file=file_data
    )

    db.session.add(new_file)
    db.session.commit()

    return jsonify(new_file.to_dict()), 201
|
||||
|
||||
|
||||
@pitch_book_controller.route("/<int:id>", methods=["PUT"])
def update_file(id):
    """Replace the content and/or the ``kpi`` value of pitch book ``id``.

    An optional ``file`` part replaces the stored PDF and its filename; an
    optional ``kpi`` form field replaces the stored KPI string.

    Returns:
        200 with the updated record; 400 when a replacement file is sent
        but is not a PDF. Aborts with 404 when the record does not exist.
    """
    file = db.get_or_404(PitchBookModel, id)

    if "file" in request.files:
        uploaded_file = request.files["file"]

        # An empty filename means no file was selected; skip the file part.
        if uploaded_file.filename:
            # Read file data once
            file_data = uploaded_file.read()

            try:
                is_pdf = (
                    puremagic.from_string(file_data, mime=True)
                    == "application/pdf"
                )
            except (puremagic.PureError, ValueError):
                # Unknown signature or empty upload.
                is_pdf = False

            # Tell the client about a rejected file instead of silently
            # keeping the old content and answering 200.
            if not is_pdf:
                return (
                    jsonify(
                        {"error": "Invalid file format. Only PDF files are accepted"}
                    ),
                    400,
                )

            # Name and content change together, and the name is sanitized
            # the same way as in upload_file.
            file.filename = secure_filename(uploaded_file.filename)
            file.file = file_data

    if "kpi" in request.form:
        file.kpi = request.form.get("kpi")

    db.session.commit()

    return jsonify(file.to_dict()), 200
|
||||
|
||||
|
||||
@pitch_book_controller.route("/<int:id>", methods=["DELETE"])
def delete_file(id):
    """Delete pitch book ``id``.

    Returns 200 with a confirmation message; aborts with 404 when the
    record does not exist.
    """
    # db.get_or_404 goes through Session.get instead of the legacy Query.get.
    file = db.get_or_404(PitchBookModel, id)
    db.session.delete(file)
    db.session.commit()

    return jsonify({"message": f"File {id} deleted successfully"}), 200
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
from flask import Blueprint, request, jsonify, send_file
|
||||
from io import BytesIO
|
||||
|
||||
from model.spacy_model import SpacyModel
|
||||
import puremagic
|
||||
from werkzeug.utils import secure_filename
|
||||
from model.database import db
|
||||
|
||||
|
||||
spacy_controller = Blueprint("spacy", __name__, url_prefix="/api/spacy")
|
||||
|
||||
|
||||
@spacy_controller.route("/", methods=["GET"])
def get_all_files():
    """Return the ``to_dict()`` form of every stored spaCy file as a JSON array."""
    listing = []
    for stored in SpacyModel.query.all():
        listing.append(stored.to_dict())
    return jsonify(listing), 200
|
||||
|
||||
|
||||
@spacy_controller.route("/<int:id>", methods=["GET"])
def get_file(id):
    """Return the ``to_dict()`` form of spaCy file ``id`` as JSON.

    Aborts with 404 when no such row exists.
    """
    # db.get_or_404 goes through Session.get instead of the legacy Query.get.
    file = db.get_or_404(SpacyModel, id)
    return jsonify(file.to_dict()), 200
|
||||
|
||||
|
||||
@spacy_controller.route("/<int:id>/download", methods=["GET"])
def download_file(id):
    """Send the stored bytes of spaCy file ``id`` as an attachment.

    The attachment is named after the stored filename. Aborts with 404
    when no such row exists.
    """
    # db.get_or_404 goes through Session.get instead of the legacy Query.get.
    file = db.get_or_404(SpacyModel, id)
    return send_file(
        BytesIO(file.file), download_name=file.filename, as_attachment=True
    )
|
||||
|
||||
|
||||
@spacy_controller.route("/", methods=["POST"])
def upload_file():
    """Store an uploaded file as a new spaCy record.

    Expects a multipart form field named ``file``. No content-type check is
    performed here.

    Returns:
        201 with the new record; 400 when the part is missing or no file
        was selected.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file part in the request"}), 400

    uploaded_file = request.files["file"]
    if not uploaded_file.filename:
        return jsonify({"error": "No selected file"}), 400

    # Read file data once
    file_data = uploaded_file.read()

    # NOTE(review): update_file requires PDF content but this endpoint
    # accepts anything — confirm which policy is intended.
    # Database errors propagate as server errors; they used to be caught
    # and reported as "Only PDF files are accepted".
    new_file = SpacyModel(
        filename=secure_filename(uploaded_file.filename), file=file_data
    )

    db.session.add(new_file)
    db.session.commit()

    return jsonify(new_file.to_dict()), 201
|
||||
|
||||
|
||||
@spacy_controller.route("/<int:id>", methods=["PUT"])
def update_file(id):
    """Replace the stored content of spaCy file ``id``.

    An optional ``file`` part replaces the stored bytes and filename.

    Returns:
        200 with the updated record; 400 when a replacement file is sent
        but is not a PDF. Aborts with 404 when the record does not exist.
    """
    file = db.get_or_404(SpacyModel, id)

    if "file" in request.files:
        uploaded_file = request.files["file"]

        # An empty filename means no file was selected; skip the file part.
        if uploaded_file.filename:
            # Read file data once
            file_data = uploaded_file.read()

            # NOTE(review): the POST endpoint accepts any content while this
            # one requires a PDF — confirm which policy is intended.
            try:
                is_pdf = (
                    puremagic.from_string(file_data, mime=True)
                    == "application/pdf"
                )
            except (puremagic.PureError, ValueError):
                # Unknown signature or empty upload.
                is_pdf = False

            # Tell the client about a rejected file instead of silently
            # keeping the old content and answering 200.
            if not is_pdf:
                return (
                    jsonify(
                        {"error": "Invalid file format. Only PDF files are accepted"}
                    ),
                    400,
                )

            # Name and content change together, and the name is sanitized
            # the same way as in upload_file.
            file.filename = secure_filename(uploaded_file.filename)
            file.file = file_data

    # NOTE(review): SpacyModel declares no "kpi" column, so this value is
    # presumably never persisted (looks copied from the pitch-book
    # controller) — confirm and remove or add the column.
    if "kpi" in request.form:
        file.kpi = request.form.get("kpi")

    db.session.commit()

    return jsonify(file.to_dict()), 200
|
||||
|
||||
|
||||
@spacy_controller.route("/<int:id>", methods=["DELETE"])
def delete_file(id):
    """Delete spaCy file ``id``.

    Returns 200 with a confirmation message; aborts with 404 when the
    record does not exist.
    """
    # db.get_or_404 goes through Session.get instead of the legacy Query.get.
    file = db.get_or_404(SpacyModel, id)
    db.session.delete(file)
    db.session.commit()

    return jsonify({"message": f"File {id} deleted successfully"}), 200
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
|
||||
services:
|
||||
db:
|
||||
image: postgres
|
||||
environment:
|
||||
POSTGRES_PASSWORD: admin
|
||||
POSTGRES_USER: admin
|
||||
ports:
|
||||
- "5432:5432"
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
from flask_sqlalchemy import SQLAlchemy
|
||||
from sqlalchemy.orm import DeclarativeBase
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
    """Declarative base class passed to the SQLAlchemy extension as ``model_class``."""
|
||||
|
||||
|
||||
db = SQLAlchemy(model_class=Base)
|
||||
|
||||
|
||||
def init_db(app):
    """Bind the shared ``db`` extension to ``app`` and run ``create_all``."""
    db.init_app(app)

    # create_all runs inside an application context.
    app_ctx = app.app_context()
    with app_ctx:
        db.create_all()
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
from model.database import db
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
from sqlalchemy import Enum as SQLAlchemyEnum
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class KPISettingType(Enum):
    """Closed set of value kinds a KPI setting can have.

    The member values are the strings used by the API and by ``to_dict``.
    """

    NUMBER = "number"
    STRING = "string"
    RANGE = "range"
    BOOLEAN = "boolean"
    ARRAY = "array"
|
||||
|
||||
|
||||
class KPISettingModel(db.Model):
    """Database model for one configurable KPI definition."""

    id: Mapped[int] = mapped_column(primary_key=True)
    # Names are unique across all settings.
    name: Mapped[str] = mapped_column(unique=True)
    description: Mapped[str]
    mandatory: Mapped[bool]
    type: Mapped[KPISettingType] = mapped_column(
        SQLAlchemyEnum(KPISettingType, native_enum=True)
    )
    translation: Mapped[str]
    example: Mapped[str]

    def __init__(self, name, description, mandatory, type, translation, example):
        """Populate all user-supplied columns; ``id`` is not set here."""
        self.name = name
        self.description = description
        self.mandatory = mandatory
        self.type = type
        self.translation = translation
        self.example = example

    def to_dict(self):
        """Return a plain dict of all columns, with ``type`` as its string value."""
        serialized = {
            "id": self.id,
            "name": self.name,
            "description": self.description,
            "mandatory": self.mandatory,
        }
        serialized["type"] = self.type.value
        serialized["translation"] = self.translation
        serialized["example"] = self.example
        return serialized
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
from model.database import db
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
from sqlalchemy import LargeBinary
|
||||
|
||||
|
||||
class PitchBookModel(db.Model):
    """Database model for one uploaded pitch book kept as a binary blob."""

    id: Mapped[int] = mapped_column(primary_key=True)
    filename: Mapped[str] = mapped_column()
    # Raw file content.
    file: Mapped[bytes] = mapped_column(LargeBinary)
    # Optional; not set by the constructor.
    kpi: Mapped[str | None]

    def __init__(self, filename, file):
        """Create a record from a filename and the file's bytes."""
        self.filename = filename
        self.file = file

    def to_dict(self):
        """Return id, filename and kpi as a dict; the binary content is omitted."""
        summary = {}
        summary["id"] = self.id
        summary["filename"] = self.filename
        summary["kpi"] = self.kpi
        return summary
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
from model.database import db
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
from sqlalchemy import LargeBinary
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class SpacyModel(db.Model):
    """Database model for one uploaded spaCy file kept as a binary blob."""

    id: Mapped[int] = mapped_column(primary_key=True)
    filename: Mapped[str] = mapped_column()
    # Raw file content.
    file: Mapped[bytes] = mapped_column(LargeBinary)
    # NOTE(review): datetime.utcnow returns a naive datetime and is
    # deprecated since Python 3.12 — consider a timezone-aware default.
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)

    def __init__(self, filename, file):
        """Create a record from a filename and the file's bytes."""
        self.filename = filename
        self.file = file

    def to_dict(self):
        """Return id, filename and ISO-formatted ``created_at``; the content is omitted."""
        created_iso = self.created_at.isoformat()
        return {
            "id": self.id,
            "filename": self.filename,
            "created_at": created_iso,
        }
|
||||
|
|
@ -1,4 +1,31 @@
|
|||
Flask
|
||||
black
|
||||
flake8
|
||||
pre-commit
|
||||
black==25.1.0
|
||||
blinker==1.9.0
|
||||
cfgv==3.4.0
|
||||
click==8.2.1
|
||||
distlib==0.3.9
|
||||
filelock==3.18.0
|
||||
flake8==7.2.0
|
||||
Flask==3.1.1
|
||||
Flask-SQLAlchemy==3.1.1
|
||||
greenlet==3.2.2
|
||||
identify==2.6.12
|
||||
itsdangerous==2.2.0
|
||||
Jinja2==3.1.6
|
||||
MarkupSafe==3.0.2
|
||||
mccabe==0.7.0
|
||||
mypy_extensions==1.1.0
|
||||
nodeenv==1.9.1
|
||||
packaging==25.0
|
||||
pathspec==0.12.1
|
||||
platformdirs==4.3.8
|
||||
pre_commit==4.2.0
|
||||
psycopg2-binary==2.9.10
|
||||
puremagic==1.29
|
||||
pycodestyle==2.13.0
|
||||
pyflakes==3.3.2
|
||||
python-dotenv==1.1.0
|
||||
PyYAML==6.0.2
|
||||
SQLAlchemy==2.0.41
|
||||
typing_extensions==4.13.2
|
||||
virtualenv==20.31.2
|
||||
Werkzeug==3.1.3
|
||||
|
|
|
|||
|
|
@ -1,14 +0,0 @@
|
|||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY . .
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
EXPOSE 5050
|
||||
|
||||
CMD ["python", "app.py"]
|
||||
|
|
@ -7,11 +7,13 @@ RUN apt-get update && apt-get install -y \
|
|||
build-essential \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY .. /app
|
||||
COPY requirements.txt /app
|
||||
|
||||
RUN pip install --upgrade pip
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
RUN python -m spacy download en_core_web_sm
|
||||
|
||||
COPY .. /app
|
||||
|
||||
CMD ["python3.12", "app.py"]
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -0,0 +1,44 @@
|
|||
services:
|
||||
frontend:
|
||||
build:
|
||||
context: frontend
|
||||
ports:
|
||||
- 8080:80
|
||||
db:
|
||||
image: postgres:17-alpine
|
||||
env_file:
|
||||
- .env
|
||||
# ports:
|
||||
# - "5432:5432"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U admin"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
coordinator:
|
||||
build:
|
||||
context: backend/coordinator
|
||||
dockerfile: ../../Dockerfile
|
||||
env_file:
|
||||
- .env
|
||||
depends_on:
|
||||
- db
|
||||
healthcheck:
|
||||
test: wget --spider --no-verbose http://127.0.0.1:5000/health || exit 1
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
ports:
|
||||
- 5000:5000
|
||||
|
||||
spacy:
|
||||
build:
|
||||
context: backend/spacy-service
|
||||
|
||||
exxeta:
|
||||
build:
|
||||
context: backend/exxetaGPT
|
||||
dockerfile: ../../Dockerfile
|
||||
env_file:
|
||||
- .env
|
||||
Binary file not shown.
|
|
@ -0,0 +1 @@
|
|||
.env
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
{
|
||||
"Fondsname": {
|
||||
"value": "Real Estate Prime Europe",
|
||||
"page_number": 2,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
"Fondsmanager": {
|
||||
"value": null,
|
||||
"page_number": null,
|
||||
"confidence_level": "low"
|
||||
},
|
||||
"Name_Kapitalverwaltungsgesellschaft": {
|
||||
"value": "Real Estate",
|
||||
"page_number": 5,
|
||||
"confidence_level": "medium"
|
||||
},
|
||||
"Datum": {
|
||||
"value": "End of December 2018",
|
||||
"page_number": 12,
|
||||
"confidence_level": "medium"
|
||||
},
|
||||
"Risikoprofil": {
|
||||
"value": "Core/Core+",
|
||||
"page_number": 10,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
"Artikel_gem_SFDR": {
|
||||
"value": null,
|
||||
"page_number": null,
|
||||
"confidence_level": "low"
|
||||
},
|
||||
"Zielrendite_über_die_Fondslaufzeit": {
|
||||
"value": "IRR: 6%-7%",
|
||||
"page_number": 10,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
"Rendite_seit_Auflage": {
|
||||
"value": "Total return: 5.3%, 16.1%, 13.6%, 8.9%, 12.8% for years 2015-2018",
|
||||
"page_number": 12,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
"Zielausschüttungsrendite_über_die_Fondslaufzeit": {
|
||||
"value": "Cash on Cash: 4%-5%",
|
||||
"page_number": 10,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
"Ausschüttungsrendite_seit_Auflage": {
|
||||
"value": "Unlevered Cash-on-cash examples: 4.14%, 3.31%",
|
||||
"page_number": 33,
|
||||
"confidence_level": "medium"
|
||||
},
|
||||
"Laufzeit": {
|
||||
"value": "Open-ended",
|
||||
"page_number": 9,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
"LTV_Loan_to_Value": {
|
||||
"value": "50% max at asset and fund level",
|
||||
"page_number": 10,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
"Soll_Ist": {
|
||||
"value": "59.8%",
|
||||
"page_number": 12,
|
||||
"confidence_level": "medium"
|
||||
},
|
||||
"Ziel": {
|
||||
"value": "Target 40-45%",
|
||||
"page_number": 10,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
"Managementgebühren_Bezogen_auf_NAV_Net_Asset_Value": {
|
||||
"value": "Sliding scale 55-50-40bp x NAV",
|
||||
"page_number": 10,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
"Sektorenallokation": {
|
||||
"value": {
|
||||
"Office": "75.6%",
|
||||
"Retail": "13.8%",
|
||||
"Hotels": "4.3%",
|
||||
"Industrial/logistics": "1.0%",
|
||||
"Residential": "0.4%",
|
||||
"Others": "4.9%"
|
||||
},
|
||||
"page_number": 5,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
"Länderallokation": {
|
||||
"value": {
|
||||
"Germany": null,
|
||||
"France": null,
|
||||
"Italy": null,
|
||||
"Netherlands": null,
|
||||
"United Kingdom": null,
|
||||
"Czech Republic": null,
|
||||
"Luxembourg": null,
|
||||
"Spain": null,
|
||||
"Finland": null
|
||||
},
|
||||
"page_number": 16,
|
||||
"confidence_level": "medium"
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
{
|
||||
"details": [
|
||||
{
|
||||
"key": "Fondsname",
|
||||
"value": "Not directly mentioned",
|
||||
"page": null,
|
||||
"confidence": "low"
|
||||
},
|
||||
{
|
||||
"key": "Fondsmanager",
|
||||
"value": "Not directly mentioned",
|
||||
"page": null,
|
||||
"confidence": "low"
|
||||
},
|
||||
{
|
||||
"key": "Name Kapitalverwaltungsgesellschaft",
|
||||
"value": "Not directly mentioned",
|
||||
"page": null,
|
||||
"confidence": "low"
|
||||
},
|
||||
{
|
||||
"key": "Datum",
|
||||
"value": "31 March 2024",
|
||||
"page": 5,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Risikoprofil",
|
||||
"value": "Not directly mentioned",
|
||||
"page": null,
|
||||
"confidence": "low"
|
||||
},
|
||||
{
|
||||
"key": "Artikel gem. SFDR",
|
||||
"value": "Not directly mentioned",
|
||||
"page": null,
|
||||
"confidence": "low"
|
||||
},
|
||||
{
|
||||
"key": "Zielrendite über die Fondslaufzeit",
|
||||
"value": "7%+ Net Investors' long-term return",
|
||||
"page": 1,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Rendite seit Auflage",
|
||||
"value": "+6.0% annualised Total Return",
|
||||
"page": 27,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Zielausschüttungsrendite über die Fondslaufzeit",
|
||||
"value": "4.0%",
|
||||
"page": 27,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Ausschüttungsrendite seit Auflage",
|
||||
"value": "Not directly mentioned",
|
||||
"page": null,
|
||||
"confidence": "low"
|
||||
},
|
||||
{
|
||||
"key": "Laufzeit",
|
||||
"value": "Open-ended",
|
||||
"page": 30,
|
||||
"confidence": "medium"
|
||||
},
|
||||
{
|
||||
"key": "LTV (Loan-to-Value)",
|
||||
"value": "18.7%",
|
||||
"page": 16,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Soll/Ist",
|
||||
"value": "Not directly mentioned",
|
||||
"page": null,
|
||||
"confidence": "low"
|
||||
},
|
||||
{
|
||||
"key": "Ziel",
|
||||
"value": "Achieve a resilient income performance and a long-term capital appreciation",
|
||||
"page": 1,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Managementgebühren Bezogen auf NAV (Net Asset Value)",
|
||||
"value": "Between 70bps and 125bps based on ticket size",
|
||||
"page": 34,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Sektorenallokation",
|
||||
"value": [
|
||||
"Office",
|
||||
"Residential",
|
||||
"Industrial/Logistics",
|
||||
"Retail",
|
||||
"Hotels"
|
||||
],
|
||||
"page": 36,
|
||||
"confidence": "medium"
|
||||
},
|
||||
{
|
||||
"key": "Länderallokation",
|
||||
"value": ["Europe", "North America", "Asia Pacific"],
|
||||
"page": 36,
|
||||
"confidence": "medium"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
{
|
||||
"information": [
|
||||
{
|
||||
"key": "Fondsname",
|
||||
"value": "Europäische Logistikstrategie",
|
||||
"page_number": 1,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Fondsmanager",
|
||||
"value": "Nicht direkt erwähnter spezifischer Name",
|
||||
"page_number": 5,
|
||||
"confidence": "low"
|
||||
},
|
||||
{
|
||||
"key": "Name Kapitalverwaltungsgesellschaft",
|
||||
"value": "Nicht direkt erwähnt",
|
||||
"page_number": 5,
|
||||
"confidence": "low"
|
||||
},
|
||||
{
|
||||
"key": "Datum",
|
||||
"value": "30.06.2023",
|
||||
"page_number": 12,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Risikoprofil",
|
||||
"value": "Halten-Strategie",
|
||||
"page_number": 2,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Artikel gem. SFDR",
|
||||
"value": "Artikel 8",
|
||||
"page_number": 8,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Zielrendite über die Fondslaufzeit",
|
||||
"value": "5,00-5,25%",
|
||||
"page_number": 2,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Rendite seit Auflage",
|
||||
"value": "Nicht direkt erwähnt",
|
||||
"page_number": null,
|
||||
"confidence": "low"
|
||||
},
|
||||
{
|
||||
"key": "Zielausschüttungsrendite über die Fondslaufzeit",
|
||||
"value": "5,00-5,25%",
|
||||
"page_number": 6,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Ausschüttungsrendite seit Auflage",
|
||||
"value": "Nicht direkt erwähnt",
|
||||
"page_number": null,
|
||||
"confidence": "low"
|
||||
},
|
||||
{
|
||||
"key": "Laufzeit",
|
||||
"value": "Auf Basis langfristiger Strategien und Verträge",
|
||||
"page_number": 2,
|
||||
"confidence": "medium"
|
||||
},
|
||||
{
|
||||
"key": "LTV (Loan-to-Value)",
|
||||
"value": "25-40%",
|
||||
"page_number": 2,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Soll/Ist",
|
||||
"value": "Nicht direkt erwähnt",
|
||||
"page_number": null,
|
||||
"confidence": "low"
|
||||
},
|
||||
{
|
||||
"key": "Ziel",
|
||||
"value": "Langfristiges Halten und Management von Core+ Logistikimmobilien",
|
||||
"page_number": 2,
|
||||
"confidence": "medium"
|
||||
},
|
||||
{
|
||||
"key": "Managementgebühren Bezogen auf NAV (Net Asset Value)",
|
||||
"value": "60 bps p.a.",
|
||||
"page_number": 26,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Sektorenallokation",
|
||||
"value": "Logistikimmobilien",
|
||||
"page_number": 5,
|
||||
"confidence": "high"
|
||||
},
|
||||
{
|
||||
"key": "Länderallokation",
|
||||
"value": "Niederlande, Frankreich, Skandinavien, Deutschland",
|
||||
"page_number": 2,
|
||||
"confidence": "high"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
{
|
||||
"extracted_information": [
|
||||
{
|
||||
"key": "Fondsname",
|
||||
"value": "Core Plus Open-ended Fund",
|
||||
"page_number": 2,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
{
|
||||
"key": "Fondsmanager",
|
||||
"value": "Specialist Nordic Manager",
|
||||
"page_number": 2,
|
||||
"confidence_level": "medium"
|
||||
},
|
||||
{
|
||||
"key": "Name Kapitalverwaltungsgesellschaft",
|
||||
"value": "Capital Management is an AIFM supervised by the CSSF in Luxembourg",
|
||||
"page_number": 36,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
{
|
||||
"key": "Datum",
|
||||
"value": "August 2024",
|
||||
"page_number": 0,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
{
|
||||
"key": "Risikoprofil",
|
||||
"value": "Prioritizing assets where strong growth is expected over the next decade",
|
||||
"page_number": 2,
|
||||
"confidence_level": "medium"
|
||||
},
|
||||
{
|
||||
"key": "Artikel gem. SFDR",
|
||||
"value": "Article 8 of the Sustainable Financial Disclosure Regulation (SFDR)",
|
||||
"page_number": 16,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
{
|
||||
"key": "Zielrendite über die Fondslaufzeit",
|
||||
"value": "7-8% net total annual return",
|
||||
"page_number": 2,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
{
|
||||
"key": "Rendite seit Auflage",
|
||||
"value": "Realized Gross IRR® (Max)",
|
||||
"page_number": 5,
|
||||
"confidence_level": "medium"
|
||||
},
|
||||
{
|
||||
"key": "Zielausschüttungsrendite über die Fondslaufzeit",
|
||||
"value": "3-4% dividend yield",
|
||||
"page_number": 2,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
{
|
||||
"key": "Ausschüttungsrendite seit Auflage",
|
||||
"value": "NA (specific historical payout not provided)",
|
||||
"page_number": null,
|
||||
"confidence_level": "low"
|
||||
},
|
||||
{
|
||||
"key": "Laufzeit",
|
||||
"value": "Open-ended with an initial 24-month lock-in for new investors",
|
||||
"page_number": 36,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
{
|
||||
"key": "LTV (Loan-to-Value)",
|
||||
"value": "Target LTV of 35% (capped at 37.5%)",
|
||||
"page_number": 11,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
{
|
||||
"key": "Soll/Ist",
|
||||
"value": "Estimated CAPEX / Current Yield and Occupancy",
|
||||
"page_number": 28,
|
||||
"confidence_level": "medium"
|
||||
},
|
||||
{
|
||||
"key": "Ziel",
|
||||
"value": "Targeting properties that are comparatively sustainable and aligned with EU Taxonomy",
|
||||
"page_number": 11,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
{
|
||||
"key": "Managementgebühren Bezogen auf NAV (Net Asset Value)",
|
||||
"value": "Management fee of 85 bps on NAV",
|
||||
"page_number": 36,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
{
|
||||
"key": "Sektorenallokation",
|
||||
"value": "Logistics, Residential, Office",
|
||||
"page_number": 12,
|
||||
"confidence_level": "high"
|
||||
},
|
||||
{
|
||||
"key": "Länderallokation",
|
||||
"value": "Sweden, Norway, Denmark, Finland",
|
||||
"page_number": 12,
|
||||
"confidence_level": "high"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
import fitz # PyMuPDF
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
from openai import AzureOpenAI
|
||||
|
||||
load_dotenv()
|
||||
|
||||
BASE_URL = "https://ai.exxeta.com/api/v2/azure/openai"
|
||||
API_KEY = os.getenv("OPENAI_API_KEY")
|
||||
|
||||
def extract_text_from_pdf(pdf_path, *, first_page_number=0):
    """Extract the text of every page of a PDF, each prefixed with a page marker.

    Args:
        pdf_path: Path of the PDF file, passed to ``fitz.open``.
        first_page_number: Number used in the marker of the first page.
            Defaults to 0, the original behaviour; pass 1 for page numbers
            that match what a PDF viewer shows.

    Returns:
        One string with a ``"[Page N]\\n<text>\\n"`` section per page, in
        document order. Empty string for a document without pages.

    Raises:
        Whatever ``fitz.open`` raises for a missing or unreadable file; the
        error is left to the caller to handle.
    """
    # The context manager closes the document even if a page fails to parse;
    # previously the handle was never closed.
    with fitz.open(pdf_path) as doc:
        sections = [
            "[Page " + str(page_number) + "]\n" + page.get_text() + "\n"
            for page_number, page in enumerate(doc, start=first_page_number)
        ]

    # Join once instead of growing a string page by page.
    return "".join(sections)
|
||||
|
||||
# Example usage
|
||||
pdf_document = "../../pitch-books/Teaser 2 FINAL.pdf"
|
||||
# pdf_document = "../../pitch-books/Pitchbook 4.pdf"
|
||||
text = extract_text_from_pdf(pdf_document)
|
||||
print(text)
|
||||
|
||||
client = AzureOpenAI(api_key=API_KEY, base_url=BASE_URL, api_version="2023-07-01-preview")
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model="gpt-4o",
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a helpful assistant that always responds in JSON format. Your responses should be valid JSON objects without any explanatory text outside the JSON structure. Structure your answers appropriately with keys and values relevant to the user's query."},
|
||||
{"role": "user", "content": """
|
||||
Extract from the text the following information:
|
||||
- Fondsname
|
||||
- Fondsmanager
|
||||
- Name Kapitalverwaltungsgesellschaft
|
||||
- Datum
|
||||
- Risikoprofil
|
||||
- Artikel gem. SFDR
|
||||
- Zielrendite über die Fondslaufzeit
|
||||
- Rendite seit Auflage
|
||||
- Zielausschüttungsrendite über die Fondslaufzeit
|
||||
- Ausschüttungsrendite seit Auflage
|
||||
- Laufzeit
|
||||
- LTV (Loan-to-Value)
|
||||
- Soll/Ist
|
||||
- Ziel
|
||||
- Managementgebühren Bezogen auf NAV (Net Asset Value)
|
||||
- Sektorenallokation
|
||||
- Länderallokation
|
||||
|
||||
for each value in the list:
|
||||
- the key
|
||||
- the value
|
||||
- the page number
|
||||
- theconfidence level (high/medium/low)(that the value is correct)
|
||||
|
||||
TEXT TO ANALYZE:
|
||||
""" + text + """
|
||||
"""},
|
||||
],
|
||||
response_format={"type": "json_object"}
|
||||
)
|
||||
|
||||
print(response.choices[0].message.content)
|
||||
Loading…
Reference in New Issue
Es fehlt eine Fehlerbehandlung: Es wäre gut, beim Öffnen der PDF ein try-except hinzuzufügen, damit das Programm auf fehlerhafte Dateien sauber reagieren kann.
Das ist in den Prototypes; das ist nicht unser „reales" Projekt.