#26-init-db #40

Merged
1924466 merged 6 commits from #26-init-db into main 2025-05-28 16:14:54 +02:00
26 changed files with 1033 additions and 79 deletions

8
.gitignore vendored
View File

@ -2,4 +2,10 @@
.DS_Store
# Python virtual environments
.venv/
**/.venv/
**/venv
**/__pycache__
**/.env
**/node_modules
**/build
**/dist

Binary file not shown.

View File

@ -0,0 +1,4 @@
API_KEY=
DATABASE_URL=postgresql://admin:admin@db:5432
POSTGRES_PASSWORD=admin
POSTGRES_USER=admin

View File

@ -4,16 +4,18 @@ FROM python:3.11-alpine
# 2. Arbeitsverzeichnis im Container setzen
WORKDIR /app
# 3. requirements.txt kopieren und Pakete installieren
# 3. production-style server mit gunicorn
RUN pip install gunicorn
# 4. requirements.txt kopieren und Pakete installieren
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# 4. Quellcode kopieren (z.B. app.py)
# 5. Quellcode kopieren (z.B. app.py)
COPY . .
# 5. Flask-App starten
# production-style server mit gunicorn
RUN pip install gunicorn
ENV PYTHONUNBUFFERED=1
EXPOSE 5000
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "app:app"]

View File

@ -1,66 +1,28 @@
from flask import Flask, jsonify
from flask import request
from flask import Flask
import os
from dotenv import load_dotenv
from controller import register_routes
from model.database import init_db
app = Flask(__name__)
load_dotenv()
DATABASE_URL = os.getenv("DATABASE_URL")
app.config["SQLALCHEMY_DATABASE_URI"] = DATABASE_URL
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = True
app.config["MAX_CONTENT_LENGTH"] = 100 * 1024 * 1024 # 100 MB
init_db(app)
register_routes(app)
@app.route("/health")
def health_check():
return "OK"
# gibt Beispiel-Konfig der Kennzahlen zurück (für die UI)
@app.route("/config", methods=["GET"])
def get_config():
config = [
{"name": "Fondname", "format": "Text", "required": True},
{"name": "IRR", "format": "Prozent", "required": False},
]
return jsonify(config)
# liefert Beispiel-Ergebnisse der Extraktion
@app.route("/extraction_results", methods=["GET"])
def get_extraction_results():
results = [
{"label": "Fondname", "entity": "ABC Fonds", "page": 1, "status": "validated"},
{
"label": "IRR",
"entity": "6,0%",
"page": 3,
"status": "single-source",
"source": "spaCy",
},
]
return jsonify(results)
# legt Upload-Ordner an, falls nicht vorhanden
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# nimmt eine PDF-Datei per POST entgegen und speichert sie
@app.route("/upload", methods=["POST"])
def upload_pdf():
if "file" not in request.files:
return {"error": "Keine Datei hochgeladen."}, 400
file = request.files["file"]
if file.filename == "":
return {"error": "Dateiname fehlt."}, 400
if not file.filename.endswith(".pdf"):
return {"error": "Nur PDF-Dateien erlaubt."}, 400
file_path = os.path.join(UPLOAD_FOLDER, file.filename)
file.save(file_path)
return {"message": f"Datei {file.filename} erfolgreich gespeichert!"}, 200
# für Docker wichtig: host='0.0.0.0'
if __name__ == "__main__":
app.run(debug=True, host="0.0.0.0")

View File

@ -0,0 +1,9 @@
from controller.spacy_contoller import spacy_controller
from controller.kpi_setting_controller import kpi_setting_controller
from controller.pitch_book_controller import pitch_book_controller
def register_routes(app):
    """Attach every controller blueprint to ``app``.

    The blueprints are registered in a fixed order: KPI settings, pitch
    books, then the spaCy endpoints.
    """
    blueprints = (
        kpi_setting_controller,
        pitch_book_controller,
        spacy_controller,
    )
    for blueprint in blueprints:
        app.register_blueprint(blueprint)

View File

@ -0,0 +1,116 @@
from flask import Blueprint, request, jsonify
from model.database import db
from model.kpi_setting_model import KPISettingModel, KPISettingType
kpi_setting_controller = Blueprint(
"kpi_settings", __name__, url_prefix="/api/kpi_setting"
)
@kpi_setting_controller.route("/", methods=["GET"])
def get_all_kpi_settings():
    """Return every stored KPI setting as a JSON array with status 200."""
    rows = KPISettingModel.query.all()
    payload = [row.to_dict() for row in rows]
    return jsonify(payload), 200
@kpi_setting_controller.route("/<int:id>", methods=["GET"])
def get_kpi_setting(id):
    """Return the KPI setting with primary key ``id``, or abort with 404."""
    found = KPISettingModel.query.get_or_404(id)
    body = found.to_dict()
    return jsonify(body), 200
@kpi_setting_controller.route("/", methods=["POST"])
def create_kpi_setting():
    """Create a KPI setting from the JSON request body.

    Returns:
        201 with the stored setting on success.
        400 if the body is empty, is not a JSON object, lacks (or nulls) a
        required field, or names an unknown ``type``.
        409 if a setting with the same name already exists.
    """
    # Function-scope import so this handler does not rely on a file-level
    # import being present.
    from sqlalchemy.exc import IntegrityError

    data = request.json
    if not data:
        return jsonify({"error": "No data provided"}), 400
    # A JSON string or list passes the truthiness check above but would make
    # the key lookups below misbehave (substring match / TypeError).
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    required_fields = (
        "name",
        "description",
        "mandatory",
        "type",
        "translation",
        "example",
    )
    for field in required_fields:
        # A JSON null is treated like an absent field; presumably all of
        # these map to non-nullable columns - confirm against the model.
        if data.get(field) is None:
            return jsonify({"error": f"Missing required field: {field}"}), 400

    # Check if name already exists
    existing_kpi = KPISettingModel.query.filter_by(name=data["name"]).first()
    if existing_kpi:
        return jsonify({"error": "KPI Setting with this name already exists"}), 409

    # Validate type enum
    try:
        kpi_type = KPISettingType(data["type"])
    except ValueError:
        valid_types = [t.value for t in KPISettingType]
        return jsonify({"error": f"Invalid type. Must be one of: {valid_types}"}), 400

    new_kpi_setting = KPISettingModel(
        name=data["name"],
        description=data["description"],
        mandatory=data["mandatory"],
        type=kpi_type,
        translation=data["translation"],
        example=data["example"],
    )
    db.session.add(new_kpi_setting)
    try:
        db.session.commit()
    except IntegrityError:
        # A concurrent request can insert the same name between the lookup
        # above and this commit; roll back so the session stays usable.
        db.session.rollback()
        return jsonify({"error": "KPI Setting with this name already exists"}), 409

    return jsonify(new_kpi_setting.to_dict()), 201
@kpi_setting_controller.route("/<int:id>", methods=["PUT"])
def update_kpi_setting(id):
    """Partially update the KPI setting ``id`` from the JSON request body.

    Only keys present in the body are changed. All validation runs before
    the first attribute is assigned, so a rejected request leaves the loaded
    row unmodified.

    Returns:
        200 with the updated setting on success.
        400 if the body is empty, is not a JSON object, sets a field to
        null, or names an unknown ``type``.
        404 if no setting with ``id`` exists.
        409 if the new name is already used by another setting.
    """
    # Function-scope import so this handler does not rely on a file-level
    # import being present.
    from sqlalchemy.exc import IntegrityError

    kpi_setting = KPISettingModel.query.get_or_404(id)

    data = request.json
    if not data:
        return jsonify({"error": "No data provided"}), 400
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    updatable_fields = (
        "name",
        "description",
        "mandatory",
        "type",
        "translation",
        "example",
    )
    for field in updatable_fields:
        if field in data and data[field] is None:
            return jsonify({"error": f"Field must not be null: {field}"}), 400

    # Name conflict is checked first so that, when several things are wrong,
    # the response precedence (409 before 400) is the same as before.
    renamed = "name" in data and data["name"] != kpi_setting.name
    if renamed:
        existing_kpi = KPISettingModel.query.filter_by(name=data["name"]).first()
        if existing_kpi:
            return jsonify({"error": "KPI Setting with this name already exists"}), 409

    new_type = None
    if "type" in data:
        try:
            new_type = KPISettingType(data["type"])
        except ValueError:
            valid_types = [t.value for t in KPISettingType]
            return (
                jsonify({"error": f"Invalid type. Must be one of: {valid_types}"}),
                400,
            )

    # Everything validated: apply the changes.
    if renamed:
        kpi_setting.name = data["name"]
    for field in ("description", "mandatory", "translation", "example"):
        if field in data:
            setattr(kpi_setting, field, data[field])
    if new_type is not None:
        kpi_setting.type = new_type

    try:
        db.session.commit()
    except IntegrityError:
        # A concurrent rename/insert can still claim the name before this
        # commit; roll back so the session stays usable.
        db.session.rollback()
        return jsonify({"error": "KPI Setting with this name already exists"}), 409

    return jsonify(kpi_setting.to_dict()), 200
@kpi_setting_controller.route("/<int:id>", methods=["DELETE"])
def delete_kpi_setting(id):
    """Delete the KPI setting ``id`` (404 if unknown) and confirm with 200."""
    target = KPISettingModel.query.get_or_404(id)
    session = db.session
    session.delete(target)
    session.commit()
    confirmation = {"message": f"KPI Setting {id} deleted successfully"}
    return jsonify(confirmation), 200

View File

@ -0,0 +1,97 @@
from flask import Blueprint, request, jsonify, send_file
from model.database import db
from model.pitch_book_model import PitchBookModel
from io import BytesIO
from werkzeug.utils import secure_filename
import puremagic
pitch_book_controller = Blueprint("pitch_books", __name__, url_prefix="/api/pitch_book")
@pitch_book_controller.route("/", methods=["GET"])
def get_all_files():
    """Return the ``to_dict()`` form of every stored pitch book (status 200)."""
    records = PitchBookModel.query.all()
    listing = [record.to_dict() for record in records]
    return jsonify(listing), 200
@pitch_book_controller.route("/<int:id>", methods=["GET"])
def get_file(id):
    """Return the ``to_dict()`` form of pitch book ``id``, or abort with 404."""
    record = PitchBookModel.query.get_or_404(id)
    body = record.to_dict()
    return jsonify(body), 200
@pitch_book_controller.route("/<int:id>/download", methods=["GET"])
def download_file(id):
    """Send the stored bytes of pitch book ``id`` as an attachment (404 if unknown)."""
    record = PitchBookModel.query.get_or_404(id)
    # Wrap the stored bytes in a file-like object for send_file.
    stream = BytesIO(record.file)
    return send_file(stream, download_name=record.filename, as_attachment=True)
@pitch_book_controller.route("/", methods=["POST"])
def upload_file():
    """Store an uploaded PDF as a new pitch book.

    Expects a multipart form field named ``file``.

    Returns:
        201 with the new record's ``to_dict()`` on success.
        400 if the field is missing, no file was selected, or the content is
        not detected as ``application/pdf``.

    Raises:
        Exception: any error raised while committing is re-raised after the
            session has been rolled back, so storage failures are no longer
            reported to the client as an invalid file format.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file part in the request"}), 400

    uploaded_file = request.files["file"]
    if not uploaded_file.filename:
        return jsonify({"error": "No selected file"}), 400

    # Read file data once; the same bytes are sniffed and stored.
    file_data = uploaded_file.read()

    # Keep the try body limited to content detection: puremagic raises when
    # the payload is empty or matches no known signature.
    try:
        is_pdf = puremagic.from_string(file_data, mime=True) == "application/pdf"
    except (puremagic.PureError, ValueError):
        is_pdf = False
    if not is_pdf:
        return (
            jsonify({"error": "Invalid file format. Only PDF files are accepted"}),
            400,
        )

    new_file = PitchBookModel(
        filename=secure_filename(uploaded_file.filename), file=file_data
    )
    db.session.add(new_file)
    try:
        db.session.commit()
    except Exception:
        # Leave the session usable and let the framework answer with a
        # server error instead of a misleading 400.
        db.session.rollback()
        raise

    return jsonify(new_file.to_dict()), 201
@pitch_book_controller.route("/<int:id>", methods=["PUT"])
def update_file(id):
    """Replace the stored PDF and/or the ``kpi`` value of pitch book ``id``.

    Both parts are optional: a multipart ``file`` field replaces filename and
    content together, a ``kpi`` form field replaces the stored KPI string.

    Returns:
        200 with the record's ``to_dict()`` on success.
        400 if a file was supplied but is not detected as ``application/pdf``
        (previously such a file was ignored while its name was still stored).
        404 if no pitch book with ``id`` exists.
    """
    file = PitchBookModel.query.get_or_404(id)

    if "file" in request.files:
        uploaded_file = request.files["file"]
        if uploaded_file.filename:
            # Read file data once; the same bytes are sniffed and stored.
            file_data = uploaded_file.read()
            # puremagic raises when the payload is empty or unrecognised.
            try:
                is_pdf = (
                    puremagic.from_string(file_data, mime=True) == "application/pdf"
                )
            except (puremagic.PureError, ValueError):
                is_pdf = False
            if not is_pdf:
                return (
                    jsonify(
                        {"error": "Invalid file format. Only PDF files are accepted"}
                    ),
                    400,
                )
            # Update name and content together, and sanitise the name the
            # same way the upload endpoint does.
            file.filename = secure_filename(uploaded_file.filename)
            file.file = file_data

    if "kpi" in request.form:
        file.kpi = request.form.get("kpi")

    db.session.commit()
    return jsonify(file.to_dict()), 200
@pitch_book_controller.route("/<int:id>", methods=["DELETE"])
def delete_file(id):
    """Delete pitch book ``id`` (404 if unknown) and confirm with 200."""
    record = PitchBookModel.query.get_or_404(id)
    session = db.session
    session.delete(record)
    session.commit()
    confirmation = {"message": f"File {id} deleted successfully"}
    return jsonify(confirmation), 200

View File

@ -0,0 +1,93 @@
from flask import Blueprint, request, jsonify, send_file
from io import BytesIO
from model.spacy_model import SpacyModel
import puremagic
from werkzeug.utils import secure_filename
from model.database import db
spacy_controller = Blueprint("spacy", __name__, url_prefix="/api/spacy")
@spacy_controller.route("/", methods=["GET"])
def get_all_files():
    """Return the ``to_dict()`` form of every stored spaCy file (status 200)."""
    entries = SpacyModel.query.all()
    listing = [entry.to_dict() for entry in entries]
    return jsonify(listing), 200
@spacy_controller.route("/<int:id>", methods=["GET"])
def get_file(id):
    """Return the ``to_dict()`` form of spaCy file ``id``, or abort with 404."""
    entry = SpacyModel.query.get_or_404(id)
    body = entry.to_dict()
    return jsonify(body), 200
@spacy_controller.route("/<int:id>/download", methods=["GET"])
def download_file(id):
    """Send the stored bytes of spaCy file ``id`` as an attachment (404 if unknown)."""
    entry = SpacyModel.query.get_or_404(id)
    # Wrap the stored bytes in a file-like object for send_file.
    stream = BytesIO(entry.file)
    return send_file(stream, download_name=entry.filename, as_attachment=True)
@spacy_controller.route("/", methods=["POST"])
def upload_file():
    """Store an uploaded file as a new ``SpacyModel`` record.

    Expects a multipart form field named ``file``. No content-type check is
    applied here (the original applied none either).

    Returns:
        201 with the new record's ``to_dict()`` on success.
        400 if the field is missing or no file was selected.

    Raises:
        Exception: any error raised while committing is re-raised after the
            session has been rolled back; it used to be printed and answered
            with a misleading "Only PDF files are accepted" 400.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file part in the request"}), 400

    uploaded_file = request.files["file"]
    if not uploaded_file.filename:
        return jsonify({"error": "No selected file"}), 400

    # Read file data once
    file_data = uploaded_file.read()

    new_file = SpacyModel(
        filename=secure_filename(uploaded_file.filename), file=file_data
    )
    db.session.add(new_file)
    try:
        db.session.commit()
    except Exception:
        # Leave the session usable and surface the failure as a server error.
        db.session.rollback()
        raise

    return jsonify(new_file.to_dict()), 201
@spacy_controller.route("/<int:id>", methods=["PUT"])
def update_file(id):
    """Replace the stored content of spaCy file ``id``.

    A multipart ``file`` field replaces filename and content together.

    Returns:
        200 with the record's ``to_dict()`` on success.
        400 if a file was supplied but is not detected as ``application/pdf``
        (previously such a file was ignored while its name was still stored).
        404 if no record with ``id`` exists.
    """
    file = SpacyModel.query.get_or_404(id)

    if "file" in request.files:
        uploaded_file = request.files["file"]
        if uploaded_file.filename:
            # Read file data once; the same bytes are sniffed and stored.
            file_data = uploaded_file.read()
            # NOTE(review): the POST endpoint accepts any content while this
            # one requires a PDF - confirm which rule is intended.
            # puremagic raises when the payload is empty or unrecognised.
            try:
                is_pdf = (
                    puremagic.from_string(file_data, mime=True) == "application/pdf"
                )
            except (puremagic.PureError, ValueError):
                is_pdf = False
            if not is_pdf:
                return (
                    jsonify(
                        {"error": "Invalid file format. Only PDF files are accepted"}
                    ),
                    400,
                )
            # Update name and content together, and sanitise the name the
            # same way the upload endpoint does.
            file.filename = secure_filename(uploaded_file.filename)
            file.file = file_data

    if "kpi" in request.form:
        # NOTE(review): SpacyModel may not map a "kpi" column, in which case
        # this only sets a transient attribute - confirm.
        file.kpi = request.form.get("kpi")

    db.session.commit()
    return jsonify(file.to_dict()), 200
@spacy_controller.route("/<int:id>", methods=["DELETE"])
def delete_file(id):
    """Delete spaCy file ``id`` (404 if unknown) and confirm with 200."""
    entry = SpacyModel.query.get_or_404(id)
    session = db.session
    session.delete(entry)
    session.commit()
    confirmation = {"message": f"File {id} deleted successfully"}
    return jsonify(confirmation), 200

View File

@ -0,0 +1,9 @@
services:
db:
image: postgres
environment:
POSTGRES_PASSWORD: admin
POSTGRES_USER: admin
ports:
- "5432:5432"

View File

@ -0,0 +1,15 @@
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.orm import DeclarativeBase
class Base(DeclarativeBase):
    """Declarative base class handed to Flask-SQLAlchemy as ``model_class``."""


# Shared extension instance; the model modules import it to declare tables.
db = SQLAlchemy(model_class=Base)
def init_db(app):
    """Bind the shared ``db`` extension to ``app`` and run ``db.create_all()``.

    ``create_all`` is executed inside an application context pushed for the
    duration of the call.
    """
    db.init_app(app)
    context = app.app_context()
    with context:
        db.create_all()

View File

@ -0,0 +1,43 @@
from model.database import db
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import Enum as SQLAlchemyEnum
from enum import Enum
class KPISettingType(Enum):
    """Closed set of value kinds a KPI setting can have.

    The member values are the lowercase strings accepted and returned by the
    API.
    """

    NUMBER = "number"
    STRING = "string"
    RANGE = "range"
    BOOLEAN = "boolean"
    ARRAY = "array"
class KPISettingModel(db.Model):
    """ORM model describing one configurable KPI."""

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(unique=True)
    description: Mapped[str]
    mandatory: Mapped[bool]
    type: Mapped[KPISettingType] = mapped_column(
        SQLAlchemyEnum(KPISettingType, native_enum=True)
    )
    translation: Mapped[str]
    example: Mapped[str]

    def __init__(self, name, description, mandatory, type, translation, example):
        """Populate all user-supplied columns; ``id`` is not set here."""
        self.name, self.description = name, description
        self.mandatory, self.type = mandatory, type
        self.translation, self.example = translation, example

    def to_dict(self):
        """Return a JSON-ready dict; ``type`` is emitted as its enum value."""
        serialised = {
            "id": self.id,
            "name": self.name,
            "description": self.description,
            "mandatory": self.mandatory,
        }
        serialised["type"] = self.type.value
        serialised["translation"] = self.translation
        serialised["example"] = self.example
        return serialised

View File

@ -0,0 +1,17 @@
from model.database import db
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import LargeBinary
class PitchBookModel(db.Model):
    """ORM model holding one uploaded pitch book and its optional KPI string."""

    id: Mapped[int] = mapped_column(primary_key=True)
    filename: Mapped[str] = mapped_column()
    file: Mapped[bytes] = mapped_column(LargeBinary)
    kpi: Mapped[str | None]

    def __init__(self, filename, file):
        """Store the file name and raw bytes; ``kpi`` is not set here."""
        self.filename, self.file = filename, file

    def to_dict(self):
        """Return the record's metadata; the binary content is left out."""
        metadata = {"id": self.id, "filename": self.filename}
        metadata["kpi"] = self.kpi
        return metadata

View File

@ -0,0 +1,22 @@
from model.database import db
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import LargeBinary
from datetime import datetime
class SpacyModel(db.Model):
    """ORM model holding one uploaded file together with its creation time."""

    id: Mapped[int] = mapped_column(primary_key=True)
    filename: Mapped[str] = mapped_column()
    file: Mapped[bytes] = mapped_column(LargeBinary)
    # NOTE(review): datetime.utcnow returns a naive datetime and is
    # deprecated since Python 3.12 - revisit when upgrading.
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)

    def __init__(self, filename, file):
        """Store the file name and raw bytes; ``created_at`` uses the column default."""
        self.filename, self.file = filename, file

    def to_dict(self):
        """Return the record's metadata; the binary content is left out."""
        created = self.created_at.isoformat()
        return {
            "id": self.id,
            "filename": self.filename,
            "created_at": created,
        }

View File

@ -1,4 +1,31 @@
Flask
black
flake8
pre-commit
black==25.1.0
blinker==1.9.0
cfgv==3.4.0
click==8.2.1
distlib==0.3.9
filelock==3.18.0
flake8==7.2.0
Flask==3.1.1
Flask-SQLAlchemy==3.1.1
greenlet==3.2.2
identify==2.6.12
itsdangerous==2.2.0
Jinja2==3.1.6
MarkupSafe==3.0.2
mccabe==0.7.0
mypy_extensions==1.1.0
nodeenv==1.9.1
packaging==25.0
pathspec==0.12.1
platformdirs==4.3.8
pre_commit==4.2.0
psycopg2-binary==2.9.10
puremagic==1.29
pycodestyle==2.13.0
pyflakes==3.3.2
python-dotenv==1.1.0
PyYAML==6.0.2
SQLAlchemy==2.0.41
typing_extensions==4.13.2
virtualenv==20.31.2
Werkzeug==3.1.3

View File

@ -1,14 +0,0 @@
FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
ENV PYTHONUNBUFFERED=1
EXPOSE 5050
CMD ["python", "app.py"]

View File

@ -7,11 +7,13 @@ RUN apt-get update && apt-get install -y \
build-essential \
&& rm -rf /var/lib/apt/lists/*
COPY .. /app
COPY requirements.txt /app
RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt
RUN python -m spacy download en_core_web_sm
COPY .. /app
CMD ["python3.12", "app.py"]

View File

@ -0,0 +1,44 @@
services:
frontend:
build:
context: frontend
ports:
- 8080:80
db:
image: postgres:17-alpine
env_file:
- .env
# ports:
# - "5432:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U admin"]
interval: 10s
timeout: 5s
retries: 5
coordinator:
build:
context: backend/coordinator
dockerfile: ../../Dockerfile
env_file:
- .env
depends_on:
- db
healthcheck:
test: wget --spider --no-verbose http://127.0.0.1:5000/health || exit 1
interval: 10s
timeout: 5s
retries: 5
ports:
- 5000:5000
spacy:
build:
context: backend/spacy-service
exxeta:
build:
context: backend/exxetaGPT
dockerfile: ../../Dockerfile
env_file:
- .env

Binary file not shown.

View File

@ -0,0 +1 @@
.env

View File

@ -0,0 +1,104 @@
{
"Fondsname": {
"value": "Real Estate Prime Europe",
"page_number": 2,
"confidence_level": "high"
},
"Fondsmanager": {
"value": null,
"page_number": null,
"confidence_level": "low"
},
"Name_Kapitalverwaltungsgesellschaft": {
"value": "Real Estate",
"page_number": 5,
"confidence_level": "medium"
},
"Datum": {
"value": "End of December 2018",
"page_number": 12,
"confidence_level": "medium"
},
"Risikoprofil": {
"value": "Core/Core+",
"page_number": 10,
"confidence_level": "high"
},
"Artikel_gem_SFDR": {
"value": null,
"page_number": null,
"confidence_level": "low"
},
"Zielrendite_über_die_Fondslaufzeit": {
"value": "IRR: 6%-7%",
"page_number": 10,
"confidence_level": "high"
},
"Rendite_seit_Auflage": {
"value": "Total return: 5.3%, 16.1%, 13.6%, 8.9%, 12.8% for years 2015-2018",
"page_number": 12,
"confidence_level": "high"
},
"Zielausschüttungsrendite_über_die_Fondslaufzeit": {
"value": "Cash on Cash: 4%-5%",
"page_number": 10,
"confidence_level": "high"
},
"Ausschüttungsrendite_seit_Auflage": {
"value": "Unlevered Cash-on-cash examples: 4.14%, 3.31%",
"page_number": 33,
"confidence_level": "medium"
},
"Laufzeit": {
"value": "Open-ended",
"page_number": 9,
"confidence_level": "high"
},
"LTV_Loan_to_Value": {
"value": "50% max at asset and fund level",
"page_number": 10,
"confidence_level": "high"
},
"Soll_Ist": {
"value": "59.8%",
"page_number": 12,
"confidence_level": "medium"
},
"Ziel": {
"value": "Target 40-45%",
"page_number": 10,
"confidence_level": "high"
},
"Managementgebühren_Bezogen_auf_NAV_Net_Asset_Value": {
"value": "Sliding scale 55-50-40bp x NAV",
"page_number": 10,
"confidence_level": "high"
},
"Sektorenallokation": {
"value": {
"Office": "75.6%",
"Retail": "13.8%",
"Hotels": "4.3%",
"Industrial/logistics": "1.0%",
"Residential": "0.4%",
"Others": "4.9%"
},
"page_number": 5,
"confidence_level": "high"
},
"Länderallokation": {
"value": {
"Germany": null,
"France": null,
"Italy": null,
"Netherlands": null,
"United Kingdom": null,
"Czech Republic": null,
"Luxembourg": null,
"Spain": null,
"Finland": null
},
"page_number": 16,
"confidence_level": "medium"
}
}

View File

@ -0,0 +1,112 @@
{
"details": [
{
"key": "Fondsname",
"value": "Not directly mentioned",
"page": null,
"confidence": "low"
},
{
"key": "Fondsmanager",
"value": "Not directly mentioned",
"page": null,
"confidence": "low"
},
{
"key": "Name Kapitalverwaltungsgesellschaft",
"value": "Not directly mentioned",
"page": null,
"confidence": "low"
},
{
"key": "Datum",
"value": "31 March 2024",
"page": 5,
"confidence": "high"
},
{
"key": "Risikoprofil",
"value": "Not directly mentioned",
"page": null,
"confidence": "low"
},
{
"key": "Artikel gem. SFDR",
"value": "Not directly mentioned",
"page": null,
"confidence": "low"
},
{
"key": "Zielrendite über die Fondslaufzeit",
"value": "7%+ Net Investors' long-term return",
"page": 1,
"confidence": "high"
},
{
"key": "Rendite seit Auflage",
"value": "+6.0% annualised Total Return",
"page": 27,
"confidence": "high"
},
{
"key": "Zielausschüttungsrendite über die Fondslaufzeit",
"value": "4.0%",
"page": 27,
"confidence": "high"
},
{
"key": "Ausschüttungsrendite seit Auflage",
"value": "Not directly mentioned",
"page": null,
"confidence": "low"
},
{
"key": "Laufzeit",
"value": "Open-ended",
"page": 30,
"confidence": "medium"
},
{
"key": "LTV (Loan-to-Value)",
"value": "18.7%",
"page": 16,
"confidence": "high"
},
{
"key": "Soll/Ist",
"value": "Not directly mentioned",
"page": null,
"confidence": "low"
},
{
"key": "Ziel",
"value": "Achieve a resilient income performance and a long-term capital appreciation",
"page": 1,
"confidence": "high"
},
{
"key": "Managementgebühren Bezogen auf NAV (Net Asset Value)",
"value": "Between 70bps and 125bps based on ticket size",
"page": 34,
"confidence": "high"
},
{
"key": "Sektorenallokation",
"value": [
"Office",
"Residential",
"Industrial/Logistics",
"Retail",
"Hotels"
],
"page": 36,
"confidence": "medium"
},
{
"key": "Länderallokation",
"value": ["Europe", "North America", "Asia Pacific"],
"page": 36,
"confidence": "medium"
}
]
}

View File

@ -0,0 +1,106 @@
{
"information": [
{
"key": "Fondsname",
"value": "Europäische Logistikstrategie",
"page_number": 1,
"confidence": "high"
},
{
"key": "Fondsmanager",
"value": "Nicht direkt erwähnter spezifischer Name",
"page_number": 5,
"confidence": "low"
},
{
"key": "Name Kapitalverwaltungsgesellschaft",
"value": "Nicht direkt erwähnt",
"page_number": 5,
"confidence": "low"
},
{
"key": "Datum",
"value": "30.06.2023",
"page_number": 12,
"confidence": "high"
},
{
"key": "Risikoprofil",
"value": "Halten-Strategie",
"page_number": 2,
"confidence": "high"
},
{
"key": "Artikel gem. SFDR",
"value": "Artikel 8",
"page_number": 8,
"confidence": "high"
},
{
"key": "Zielrendite über die Fondslaufzeit",
"value": "5,00-5,25%",
"page_number": 2,
"confidence": "high"
},
{
"key": "Rendite seit Auflage",
"value": "Nicht direkt erwähnt",
"page_number": null,
"confidence": "low"
},
{
"key": "Zielausschüttungsrendite über die Fondslaufzeit",
"value": "5,00-5,25%",
"page_number": 6,
"confidence": "high"
},
{
"key": "Ausschüttungsrendite seit Auflage",
"value": "Nicht direkt erwähnt",
"page_number": null,
"confidence": "low"
},
{
"key": "Laufzeit",
"value": "Auf Basis langfristiger Strategien und Verträge",
"page_number": 2,
"confidence": "medium"
},
{
"key": "LTV (Loan-to-Value)",
"value": "25-40%",
"page_number": 2,
"confidence": "high"
},
{
"key": "Soll/Ist",
"value": "Nicht direkt erwähnt",
"page_number": null,
"confidence": "low"
},
{
"key": "Ziel",
"value": "Langfristiges Halten und Management von Core+ Logistikimmobilien",
"page_number": 2,
"confidence": "medium"
},
{
"key": "Managementgebühren Bezogen auf NAV (Net Asset Value)",
"value": "60 bps p.a.",
"page_number": 26,
"confidence": "high"
},
{
"key": "Sektorenallokation",
"value": "Logistikimmobilien",
"page_number": 5,
"confidence": "high"
},
{
"key": "Länderallokation",
"value": "Niederlande, Frankreich, Skandinavien, Deutschland",
"page_number": 2,
"confidence": "high"
}
]
}

View File

@ -0,0 +1,106 @@
{
"extracted_information": [
{
"key": "Fondsname",
"value": "Core Plus Open-ended Fund",
"page_number": 2,
"confidence_level": "high"
},
{
"key": "Fondsmanager",
"value": "Specialist Nordic Manager",
"page_number": 2,
"confidence_level": "medium"
},
{
"key": "Name Kapitalverwaltungsgesellschaft",
"value": "Capital Management is an AIFM supervised by the CSSF in Luxembourg",
"page_number": 36,
"confidence_level": "high"
},
{
"key": "Datum",
"value": "August 2024",
"page_number": 0,
"confidence_level": "high"
},
{
"key": "Risikoprofil",
"value": "Prioritizing assets where strong growth is expected over the next decade",
"page_number": 2,
"confidence_level": "medium"
},
{
"key": "Artikel gem. SFDR",
"value": "Article 8 of the Sustainable Financial Disclosure Regulation (SFDR)",
"page_number": 16,
"confidence_level": "high"
},
{
"key": "Zielrendite über die Fondslaufzeit",
"value": "7-8% net total annual return",
"page_number": 2,
"confidence_level": "high"
},
{
"key": "Rendite seit Auflage",
"value": "Realized Gross IRR® (Max)",
"page_number": 5,
"confidence_level": "medium"
},
{
"key": "Zielausschüttungsrendite über die Fondslaufzeit",
"value": "3-4% dividend yield",
"page_number": 2,
"confidence_level": "high"
},
{
"key": "Ausschüttungsrendite seit Auflage",
"value": "NA (specific historical payout not provided)",
"page_number": null,
"confidence_level": "low"
},
{
"key": "Laufzeit",
"value": "Open-ended with an initial 24-month lock-in for new investors",
"page_number": 36,
"confidence_level": "high"
},
{
"key": "LTV (Loan-to-Value)",
"value": "Target LTV of 35% (capped at 37.5%)",
"page_number": 11,
"confidence_level": "high"
},
{
"key": "Soll/Ist",
"value": "Estimated CAPEX / Current Yield and Occupancy",
"page_number": 28,
"confidence_level": "medium"
},
{
"key": "Ziel",
"value": "Targeting properties that are comparatively sustainable and aligned with EU Taxonomy",
"page_number": 11,
"confidence_level": "high"
},
{
"key": "Managementgebühren Bezogen auf NAV (Net Asset Value)",
"value": "Management fee of 85 bps on NAV",
"page_number": 36,
"confidence_level": "high"
},
{
"key": "Sektorenallokation",
"value": "Logistics, Residential, Office",
"page_number": 12,
"confidence_level": "high"
},
{
"key": "Länderallokation",
"value": "Sweden, Norway, Denmark, Finland",
"page_number": 12,
"confidence_level": "high"
}
]
}

View File

@ -0,0 +1,71 @@
import fitz # PyMuPDF
from dotenv import load_dotenv
import os
from openai import AzureOpenAI
# Load variables from a .env file so os.getenv below can see them.
load_dotenv()
# Endpoint passed to the AzureOpenAI client as base_url.
BASE_URL = "https://ai.exxeta.com/api/v2/azure/openai"
# API key read from the OPENAI_API_KEY environment variable (None if unset).
API_KEY = os.getenv("OPENAI_API_KEY")
def extract_text_from_pdf(pdf_path):
# Open the PDF file
doc = fitz.open(pdf_path)

Hier fehlt die Fehlerbehandlung: Es wäre gut, beim Öffnen der PDF-Datei einen try-except-Block hinzuzufügen, damit das Programm auf fehlerhafte Dateien sauber reagieren kann.

Hier fehlt die Fehlerbehandlung: Es wäre gut, beim Öffnen der PDF-Datei einen try-except-Block hinzuzufügen, damit das Programm auf fehlerhafte Dateien sauber reagieren kann.

Das liegt in den Prototypen; das ist nicht unser "reales" Projekt.

Das liegt in den Prototypen; das ist nicht unser "reales" Projekt.
# Initialize a variable to store the extracted text
extracted_text = ""
# Iterate through each page
for page_num in range(len(doc)):
page = doc.load_page(page_num) # Load the page
text = page.get_text() # Extract text from the page
extracted_text += "[Page " + str(page_num) + "]\n" + text + "\n"
return extracted_text
# Example usage: everything below runs at import time as a one-off script.
pdf_document = "../../pitch-books/Teaser 2 FINAL.pdf"
# Alternative input document, kept for quick switching:
# pdf_document = "../../pitch-books/Pitchbook 4.pdf"
# Extract the page-tagged text and echo it for inspection.
text = extract_text_from_pdf(pdf_document)
print(text)
# Client configured with the endpoint and API key defined at the top of the script.
client = AzureOpenAI(api_key=API_KEY, base_url=BASE_URL, api_version="2023-07-01-preview")
# Ask the model for the listed fund key figures as a JSON object; the
# extracted PDF text is appended to the end of the user prompt.
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{"role": "system", "content": "You are a helpful assistant that always responds in JSON format. Your responses should be valid JSON objects without any explanatory text outside the JSON structure. Structure your answers appropriately with keys and values relevant to the user's query."},
{"role": "user", "content": """
Extract from the text the following information:
- Fondsname
- Fondsmanager
- Name Kapitalverwaltungsgesellschaft
- Datum
- Risikoprofil
- Artikel gem. SFDR
- Zielrendite über die Fondslaufzeit
- Rendite seit Auflage
- Zielausschüttungsrendite über die Fondslaufzeit
- Ausschüttungsrendite seit Auflage
- Laufzeit
- LTV (Loan-to-Value)
- Soll/Ist
- Ziel
- Managementgebühren Bezogen auf NAV (Net Asset Value)
- Sektorenallokation
- Länderallokation
for each value in the list:
- the key
- the value
- the page number
- theconfidence level (high/medium/low)(that the value is correct)
TEXT TO ANALYZE:
""" + text + """
"""},
],
response_format={"type": "json_object"}
)
# Print the message content of the first returned choice.
print(response.choices[0].message.content)