WIP: Fehler F401 behoben, cleanup vor Branchwechsel

pull/94/head
Abdulrahman Dabbagh 2025-06-20 10:16:36 +02:00
parent 09c314eea3
commit abccb43741
12 changed files with 8226 additions and 22 deletions

1652
annotation_data.json 100644

File diff suppressed because it is too large Load Diff

View File

@ -7,6 +7,7 @@ from model.database import init_db
from controller.socketIO import socketio from controller.socketIO import socketio
from controller.kennzahlen import kennzahlen_bp from controller.kennzahlen import kennzahlen_bp
app = Flask(__name__) app = Flask(__name__)
CORS(app) CORS(app)
socketio.init_app(app) socketio.init_app(app)
@ -25,6 +26,7 @@ register_routes(app)
# Register blueprints # Register blueprints
app.register_blueprint(kennzahlen_bp) app.register_blueprint(kennzahlen_bp)
@app.route("/health") @app.route("/health")
def health_check(): def health_check():
return "OK" return "OK"

View File

@ -1,4 +1,4 @@
from controller.spacy_contoller import spacy_controller from controller.spacy_controller import spacy_controller
from controller.kpi_setting_controller import kpi_setting_controller from controller.kpi_setting_controller import kpi_setting_controller
from controller.pitch_book_controller import pitch_book_controller from controller.pitch_book_controller import pitch_book_controller
from controller.progress_controller import progress_controller from controller.progress_controller import progress_controller

View File

@ -1,10 +1,11 @@
from flask import Blueprint, request, jsonify, send_file from flask import Blueprint, request, jsonify, send_file
from io import BytesIO from io import BytesIO
from model.spacy_model import SpacyModel from model.spacy_model import SpacyModel
import puremagic import puremagic
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
from model.database import db from model.database import db
import os
import json
spacy_controller = Blueprint("spacy", __name__, url_prefix="/api/spacy") spacy_controller = Blueprint("spacy", __name__, url_prefix="/api/spacy")
@ -91,3 +92,39 @@ def delete_file(id):
db.session.commit() db.session.commit()
return jsonify({"message": f"File {id} deleted successfully"}), 200 return jsonify({"message": f"File {id} deleted successfully"}), 200
@spacy_controller.route("/append-training-entry", methods=["POST"])
def append_training_entry():
    """Append one training example to ``spacy_training/annotation_data.json``.

    The request body must be a JSON object containing the keys ``text`` and
    ``entities``. The annotation file holds a JSON list; the entry is added
    unless an equal entry is already present.

    Returns:
        ``(response, status)``: 200 when the entry was stored or already
        existed, 400 when the body is not a JSON object with both keys,
        500 when the annotation file cannot be read, parsed or written.
    """
    # silent=True: a malformed or non-JSON body yields None instead of an
    # HTML error page, so the client always receives the JSON error below.
    entry = request.get_json(silent=True)
    # Require a dict: `"text" in entry` would also succeed as a substring or
    # element test on a JSON string or list.
    if (
        not isinstance(entry, dict)
        or "text" not in entry
        or "entities" not in entry
    ):
        return (
            jsonify(
                {"error": "Ungültiges Format 'text' und 'entities' erforderlich."}
            ),
            400,
        )

    path = os.path.join("spacy_training", "annotation_data.json")
    tmp_path = f"{path}.tmp"
    try:
        os.makedirs(os.path.dirname(path), exist_ok=True)
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            data = []
        if not isinstance(data, list):
            raise ValueError("annotation file does not contain a JSON list")

        if entry in data:
            return jsonify({"message": "Eintrag existiert bereits."}), 200

        data.append(entry)
        # Write to a sibling file and swap it in: opening the real file with
        # "w" truncates it immediately, so a failed dump would lose all data.
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, path)
        return jsonify({"message": "Eintrag erfolgreich gespeichert."}), 200
    except (OSError, ValueError) as e:
        # json.JSONDecodeError is a ValueError, so corrupt files land here too.
        print(f"[ERROR] Fehler beim Schreiben: {e}")
        return jsonify({"error": "Interner Fehler beim Schreiben."}), 500

View File

@ -3,12 +3,19 @@ from extractSpacy import extract
import requests import requests
import os import os
import json import json
from flask_cors import CORS
app = Flask(__name__) app = Flask(__name__)
CORS(app)
VALIDATE_SERVICE_URL = os.getenv("VALIDATE_SERVICE_URL", "http://localhost:5054/validate")
@app.route('/extract', methods=['POST']) VALIDATE_SERVICE_URL = os.getenv(
"VALIDATE_SERVICE_URL", "http://localhost:5054/validate"
)
@app.route("/extract", methods=["POST"])
def extract_pdf(): def extract_pdf():
json_data = request.get_json() json_data = request.get_json()
@ -16,19 +23,19 @@ def extract_pdf():
pages_data = json_data["extracted_text_per_page"] pages_data = json_data["extracted_text_per_page"]
entities_json = extract(pages_data) entities_json = extract(pages_data)
entities = json.loads(entities_json) if isinstance(entities_json, str) else entities_json entities = (
json.loads(entities_json) if isinstance(entities_json, str) else entities_json
)
validate_payload = { validate_payload = {"id": pitchbook_id, "service": "spacy", "entities": entities}
"id": pitchbook_id,
"service": "spacy",
"entities": entities
}
print(f"[SPACY] Sending to validate service: {VALIDATE_SERVICE_URL}") print(f"[SPACY] Sending to validate service: {VALIDATE_SERVICE_URL}")
print(f"[SPACY] Payload: {validate_payload} entities for pitchbook {pitchbook_id}") print(f"[SPACY] Payload: {validate_payload} entities for pitchbook {pitchbook_id}")
try: try:
response = requests.post(VALIDATE_SERVICE_URL, json=validate_payload, timeout=600) response = requests.post(
VALIDATE_SERVICE_URL, json=validate_payload, timeout=600
)
print(f"[SPACY] Validate service response: {response.status_code}") print(f"[SPACY] Validate service response: {response.status_code}")
if response.status_code != 200: if response.status_code != 200:
print(f"[SPACY] Validate service error: {response.text}") print(f"[SPACY] Validate service error: {response.text}")
@ -38,5 +45,40 @@ def extract_pdf():
return jsonify("Sent to validate-service"), 200 return jsonify("Sent to validate-service"), 200
@app.route("/append-training-entry", methods=["POST"])
def append_training_entry():
    """Append one training example to ``spacy_training/annotation_data.json``.

    The request body must be a JSON object containing the keys ``text`` and
    ``entities``. The annotation file holds a JSON list; the entry is added
    unless an equal entry is already present.

    Returns:
        ``(response, status)``: 200 when the entry was stored or already
        existed, 400 when the body is not a JSON object with both keys,
        500 when the annotation file cannot be read, parsed or written.
    """
    # silent=True: a malformed or non-JSON body yields None instead of an
    # HTML error page, so the client always receives the JSON error below.
    entry = request.get_json(silent=True)
    # Require a dict: `"text" in entry` would also succeed as a substring or
    # element test on a JSON string or list.
    if (
        not isinstance(entry, dict)
        or "text" not in entry
        or "entities" not in entry
    ):
        return (
            jsonify(
                {"error": "Ungültiges Format 'text' und 'entities' erforderlich."}
            ),
            400,
        )

    path = os.path.join("spacy_training", "annotation_data.json")
    tmp_path = f"{path}.tmp"
    try:
        # Create the target directory on first use; without it the very
        # first write fails when the directory does not exist yet.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            data = []
        if not isinstance(data, list):
            raise ValueError("annotation file does not contain a JSON list")

        # Skip exact duplicates.
        if entry in data:
            return jsonify({"message": "Eintrag existiert bereits."}), 200

        data.append(entry)
        # Write to a sibling file and swap it in: opening the real file with
        # "w" truncates it immediately, so a failed dump would lose all data.
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, path)
        return jsonify({"message": "Eintrag erfolgreich gespeichert."}), 200
    except (OSError, ValueError) as e:
        # json.JSONDecodeError is a ValueError, so corrupt files land here too.
        print(f"[ERROR] Fehler beim Schreiben der Datei: {e}")
        return jsonify({"error": "Interner Fehler beim Schreiben."}), 500
if __name__ == "__main__": if __name__ == "__main__":
app.run(host="0.0.0.0", port=5052, debug=True) app.run(host="0.0.0.0", port=5052, debug=True)

View File

@ -0,0 +1,35 @@
from flask import Flask, request, jsonify
import os
import json
# Flask application object; the route below registers on it.
app = Flask(__name__)
# File that collects the training entries appended by the route below.
ANNOTATION_FILE = (
"spacy_training/annotation_data.json" # relative path inside the container/project
)
@app.route("/api/spacy-training-entry", methods=["POST"])
def append_training_entry():
    """Append one training example to the JSON list in ``ANNOTATION_FILE``.

    The request body must be a JSON object containing the keys ``text`` and
    ``entities``. The entry is added unless an equal entry is already present.

    Returns:
        ``(response, status)``: 200 when the entry was stored or already
        existed, 400 when the body is not a JSON object with both keys,
        500 when the annotation file cannot be read, parsed or written.
    """
    # silent=True: a malformed or non-JSON body yields None instead of an
    # HTML error page, so the client always receives the JSON error below.
    new_entry = request.get_json(silent=True)
    # Require a dict: `"text" in new_entry` would also succeed as a substring
    # or element test on a JSON string or list.
    if (
        not isinstance(new_entry, dict)
        or "text" not in new_entry
        or "entities" not in new_entry
    ):
        return jsonify({"error": "Ungültiges Format"}), 400

    tmp_path = f"{ANNOTATION_FILE}.tmp"
    try:
        directory = os.path.dirname(ANNOTATION_FILE)
        if directory:
            # Create the target directory on first use.
            os.makedirs(directory, exist_ok=True)

        # Load the existing file; a missing file means an empty dataset.
        try:
            with open(ANNOTATION_FILE, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            data = []
        if not isinstance(data, list):
            raise ValueError("annotation file does not contain a JSON list")

        # Skip exact duplicates.
        if new_entry in data:
            return jsonify({"message": "Eintrag bereits vorhanden."}), 200

        # Append, then write to a sibling file and swap it in: opening the
        # real file with "w" truncates it immediately, so a failed dump would
        # lose all data.
        data.append(new_entry)
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, ANNOTATION_FILE)
        return jsonify({"message": "Eintrag erfolgreich gespeichert."}), 200
    except (OSError, ValueError) as e:
        # json.JSONDecodeError is a ValueError, so corrupt files land here too.
        print(f"[ERROR] Fehler beim Schreiben der Datei: {e}")
        return jsonify({"error": "Interner Fehler beim Schreiben."}), 500

View File

@ -37,6 +37,8 @@ services:
retries: 10 retries: 10
ports: ports:
- 5050:5000 - 5050:5000
volumes:
- ./backend/spacy-service/spacy_training:/app/spacy_training
ocr: ocr:
build: build:

View File

@ -4,7 +4,9 @@ WORKDIR /usr/src/app
# install dependencies into temp directory # install dependencies into temp directory
# this will cache them and speed up future builds # this will cache them and speed up future builds
COPY package.json bun.lockb ./ COPY package.json bun.lockb ./
RUN bun install --frozen-lockfile #RUN bun install --frozen-lockfile
RUN bun install
COPY . . COPY . .

6335
project/frontend/package-lock.json generated 100644

File diff suppressed because it is too large Load Diff

View File

@ -23,6 +23,7 @@
"@tanstack/react-router": "^1.114.3", "@tanstack/react-router": "^1.114.3",
"@tanstack/react-router-devtools": "^1.114.3", "@tanstack/react-router-devtools": "^1.114.3",
"@tanstack/router-plugin": "^1.114.3", "@tanstack/router-plugin": "^1.114.3",
"file-saver": "^2.0.5",
"react": "^19.0.0", "react": "^19.0.0",
"react-dom": "^19.0.0", "react-dom": "^19.0.0",
"react-material-file-upload": "^0.0.4", "react-material-file-upload": "^0.0.4",
@ -33,6 +34,7 @@
"@biomejs/biome": "1.9.4", "@biomejs/biome": "1.9.4",
"@testing-library/dom": "^10.4.0", "@testing-library/dom": "^10.4.0",
"@testing-library/react": "^16.2.0", "@testing-library/react": "^16.2.0",
"@types/file-saver": "^2.0.7",
"@types/react": "^19.0.8", "@types/react": "^19.0.8",
"@types/react-dom": "^19.0.3", "@types/react-dom": "^19.0.3",
"@vitejs/plugin-react": "^4.3.4", "@vitejs/plugin-react": "^4.3.4",

View File

@ -3,6 +3,7 @@ import { Box, Typography, Button, Paper, TextField, FormControlLabel,
import { useState, useEffect } from "react"; import { useState, useEffect } from "react";
import type { Kennzahl } from "../types/kpi"; import type { Kennzahl } from "../types/kpi";
import { typeDisplayMapping } from "../types/kpi"; import { typeDisplayMapping } from "../types/kpi";
// import { saveAs } from "file-saver";
interface KPIFormProps { interface KPIFormProps {
mode: 'add' | 'edit'; mode: 'add' | 'edit';
@ -19,7 +20,9 @@ const emptyKPI: Partial<Kennzahl> = {
type: 'string', type: 'string',
translation: '', translation: '',
example: '', example: '',
active: true active: true,
exampleText: '',
markedValue: '',
}; };
export function KPIForm({ mode, initialData, onSave, onCancel, loading = false }: KPIFormProps) { export function KPIForm({ mode, initialData, onSave, onCancel, loading = false }: KPIFormProps) {
@ -40,16 +43,60 @@ export function KPIForm({ mode, initialData, onSave, onCancel, loading = false }
return; return;
} }
if (!formData.exampleText?.trim()) {
alert('Beispielsatz ist erforderlich');
return;
}
if (!formData.markedValue?.trim()) {
alert('Bezeichneter Wert im Satz ist erforderlich');
return;
}
setIsSaving(true); setIsSaving(true);
try { try {
const spacyEntry = generateSpacyEntry(formData);
//in localStorage merken
const stored = localStorage.getItem("spacyData");
const existingData = stored ? JSON.parse(stored) : [];
const updated = [...existingData, spacyEntry];
localStorage.setItem("spacyData", JSON.stringify(updated));
// an Flask senden
const response = await fetch("http://localhost:5050/api/spacy/append-training-entry", {
method: "POST",
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify(spacyEntry)
});
const data = await response.json();
console.log("Response von /append-training-entry:", data);
if (!response.ok) {
throw new Error(data.error || "Fehler beim Aufruf von append-training-entry");
}
if (!response.ok) {
throw new Error("Fehler vom Backend: " + response.status);
}
// anschließend in der Datenbank speichern
await onSave(formData); await onSave(formData);
} catch (error) {
console.error('Error saving KPI:', error); alert("SpaCy-Eintrag erfolgreich gespeichert!");
} catch (e: any) {
alert(e.message || "Fehler beim Erzeugen des Trainingsbeispiels.");
console.error(e);
} finally { } finally {
setIsSaving(false); setIsSaving(false);
} }
}; };
const handleCancel = () => { const handleCancel = () => {
onCancel(); onCancel();
}; };
@ -106,18 +153,40 @@ export function KPIForm({ mode, initialData, onSave, onCancel, loading = false }
<Box mb={4}> <Box mb={4}>
<Typography variant="h6" fontWeight="bold" mb={2}> <Typography variant="h6" fontWeight="bold" mb={2}>
Beschreibung Beispielsatz
</Typography> </Typography>
<TextField <TextField
fullWidth fullWidth
multiline multiline
rows={3} rows={3}
label="Beschreibung" label="Beispielsatz"
value={formData.description || ''} required
onChange={(e) => updateField('description', e.target.value)} value={formData.exampleText || ''}
helperText="Beschreibung der Kennzahl" onChange={(e) => updateField('exampleText', e.target.value)}
error={!formData.exampleText?.trim()}
helperText={
!formData.exampleText?.trim()
? "Beispielsatz ist erforderlich"
: "Ein vollständiger Satz, in dem der markierte Begriff vorkommt"
}
/> />
<TextField
fullWidth
required
sx={{ mt: 2 }}
label="Bezeichneter Wert im Satz *"
value={formData.markedValue || ''}
onChange={(e) => updateField('markedValue', e.target.value)}
error={!formData.markedValue?.trim()}
helperText={
!formData.markedValue?.trim()
? "Markierter Begriff ist erforderlich"
: "Nur der Begriff, der im Satz markiert werden soll (z.B. Core/Core+)"
}
/>
<Box mt={3}> <Box mt={3}>
<FormControlLabel <FormControlLabel
control={ control={
@ -244,4 +313,28 @@ export function KPIForm({ mode, initialData, onSave, onCancel, loading = false }
</Box> </Box>
</Paper> </Paper>
); );
} }
/**
 * Build one training entry of the form `{ text, entities: [[start, end, label]] }`
 * from the KPI form data.
 *
 * - `text` is the trimmed example sentence (`exampleText`).
 * - The entity spans the first occurrence of the trimmed `markedValue` in `text`.
 * - `label` is the trimmed, upper-cased KPI `name`.
 *
 * @throws Error when the marked value is empty or does not occur in the sentence.
 */
function generateSpacyEntry(formData: Partial<Kennzahl>) {
  const text = formData.exampleText?.trim() || "";
  const value = formData.markedValue?.trim() || "";
  const label = formData.name?.trim().toUpperCase() || "";

  // `"abc".indexOf("")` is 0, so an empty value would otherwise produce a
  // zero-length entity instead of the "not found" error.
  const unitStart = value ? text.indexOf(value) : -1;
  if (unitStart === -1) {
    throw new Error("Bezeichneter Begriff wurde im Satz nicht gefunden.");
  }

  // JavaScript indexes strings in UTF-16 code units, while Python strings
  // count code points. Convert, so offsets stay correct when the sentence
  // contains characters outside the BMP (e.g. emoji).
  // NOTE(review): assumes the annotation file is consumed by Python/spaCy — confirm.
  const start = Array.from(text.slice(0, unitStart)).length;
  const end = start + Array.from(value).length;

  return {
    text,
    entities: [[start, end, label]],
  };
}
// function appendAndDownload(newEntry: any, existing: any[] = []) {
// const updated = [...existing, newEntry];
// const blob = new Blob([JSON.stringify(updated, null, 2)], {
// type: "application/json",
// });
// saveAs(blob, "..\project\backend\spacy-service\spacy_training\annotation_data.json");
// }

View File

@ -8,6 +8,8 @@ export interface Kennzahl {
example: string; example: string;
position: number; position: number;
active: boolean; active: boolean;
exampleText?: string;
markedValue?: string;
} }
export const typeDisplayMapping: Record<string, string> = { export const typeDisplayMapping: Record<string, string> = {