Compare commits
4 Commits
8ed5f7c114
...
63882d77c0
| Author | SHA1 | Date |
|---|---|---|
|
|
63882d77c0 | |
|
|
96ad5fd15c | |
|
|
f81624b8ab | |
|
|
93334898c9 |
|
|
@ -0,0 +1,70 @@
|
|||
# PSE2 - Pitchbook Extraction Web Application
|
||||
|
||||
A microservices platform for processing pitchbook PDFs using OCR and entity extraction services. It combines SpaCy NLP and GPT-based (ExxetaGPT) extraction of KPIs from pitchbooks.
|
||||
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Environment Setup
|
||||
Create a `.env` file in the project root:
|
||||
```
|
||||
# Database
|
||||
DATABASE_URL=url
|
||||
POSTGRES_USER=admin
|
||||
POSTGRES_PASSWORD=password
|
||||
|
||||
# API Key (required for ExxetaGPT service)
|
||||
API_KEY=your_exxeta_jwt_token_here
|
||||
```
|
||||
|
||||
### 2. Start Application
|
||||
```bash
|
||||
# Build and start all services
|
||||
docker-compose up --build
|
||||
|
||||
# Run in background
|
||||
docker-compose up --build -d
|
||||
|
||||
# Stop services
|
||||
docker-compose down
|
||||
```
|
||||
|
||||
### 3. Access Application
|
||||
- **Frontend:** http://localhost:8080
|
||||
- **API:** http://localhost:5050
|
||||
|
||||
## Services Overview
|
||||
|
||||
| Service | Port | Purpose |
|
||||
|---------|------|---------|
|
||||
| **Frontend** | 8080 | React UI for file upload and results display |
|
||||
| **Coordinator** | 5050 | Main API, file storage, database management |
|
||||
| **OCR** | 5051 | PDF text extraction using OCRmyPDF |
|
||||
| **ExxetaGPT** | 5053 | AI entity extraction using GPT-4o-mini |
|
||||
| **SpaCy** | 5052 | NLP entity extraction using custom model |
|
||||
| **Validate** | 5054 | Merges and validates results from both extractors |
|
||||
| **Database** | 5432 | PostgreSQL for data persistence |
|
||||
|
||||
## Usage Flow
|
||||
|
||||
1. Upload PDF via web interface
|
||||
2. OCR service extracts text from PDF
|
||||
3. Both the ExxetaGPT and SpaCy services extract KPI entities
|
||||
4. Validate service merges and validates results
|
||||
5. View the extracted KPIs and the original PDF side by side
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
**Services won't start:**
|
||||
```bash
|
||||
# Check logs
|
||||
docker-compose logs
|
||||
```
|
||||
|
||||
**ExxetaGPT errors:**
|
||||
- Ensure `API_KEY` is set in `.env` file
|
||||
- Check API key validity and network access
|
||||
|
||||
**Database connection issues:**
|
||||
- Wait for database health check to pass
|
||||
- Verify `DATABASE_URL` format in `.env`
|
||||
|
|
@ -1,41 +0,0 @@
|
|||
# ExxetaGPT Microservice
|
||||
|
||||
## Lokaler Start (ohne Container)
|
||||
|
||||
### 1. Voraussetzungen
|
||||
|
||||
- Python 3.11+
|
||||
- Virtuelle Umgebung (empfohlen)
|
||||
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### 2. .env Datei erstellen
|
||||
Lege im Projektverzeichnis eine `.env`-Datei mit dem Exxeta-API-Key an.
|
||||
|
||||
(Der API Key ist ein JWT von Exxeta – nicht veröffentlichen!)
|
||||
|
||||
### 3. Starten
|
||||
python app.py
|
||||
|
||||
## Verwendung als Docker-Container
|
||||
|
||||
### 1. Build
|
||||
```bash
|
||||
docker build -t exxeta-gpt .
|
||||
```
|
||||
|
||||
### 2. Starten
|
||||
```bash
|
||||
docker run -p 5050:5050 --env-file .env exxeta-gpt
|
||||
```
|
||||
|
||||
## Beispielaufruf:
|
||||
```bash
|
||||
curl -X POST http://localhost:5050/extract \
|
||||
-H "Content-Type: application/json" \
|
||||
-d @text-per-page.json
|
||||
```
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
# SpaCy Microservice
|
||||
|
||||
## Den Service in einem Docker-Container starten
|
||||
|
||||
### 1. Build
|
||||
```bash
|
||||
docker build -t spacy-service .
|
||||
```
|
||||
|
||||
### 2. Starten
|
||||
```bash
|
||||
docker run -p 5050:5050 spacy-service
|
||||
```
|
||||
|
||||
## Beispielaufruf:
|
||||
```bash
|
||||
curl -X POST http://localhost:5050/extraction \
|
||||
-H "Content-Type: application/json" \
|
||||
-d @text-per-page.json
|
||||
```
|
||||
|
|
@ -2,7 +2,7 @@ import {
|
|||
Table, TableBody, TableCell, TableContainer,
|
||||
TableHead, TableRow, Paper, Box,
|
||||
Dialog, DialogActions, DialogContent, DialogTitle,
|
||||
TextField, Button
|
||||
TextField, Button, Link
|
||||
} from '@mui/material';
|
||||
import ErrorOutlineIcon from '@mui/icons-material/ErrorOutline';
|
||||
import SearchIcon from '@mui/icons-material/Search';
|
||||
|
|
@ -15,11 +15,16 @@ import {
|
|||
{ label: 'Fondsname', value: 'Fund Real Estate Prime Europe', page: 1, status: 'ok' },
|
||||
{ label: 'Fondsmanager', value: '', page: 1, status: 'error' },
|
||||
{ label: 'Risikoprofil', value: 'Core/Core+', page: 10, status: 'warning' },
|
||||
{ label: 'LTV', value: '30-35 %', page: 8, status: 'ok' }
|
||||
{ label: 'LTV', value: '30-35 %', page: 8, status: 'ok' },
|
||||
{ label: 'Ausschüttungsrendite', value: '4%', page: 34, status: 'ok' }
|
||||
];
|
||||
|
||||
interface KennzahlenTableProps {
|
||||
onPageClick?: (page: number) => void;
|
||||
}
|
||||
|
||||
// React-Komponente
|
||||
export default function KennzahlenTable() {
|
||||
export default function KennzahlenTable({ onPageClick }: KennzahlenTableProps) {
|
||||
// Zustand für bearbeitbare Daten
|
||||
const [rows, setRows] = useState(exampleData);
|
||||
|
||||
|
|
@ -100,7 +105,15 @@ import {
|
|||
</TableCell>
|
||||
|
||||
{/* Seitenzahl */}
|
||||
<TableCell>{row.page}</TableCell>
|
||||
<TableCell>
|
||||
<Link
|
||||
component="button"
|
||||
onClick={() => onPageClick?.(row.page)}
|
||||
sx={{ cursor: 'pointer' }}
|
||||
>
|
||||
{row.page}
|
||||
</Link>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
);
|
||||
})}
|
||||
|
|
|
|||
|
|
@ -9,11 +9,12 @@ import { socket } from "../socket";
|
|||
|
||||
interface PDFViewerProps {
|
||||
pitchBookId: string;
|
||||
currentPage?: number;
|
||||
}
|
||||
|
||||
export default function PDFViewer({ pitchBookId }: PDFViewerProps) {
|
||||
export default function PDFViewer({ pitchBookId, currentPage }: PDFViewerProps) {
|
||||
const [numPages, setNumPages] = useState<number | null>(null);
|
||||
const [pageNumber, setPageNumber] = useState(1);
|
||||
const [pageNumber, setPageNumber] = useState(currentPage || 1);
|
||||
const [containerWidth, setContainerWidth] = useState<number | null>(null);
|
||||
const [pdfKey, setPdfKey] = useState(Date.now());
|
||||
const containerRef = useRef<HTMLDivElement>(null);
|
||||
|
|
@ -34,6 +35,12 @@ export default function PDFViewer({ pitchBookId }: PDFViewerProps) {
|
|||
return () => window.removeEventListener("resize", updateWidth);
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (currentPage && currentPage !== pageNumber) {
|
||||
setPageNumber(currentPage);
|
||||
}
|
||||
}, [currentPage]);
|
||||
|
||||
useEffect(() => {
|
||||
const handleProgress = (data: { id: number; progress: number }) => {
|
||||
if (data.id.toString() === pitchBookId && data.progress === 50) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import ContentPasteIcon from "@mui/icons-material/ContentPaste";
|
||||
import { Box, Button, Paper, Typography } from "@mui/material";
|
||||
import { createFileRoute, useNavigate } from "@tanstack/react-router";
|
||||
import { useState } from "react";
|
||||
import KennzahlenTable from "../components/KennzahlenTable";
|
||||
import PDFViewer from "../components/pdfViewer";
|
||||
|
||||
|
|
@ -12,6 +13,7 @@ function ExtractedResultsPage() {
|
|||
const { pitchBook } = Route.useParams();
|
||||
const navigate = useNavigate();
|
||||
const status: "green" | "yellow" | "red" = "red";
|
||||
const [currentPage, setCurrentPage] = useState(1);
|
||||
|
||||
const statusColor = {
|
||||
red: "#f43131",
|
||||
|
|
@ -58,7 +60,7 @@ function ExtractedResultsPage() {
|
|||
overflow: "auto",
|
||||
}}
|
||||
>
|
||||
<KennzahlenTable />
|
||||
<KennzahlenTable onPageClick={setCurrentPage} />
|
||||
</Paper>
|
||||
<Box
|
||||
display="flex"
|
||||
|
|
@ -78,7 +80,7 @@ function ExtractedResultsPage() {
|
|||
justifyContent: "center",
|
||||
}}
|
||||
>
|
||||
<PDFViewer pitchBookId={pitchBook} />
|
||||
<PDFViewer pitchBookId={pitchBook} currentPage={currentPage} />
|
||||
</Paper>
|
||||
<Box mt={2} display="flex" justifyContent="flex-end" gap={2}>
|
||||
<Button variant="contained" sx={{ backgroundColor: "#383838" }}>
|
||||
|
|
|
|||
Loading…
Reference in New Issue