Test exxeta gpt

pull/34/head
s8613 2025-04-26 12:48:21 +02:00
parent 5945122fb0
commit 7b6a19bbc3
2 changed files with 142 additions and 0 deletions

View File

@ -0,0 +1,90 @@
from openai import AzureOpenAI
from dotenv import load_dotenv
import os
import pymupdf
# Load variables from a local .env file into the process environment first,
# so the lookup of API_KEY below can see them.
load_dotenv()

# Base URL that every request of the client below is sent to.
BASE_URL = "https://ai.exxeta.com/api/v2/azure/openai"

# Credential read from the environment; None when API_KEY is not set.
API_KEY = os.environ.get("API_KEY")

# Module-level client used for the chat-completion request in this script.
client = AzureOpenAI(
    base_url=BASE_URL,
    api_version="2023-07-01-preview",
    api_key=API_KEY,
)
def extract_text_from_pdf(file_path, *, verbose=True):
    """Extract the text of every page of a PDF file using PyMuPDF.

    Each page's text is prefixed with a ``[Page N]`` marker (1-based) and
    followed by a blank line, so a consumer of the result can tell which
    page a passage came from.

    Args:
        file_path: Path of the PDF file to open.
        verbose: When true (the default, matching the previous behaviour),
            print the page count and each page's raw text to stdout.

    Returns:
        The concatenated, page-tagged text of the whole document; an empty
        string when the document has no pages.
    """
    sections = []
    # The context manager closes the document even when extraction raises;
    # previously the handle was opened and never released.
    with pymupdf.open(file_path) as doc:
        if verbose:
            print(f"PDF has {len(doc)} pages")
        for page_number, page in enumerate(doc, start=1):
            text = page.get_text()
            if verbose:
                print(text)
            sections.append(f"[Page {page_number}]\n{text}\n\n")
    # Join once at the end instead of growing a string page by page.
    return "".join(sections)
# PDF processed by this trial run; the path is relative to the working
# directory the script is started from.
file_path = "../../pitch-books/Pitchbook 1.pdf"
pdf_text = extract_text_from_pdf(file_path)

# System message asking the model to answer with a JSON object.
system_message = {
    "role": "system",
    "content": "Always respond with a valid JSON object",
}

# Extraction instructions; the page-tagged PDF text is appended directly
# after the final line of this prompt.
extraction_prompt = """extract the values from the text. let not found values empty:
-Fondsname
-Fondsmanager
-Name Kapitalverwaltungsgesellschaft
-Datum
-Risikoprofil
-Artikel gem. SFDR
-Ziel
-Zielrendite über die Fondslaufzeit
-Rendite seit Auflage
-Zielausschüttungsrendite über die Fondslaufzeit
-Ausschüttungsrendite seit Auflage
-Laufzeit
-LTV (Loan-to-Value)
-Soll/Ist
-Ziel
-Managementgebühren Bezogen auf NAV (Net Asset Value)
-Sektorenallokation
-Länderallokation
for each value return:
- the Key
- the Value
- the page where this value was found
- a confidence score, how confident the model is about the value (low, medium, high)
Here ist the text:"""
user_message = {
    "role": "user",
    "content": extraction_prompt + pdf_text,
}

# Only the model, the messages and the JSON response format are passed;
# temperature, top_p, penalties, max_tokens, stop and stream are not set.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[system_message, user_message],
    response_format={"type": "json_object"},
)

# Print the content of the first returned choice.
first_choice = response.choices[0]
print(first_choice.message.content)

View File

@ -0,0 +1,52 @@
acres==0.3.0
annotated-types==0.7.0
anyio==4.9.0
certifi==2025.1.31
charset-normalizer==3.4.1
ci-info==0.3.0
click==8.1.8
configobj==5.0.9
configparser==7.2.0
distro==1.9.0
etelemetry==0.3.1
filelock==3.18.0
h11==0.14.0
httpcore==1.0.8
httplib2==0.22.0
httpx==0.28.1
idna==3.10
isodate==0.6.1
jiter==0.9.0
looseversion==1.3.0
lxml==5.4.0
networkx==3.4.2
nibabel==5.3.2
nipype==1.10.0
numpy==2.2.5
openai==1.75.0
packaging==25.0
pandas==2.2.3
pathlib==1.0.1
prov==2.0.1
puremagic==1.28
pydantic==2.11.3
pydantic_core==2.33.1
pydot==3.0.4
PyMuPDF==1.25.5
pyparsing==3.2.3
python-dateutil==2.9.0.post0
python-dotenv==1.1.0
pytz==2025.2
pyxnat==1.6.3
rdflib==6.3.2
requests==2.32.3
scipy==1.15.2
simplejson==3.20.1
six==1.17.0
sniffio==1.3.1
tqdm==4.67.1
traits==7.0.2
typing-inspection==0.4.0
typing_extensions==4.13.2
tzdata==2025.2
urllib3==2.4.0