Neue Prototyp-Dateien hinzugefügt

2025-04-28 14:49:45 +02:00 · 2025-04-28 14:49:45 +02:00 · 89bbc68c2a
parent 14b66a31b8
commit 89bbc68c2a
2 changed files with 87 additions and 0 deletions
--- a/prototypes/PyMuPdf/PyMuPdf_st.py
+++ b/prototypes/PyMuPdf/PyMuPdf_st.py
@@ -0,0 +1,64 @@
+#########################################################
+#Run: in Terminal -> streamlit run PyMuPdf_st.py  
+#########################################################
+
+import streamlit as st
+import fitz  # PyMuPDF
+from PIL import Image, ImageDraw
+import io
+
+st.title("🔍 PDF Kennzahlen-Finder")
+
+# Upload the PDF to search
+uploaded_file = st.file_uploader("PDF hochladen", type="pdf")
+
+# Search term to look for
+suchwort = st.text_input("Suchwort (z. B. wie)", value="wie")
+
+if uploaded_file and suchwort:
+    # Open the PDF from memory; getvalue() returns the whole buffer regardless
+    # of the current stream position, unlike read().
+    pdf_bytes = uploaded_file.getvalue()
+    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+
+    # Collect every hit on every page as a dict:
+    # "seite" is the 0-based page number, "rect" the rectangle of the hit.
+    fundstellen = []
+    for page_num, page in enumerate(doc):
+        for rect in page.search_for(suchwort):
+            fundstellen.append({"seite": page_num, "rect": rect})
+
+    if fundstellen:
+        st.success(f"🔎 {len(fundstellen)} Fundstelle(n) für „{suchwort}“ gefunden.")
+
+        # Select by hit index with a unique label per hit. Looking the chosen
+        # label up again with list.index() always returned the first hit of a
+        # page, so further hits on the same page could never be displayed.
+        index = st.selectbox(
+            "Fundstelle auswählen:",
+            range(len(fundstellen)),
+            format_func=lambda i: (
+                f"Treffer {i + 1} – Seite {fundstellen[i]['seite'] + 1}"
+            ),
+        )
+
+        fund = fundstellen[index]
+        seite = doc.load_page(fund["seite"])
+        rect = fund["rect"]
+
+        # Render the page as an image
+        zoom = 2  # quality: render scale factor passed to fitz.Matrix
+        pix = seite.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
+        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+
+        # Draw the marker: scale the hit rectangle from page units to pixels
+        draw = ImageDraw.Draw(img)
+        scale = pix.width / seite.rect.width
+        rect_scaled = [
+            rect.x0 * scale, rect.y0 * scale, rect.x1 * scale, rect.y1 * scale
+        ]
+        draw.rectangle(rect_scaled, outline="red", width=3)
+
+        st.image(img, caption=f"Markierte Fundstelle auf Seite {fund['seite'] + 1}")
+    else:
+        st.warning("Keine Fundstellen gefunden.")
--- a/prototypes/pdfplumber/tabellentext_holen.py
+++ b/prototypes/pdfplumber/tabellentext_holen.py
@@ -0,0 +1,23 @@
+import pdfplumber  
+
+# Raw string: "\p" and "\T" are invalid escape sequences in a normal literal
+pdf_path = r"\pse2_ff\pitch-books\Teaser 2 FINAL.pdf"
+
+# Alternative kept for reference (disabled): layout-preserving text extraction.
+# with pdfplumber.open(pdf_path) as pdf:
+#     for page in pdf.pages:
+#         page_text = page.extract_text(layout=True)
+#         print(f"Page {page.page_number}:\n{page_text}\n")
+
+# Print every table that pdfplumber detects, page by page (1-based page and
+# table numbers), emitting each table row on its own line.
+with pdfplumber.open(pdf_path) as pdf:
+    for i, page in enumerate(pdf.pages, start=1):
+        tables = page.extract_tables()
+
+        if tables:
+            print(f"\n Tabellen auf Seite {i} gefunden:")
+            for t_index, table in enumerate(tables, start=1):
+                print(f"\nTabelle {t_index}:\n")
+                for row in table:
+                    print(row)