From b88eb0f4ce90e0b8483470f8e3ac7519c5f6c41c Mon Sep 17 00:00:00 2001 From: Jaronim Pracht Date: Sat, 14 Jun 2025 16:48:20 +0200 Subject: [PATCH] Add PDF text highlighting to PDF viewer --- .../src/components/KennzahlenTable.tsx | 7 +- project/frontend/src/components/pdfViewer.tsx | 103 ++++++++++- project/frontend/src/main.tsx | 2 + .../src/routes/extractedResult.$pitchBook.tsx | 20 ++- .../extractedResult_.$pitchBook.$kpi.tsx | 165 ++++++++++-------- project/frontend/src/util/highlighting.ts | 12 ++ 6 files changed, 225 insertions(+), 84 deletions(-) create mode 100644 project/frontend/src/util/highlighting.ts diff --git a/project/frontend/src/components/KennzahlenTable.tsx b/project/frontend/src/components/KennzahlenTable.tsx index 406798e..59138d6 100644 --- a/project/frontend/src/components/KennzahlenTable.tsx +++ b/project/frontend/src/components/KennzahlenTable.tsx @@ -22,7 +22,7 @@ import type { KeyboardEvent } from "react"; import { fetchPutKPI } from "../util/api"; interface KennzahlenTableProps { - onPageClick?: (page: number) => void; + onPageClick?: (page: number, text: string) => void; pdfId: string; settings: Kennzahl[]; data: { @@ -286,7 +286,10 @@ export default function KennzahlenTable({ - onPageClick?.(Number(row.extractedValues.at(0)?.page)) + onPageClick?.( + Number(row.extractedValues.at(0)?.page), + row.extractedValues.at(0)?.entity, + ) } sx={{ cursor: "pointer" }} > diff --git a/project/frontend/src/components/pdfViewer.tsx b/project/frontend/src/components/pdfViewer.tsx index 5abc1f4..b39421d 100644 --- a/project/frontend/src/components/pdfViewer.tsx +++ b/project/frontend/src/components/pdfViewer.tsx @@ -1,28 +1,42 @@ -import { useEffect, useRef, useState } from "react"; +import { useCallback, useEffect, useRef, useState } from "react"; import { Document, Page } from "react-pdf"; import "react-pdf/dist/esm/Page/AnnotationLayer.css"; import "react-pdf/dist/esm/Page/TextLayer.css"; import ArrowCircleLeftIcon from "@mui/icons-material/ArrowCircleLeft"; import ArrowCircleRightIcon from "@mui/icons-material/ArrowCircleRight"; import { Box, IconButton } from "@mui/material"; +import type { + CustomTextRenderer, + OnGetTextSuccess, +} from "node_modules/react-pdf/dist/esm/shared/types"; import { socket } from "../socket"; +import { highlightPattern } from "../util/highlighting"; interface PDFViewerProps { pitchBookId: string; currentPage?: number; onPageChange?: (page: number) => void; + highlight: { text: string; page: number }[]; + focusHighlight: { text: string; page: number }; } export default function PDFViewer({ pitchBookId, currentPage, onPageChange, + highlight = [], + focusHighlight, }: PDFViewerProps) { const [numPages, setNumPages] = useState(null); const [pageNumber, setPageNumber] = useState(currentPage || 1); const [containerWidth, setContainerWidth] = useState(null); const [pdfKey, setPdfKey] = useState(Date.now()); const containerRef = useRef(null); + const [posHighlight, setPosHighlight] = useState([]); + const [posHighlightFocus, setPosHighlightFocus] = useState([]); + const [textContent, setTextContent] = useState< + { posKey: string; text: string; i: number }[] + >([]); const onDocumentLoadSuccess = ({ numPages }: { numPages: number }) => { setNumPages(numPages); @@ -44,7 +58,7 @@ export default function PDFViewer({ if (currentPage && currentPage !== pageNumber) { setPageNumber(currentPage); } - }, [currentPage]); + }, [currentPage, pageNumber]); useEffect(() => { const handleProgress = (data: { id: number; progress: number }) => { @@ -65,6 +79,78 @@ export default function PDFViewer({ onPageChange?.(newPage); }; + const textRenderer: CustomTextRenderer = useCallback( + (textItem) => { + return highlightPattern( + textItem.str, + `${textItem.width};${textItem.height};${textItem.transform}`, + posHighlight, + posHighlightFocus, + ); + }, + [posHighlight, posHighlightFocus], + ); + + useEffect(() => { + const tmpPos: string[] = []; + const tmpPosHighlight: string[] = []; + const textItems = textContent.filter( + (e) => e.text !== "" && e.text !== " ", + ); + + textItems.forEach((e, i) => { + for (const s of highlight + .filter((h) => h.page === pageNumber) + .map((h) => h.text)) { + if (s.split(" ")[0] === e.text) { + if ( + s.split(" ").reduce((prev, curr, j) => { + return prev && curr === textItems[i + j].text; + }, true) + ) { + for ( + let k = textItems[i].i; + k < textItems[i + s.split(" ").length].i; + k++ + ) { + tmpPos.push(textContent[k].posKey); + } + } + } + } + + if (focusHighlight?.page === pageNumber) { + if (focusHighlight.text.split(" ")[0] === e.text) { + if ( + focusHighlight.text.split(" ").reduce((prev, curr, j) => { + return prev && curr === textItems[i + j].text; + }, true) + ) { + for ( + let k = textItems[i].i; + k < textItems[i + focusHighlight.text.split(" ").length].i; + k++ + ) { + tmpPosHighlight.push(textContent[k].posKey); + } + } + } + } + }); + setPosHighlight(tmpPos); + setPosHighlightFocus(tmpPosHighlight); + }, [highlight, focusHighlight, pageNumber, textContent]); + + const onGetTextSuccess: OnGetTextSuccess = useCallback((fullText) => { + setTextContent( + fullText.items.map((e, i) => ({ + posKey: `${"width" in e ? e.width : 0};${"height" in e ? e.height : 0};${"transform" in e ? e.transform : ""}`, + text: "str" in e ? e.str : "", + i, + })), + ); + }, []); + return ( console.error("Ungültige PDF:", error)} > {containerWidth && ( - + )} @@ -113,8 +204,8 @@ export default function PDFViewer({ - {pageNumber} / {numPages} - + {pageNumber} / {numPages} + = (numPages || 1)} onClick={() => handlePageChange(pageNumber + 1)} @@ -124,4 +215,4 @@ export default function PDFViewer({ ); -} \ No newline at end of file +} diff --git a/project/frontend/src/main.tsx b/project/frontend/src/main.tsx index 0f5071d..bffa8aa 100644 --- a/project/frontend/src/main.tsx +++ b/project/frontend/src/main.tsx @@ -3,6 +3,7 @@ import { ThemeProvider, createTheme } from "@mui/material/styles"; import { RouterProvider, createRouter } from "@tanstack/react-router"; import { StrictMode } from "react"; import ReactDOM from "react-dom/client"; +import "react-pdf/dist/Page/TextLayer.css"; import "@fontsource/roboto/300.css"; import "@fontsource/roboto/400.css"; @@ -12,6 +13,7 @@ import "@fontsource/roboto/700.css"; import * as TanStackQueryProvider from "./integrations/tanstack-query/root-provider.tsx"; import { pdfjs } from "react-pdf"; + // Import the generated route tree import { routeTree } from "./routeTree.gen"; diff --git a/project/frontend/src/routes/extractedResult.$pitchBook.tsx b/project/frontend/src/routes/extractedResult.$pitchBook.tsx index 6878542..bee005d 100644 --- a/project/frontend/src/routes/extractedResult.$pitchBook.tsx +++ b/project/frontend/src/routes/extractedResult.$pitchBook.tsx @@ -2,7 +2,7 @@ import ContentPasteIcon from "@mui/icons-material/ContentPaste"; import { Box, Button, Paper, Typography } from "@mui/material"; import { useSuspenseQuery } from "@tanstack/react-query"; import { createFileRoute, useNavigate } from "@tanstack/react-router"; -import { useState } from "react"; +import { useCallback, useState } from "react"; import KennzahlenTable from "../components/KennzahlenTable"; import PDFViewer from "../components/pdfViewer"; import { kpiQueryOptions, settingsQueryOptions } from "../util/query"; @@ -21,6 +21,15 @@ function ExtractedResultsPage() { const navigate = useNavigate(); const status: "green" | "yellow" | "red" = "red"; const [currentPage, setCurrentPage] = useState(1); + const [focusHighlight, setFocusHighlight] = useState({ + page: 5, + text: "Langjährige", + }); + + const onSiteClick = useCallback((page: number, entity: string) => { + setCurrentPage(page); + setFocusHighlight({ page, text: entity }); + }, []); const statusColor = { red: "#f43131", @@ -72,7 +81,7 @@ function ExtractedResultsPage() { > @@ -105,6 +114,13 @@ function ExtractedResultsPage() { pitchBookId={pitchBook} currentPage={currentPage} onPageChange={setCurrentPage} + highlight={Object.values(kpi) + .map((item) => item.at(0)) + .map((item) => ({ + page: item?.page || -1, + text: item?.entity || "", + }))} + focusHighlight={focusHighlight} /> { setHasChanges(selectedValue !== originalValue); @@ -56,10 +62,12 @@ function ExtractedResultsPage() { const { mutate: updateKPI } = useMutation({ mutationFn: () => { const updatedData = { ...kpiData }; - updatedData[kpi.toUpperCase()] = [{ - ...kpiValues[0], - entity: selectedValue - }]; + updatedData[kpi.toUpperCase()] = [ + { + ...kpiValues[0], + entity: selectedValue, + }, + ]; return fetchPutKPI(Number(pitchBook), updatedData); }, onSuccess: () => { @@ -68,26 +76,28 @@ function ExtractedResultsPage() { }); navigate({ to: "/extractedResult/$pitchBook", - params: { pitchBook } + params: { pitchBook }, }); }, onError: (error) => { - console.error('Error updating KPI:', error); - } + console.error("Error updating KPI:", error); + }, }); const handleRadioChange = (event: React.ChangeEvent) => { const value = event.target.value; - if (value === 'custom') { + if (value === "custom") { setSelectedIndex(-1); } else { - const index = parseInt(value); + const index = Number.parseInt(value); setSelectedIndex(index); - setCustomValue(''); + setCustomValue(""); } }; - const handleCustomValueChange = (event: React.ChangeEvent) => { + const handleCustomValueChange = ( + event: React.ChangeEvent, + ) => { const value = event.target.value; setCustomValue(value); setSelectedIndex(-1); @@ -95,7 +105,7 @@ function ExtractedResultsPage() { const handleRowClick = (index: number) => { setSelectedIndex(index); - setCustomValue(''); + setCustomValue(""); }; const handleBackClick = () => { @@ -104,7 +114,7 @@ function ExtractedResultsPage() { } else { navigate({ to: "/extractedResult/$pitchBook", - params: { pitchBook } + params: { pitchBook }, }); } }; @@ -113,7 +123,7 @@ function ExtractedResultsPage() { setShowConfirmDialog(false); navigate({ to: "/extractedResult/$pitchBook", - params: { pitchBook } + params: { pitchBook }, }); }; @@ -127,9 +137,9 @@ function ExtractedResultsPage() { return ( - + - + Überprüfung der Kennzahl: {kpi} @@ -175,8 +185,8 @@ function ExtractedResultsPage() { handleRowClick(index)} > @@ -184,14 +194,14 @@ function ExtractedResultsPage() { {item.entity} @@ -218,7 +228,7 @@ function ExtractedResultsPage() { e.stopPropagation(); setCurrentPage(item.page); }} - sx={{ cursor: 'pointer' }} + sx={{ cursor: "pointer" }} > {item.page} @@ -230,14 +240,14 @@ function ExtractedResultsPage() { { setSelectedIndex(-1); @@ -245,16 +255,16 @@ function ExtractedResultsPage() { > { e.stopPropagation(); @@ -300,13 +310,20 @@ function ExtractedResultsPage() { display: "flex", flexDirection: "column", overflow: "auto", - padding: 2 + padding: 2, }} > ({ page: k.page, text: k.entity }))} + focusHighlight={{ + page: kpiValues.at(selectedIndex)?.page || -1, + text: kpiValues.at(selectedIndex)?.entity || "", + }} /> @@ -315,9 +332,9 @@ function ExtractedResultsPage() { onClick={handleAcceptReview} disabled={!selectedValue} sx={{ - backgroundColor: '#383838', - '&:hover': { backgroundColor: '#2e2e2e' }, - '&.Mui-disabled': { backgroundColor: '#ccc' } + backgroundColor: "#383838", + "&:hover": { backgroundColor: "#2e2e2e" }, + "&.Mui-disabled": { backgroundColor: "#ccc" }, }} > Überprüfung Annehmen @@ -332,11 +349,11 @@ function ExtractedResultsPage() { maxWidth="sm" fullWidth > - + Achtung - + Alle vorgenommenen Änderungen werden verworfen. @@ -345,9 +362,9 @@ function ExtractedResultsPage() { onClick={handleCancelDiscard} variant="outlined" sx={{ - color: '#666', - borderColor: '#ddd', - '&:hover': { backgroundColor: '#f5f5f5' } + color: "#666", + borderColor: "#ddd", + "&:hover": { backgroundColor: "#f5f5f5" }, }} > Abbrechen @@ -356,8 +373,8 @@ function ExtractedResultsPage() { onClick={handleConfirmDiscard} variant="contained" sx={{ - backgroundColor: '#383838', - '&:hover': { backgroundColor: '#2e2e2e' } + backgroundColor: "#383838", + "&:hover": { backgroundColor: "#2e2e2e" }, }} > Bestätigen @@ -366,4 +383,4 @@ function ExtractedResultsPage() { ); -} \ No newline at end of file +} diff --git a/project/frontend/src/util/highlighting.ts b/project/frontend/src/util/highlighting.ts new file mode 100644 index 0000000..19a106d --- /dev/null +++ b/project/frontend/src/util/highlighting.ts @@ -0,0 +1,12 @@ +export const highlightPattern = ( + text: string, + widthHeight: string, + pos: string[], + posFocus: string[], +) => { + if (posFocus.includes(widthHeight)) { + return `${text}`; + } + + return pos.includes(widthHeight) ? `${text}` : text; +};