Add PDF text highlighting to PDF viewer

pull/68/head
Jaronim Pracht 2025-06-14 16:48:20 +02:00
parent 082317318a
commit b88eb0f4ce
6 changed files with 225 additions and 84 deletions

View File

@ -22,7 +22,7 @@ import type { KeyboardEvent } from "react";
import { fetchPutKPI } from "../util/api";
interface KennzahlenTableProps {
onPageClick?: (page: number) => void;
onPageClick?: (page: number, text: string) => void;
pdfId: string;
settings: Kennzahl[];
data: {
@ -286,7 +286,10 @@ export default function KennzahlenTable({
<Link
component="button"
onClick={() =>
onPageClick?.(Number(row.extractedValues.at(0)?.page))
onPageClick?.(
Number(row.extractedValues.at(0)?.page),
row.extractedValues.at(0)?.entity,
)
}
sx={{ cursor: "pointer" }}
>

View File

@ -1,28 +1,42 @@
import { useEffect, useRef, useState } from "react";
import { useCallback, useEffect, useRef, useState } from "react";
import { Document, Page } from "react-pdf";
import "react-pdf/dist/esm/Page/AnnotationLayer.css";
import "react-pdf/dist/esm/Page/TextLayer.css";
import ArrowCircleLeftIcon from "@mui/icons-material/ArrowCircleLeft";
import ArrowCircleRightIcon from "@mui/icons-material/ArrowCircleRight";
import { Box, IconButton } from "@mui/material";
import type {
CustomTextRenderer,
OnGetTextSuccess,
} from "node_modules/react-pdf/dist/esm/shared/types";
import { socket } from "../socket";
import { highlightPattern } from "../util/highlighting";
interface PDFViewerProps {
pitchBookId: string;
currentPage?: number;
onPageChange?: (page: number) => void;
highlight: { text: string; page: number }[];
focusHighlight: { text: string; page: number };
}
export default function PDFViewer({
pitchBookId,
currentPage,
onPageChange,
highlight = [],
focusHighlight,
}: PDFViewerProps) {
const [numPages, setNumPages] = useState<number | null>(null);
const [pageNumber, setPageNumber] = useState(currentPage || 1);
const [containerWidth, setContainerWidth] = useState<number | null>(null);
const [pdfKey, setPdfKey] = useState(Date.now());
const containerRef = useRef<HTMLDivElement>(null);
const [posHighlight, setPosHighlight] = useState<string[]>([]);
const [posHighlightFocus, setPosHighlightFocus] = useState<string[]>([]);
const [textContent, setTextContent] = useState<
{ posKey: string; text: string; i: number }[]
>([]);
const onDocumentLoadSuccess = ({ numPages }: { numPages: number }) => {
setNumPages(numPages);
@ -44,7 +58,7 @@ export default function PDFViewer({
if (currentPage && currentPage !== pageNumber) {
setPageNumber(currentPage);
}
}, [currentPage]);
}, [currentPage, pageNumber]);
useEffect(() => {
const handleProgress = (data: { id: number; progress: number }) => {
@ -65,6 +79,78 @@ export default function PDFViewer({
onPageChange?.(newPage);
};
// Custom text-layer renderer passed to <Page>. Builds a key for the text item
// from its width, height and transform, then lets highlightPattern decide
// whether the item's string gets wrapped in a highlight.
const textRenderer: CustomTextRenderer = useCallback(
({ str, width, height, transform }) => {
// Same "width;height;transform" layout that onGetTextSuccess stores as posKey.
const itemKey = `${width};${height};${transform}`;
return highlightPattern(str, itemKey, posHighlight, posHighlightFocus);
},
[posHighlight, posHighlightFocus],
);
// Recomputes which text-layer items belong to a highlighted phrase on the
// current page. Results are stored as lists of position keys: posHighlight for
// the regular highlights and posHighlightFocus for the focused one.
useEffect(() => {
const tmpPos: string[] = [];
const tmpPosHighlight: string[] = [];
// Blank items are skipped for word matching, but every entry keeps its index
// `i` into textContent so the covered range can be mapped back afterwards.
const textItems = textContent.filter(
(e) => e.text !== "" && e.text !== " ",
);
// Loop-invariant: phrases for this page, split into words once.
const pagePhrases = highlight
.filter((h) => h.page === pageNumber)
.map((h) => h.text.split(" "));
const focusWords =
focusHighlight?.page === pageNumber
? focusHighlight.text.split(" ")
: null;

// If `words` matches the consecutive non-blank items starting at `start`,
// appends the position keys of every covered textContent entry to `target`.
const collectMatch = (start: number, words: string[], target: string[]) => {
// Optional chaining keeps a phrase that would run past the last item from
// throwing; it simply does not match.
const matches = words.every(
(word, j) => textItems[start + j]?.text === word,
);
if (!matches) {
return;
}
// The range ends where the next non-blank item begins. When the phrase is
// the last thing on the page there is no such item, so run to the end.
const end = textItems[start + words.length]?.i ?? textContent.length;
for (let k = textItems[start].i; k < end; k++) {
target.push(textContent[k].posKey);
}
};

textItems.forEach((_, i) => {
for (const words of pagePhrases) {
collectMatch(i, words, tmpPos);
}
if (focusWords) {
collectMatch(i, focusWords, tmpPosHighlight);
}
});
setPosHighlight(tmpPos);
setPosHighlightFocus(tmpPosHighlight);
}, [highlight, focusHighlight, pageNumber, textContent]);
// Stores a simplified copy of the page's text items once react-pdf has loaded
// them. Each entry keeps a "width;height;transform" position key, the item's
// string and its index in the original item list. Properties missing from an
// item fall back to 0 (width/height) or "" (transform/str).
const onGetTextSuccess: OnGetTextSuccess = useCallback((fullText) => {
const entries = fullText.items.map((item, index) => {
const width = "width" in item ? item.width : 0;
const height = "height" in item ? item.height : 0;
const transform = "transform" in item ? item.transform : "";
const text = "str" in item ? item.str : "";
return {
posKey: `${width};${height};${transform}`,
text,
i: index,
};
});
setTextContent(entries);
}, []);
return (
<Box
display="flex"
@ -94,7 +180,12 @@ export default function PDFViewer({
onSourceError={(error) => console.error("Ungültige PDF:", error)}
>
{containerWidth && (
<Page pageNumber={pageNumber} width={containerWidth * 0.98} />
<Page
pageNumber={pageNumber}
width={containerWidth * 0.98}
customTextRenderer={textRenderer}
onGetTextSuccess={onGetTextSuccess}
/>
)}
</Document>
</Box>
@ -113,8 +204,8 @@ export default function PDFViewer({
<ArrowCircleLeftIcon fontSize="large" />
</IconButton>
<span>
{pageNumber} / {numPages}
</span>
{pageNumber} / {numPages}
</span>
<IconButton
disabled={pageNumber >= (numPages || 1)}
onClick={() => handlePageChange(pageNumber + 1)}

View File

@ -3,6 +3,7 @@ import { ThemeProvider, createTheme } from "@mui/material/styles";
import { RouterProvider, createRouter } from "@tanstack/react-router";
import { StrictMode } from "react";
import ReactDOM from "react-dom/client";
import "react-pdf/dist/Page/TextLayer.css";
import "@fontsource/roboto/300.css";
import "@fontsource/roboto/400.css";
@ -12,6 +13,7 @@ import "@fontsource/roboto/700.css";
import * as TanStackQueryProvider from "./integrations/tanstack-query/root-provider.tsx";
import { pdfjs } from "react-pdf";
// Import the generated route tree
import { routeTree } from "./routeTree.gen";

View File

@ -2,7 +2,7 @@ import ContentPasteIcon from "@mui/icons-material/ContentPaste";
import { Box, Button, Paper, Typography } from "@mui/material";
import { useSuspenseQuery } from "@tanstack/react-query";
import { createFileRoute, useNavigate } from "@tanstack/react-router";
import { useState } from "react";
import { useCallback, useState } from "react";
import KennzahlenTable from "../components/KennzahlenTable";
import PDFViewer from "../components/pdfViewer";
import { kpiQueryOptions, settingsQueryOptions } from "../util/query";
@ -21,6 +21,15 @@ function ExtractedResultsPage() {
const navigate = useNavigate();
const status: "green" | "yellow" | "red" = "red";
const [currentPage, setCurrentPage] = useState(1);
const [focusHighlight, setFocusHighlight] = useState({
page: 5,
text: "Langjährige",
});
// Click handler passed to KennzahlenTable as onPageClick: moves the viewer to
// the clicked page and makes the clicked entity the focused highlight.
const onSiteClick = useCallback((page: number, entity: string) => {
const nextFocus = { page, text: entity };
setCurrentPage(page);
setFocusHighlight(nextFocus);
}, []);
const statusColor = {
red: "#f43131",
@ -72,7 +81,7 @@ function ExtractedResultsPage() {
>
<KennzahlenTable
settings={settings}
onPageClick={setCurrentPage}
onPageClick={onSiteClick}
data={kpi}
pdfId={pitchBook}
/>
@ -105,6 +114,13 @@ function ExtractedResultsPage() {
pitchBookId={pitchBook}
currentPage={currentPage}
onPageChange={setCurrentPage}
highlight={Object.values(kpi)
.map((item) => item.at(0))
.map((item) => ({
page: item?.page || -1,
text: item?.entity || "",
}))}
focusHighlight={focusHighlight}
/>
</Paper>
<Box

View File

@ -1,6 +1,10 @@
import ArrowBackIcon from "@mui/icons-material/ArrowBack";
import {
Box,
Button, Dialog, DialogActions, DialogContent,
Button,
Dialog,
DialogActions,
DialogContent,
DialogContentText,
DialogTitle,
IconButton,
@ -14,15 +18,18 @@ import {
TableHead,
TableRow,
TextField,
Typography
Typography,
} from "@mui/material";
import {useMutation, useQueryClient, useSuspenseQuery} from "@tanstack/react-query";
import {
useMutation,
useQueryClient,
useSuspenseQuery,
} from "@tanstack/react-query";
import { createFileRoute, useNavigate } from "@tanstack/react-router";
import {useEffect, useState} from "react";
import { useEffect, useState } from "react";
import PDFViewer from "../components/pdfViewer";
import { fetchPutKPI } from "../util/api";
import { kpiQueryOptions } from "../util/query";
import ArrowBackIcon from "@mui/icons-material/ArrowBack";
import {fetchPutKPI} from "../util/api";
export const Route = createFileRoute("/extractedResult_/$pitchBook/$kpi")({
component: ExtractedResultsPage,
@ -36,18 +43,17 @@ function ExtractedResultsPage() {
const navigate = useNavigate();
const queryClient = useQueryClient();
const {
data: kpiData
} = useSuspenseQuery(kpiQueryOptions(pitchBook));
const { data: kpiData } = useSuspenseQuery(kpiQueryOptions(pitchBook));
const kpiValues = kpiData[kpi.toUpperCase()] || [];
const [selectedIndex, setSelectedIndex] = useState(0);
const [currentPage, setCurrentPage] = useState(kpiValues[0]?.page || 1);
const [showConfirmDialog, setShowConfirmDialog] = useState(false);
const [hasChanges, setHasChanges] = useState(false);
const [customValue, setCustomValue] = useState('');
const originalValue = kpiValues[0]?.entity || '';
const selectedValue = selectedIndex === -1 ? customValue : (kpiValues[selectedIndex]?.entity || '');
const [customValue, setCustomValue] = useState("");
const originalValue = kpiValues[0]?.entity || "";
const selectedValue =
selectedIndex === -1 ? customValue : kpiValues[selectedIndex]?.entity || "";
useEffect(() => {
setHasChanges(selectedValue !== originalValue);
@ -56,10 +62,12 @@ function ExtractedResultsPage() {
const { mutate: updateKPI } = useMutation({
mutationFn: () => {
const updatedData = { ...kpiData };
updatedData[kpi.toUpperCase()] = [{
...kpiValues[0],
entity: selectedValue
}];
updatedData[kpi.toUpperCase()] = [
{
...kpiValues[0],
entity: selectedValue,
},
];
return fetchPutKPI(Number(pitchBook), updatedData);
},
onSuccess: () => {
@ -68,26 +76,28 @@ function ExtractedResultsPage() {
});
navigate({
to: "/extractedResult/$pitchBook",
params: { pitchBook }
params: { pitchBook },
});
},
onError: (error) => {
console.error('Error updating KPI:', error);
}
console.error("Error updating KPI:", error);
},
});
const handleRadioChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const value = event.target.value;
if (value === 'custom') {
if (value === "custom") {
setSelectedIndex(-1);
} else {
const index = parseInt(value);
const index = Number.parseInt(value);
setSelectedIndex(index);
setCustomValue('');
setCustomValue("");
}
};
const handleCustomValueChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const handleCustomValueChange = (
event: React.ChangeEvent<HTMLInputElement>,
) => {
const value = event.target.value;
setCustomValue(value);
setSelectedIndex(-1);
@ -95,7 +105,7 @@ function ExtractedResultsPage() {
const handleRowClick = (index: number) => {
setSelectedIndex(index);
setCustomValue('');
setCustomValue("");
};
const handleBackClick = () => {
@ -104,7 +114,7 @@ function ExtractedResultsPage() {
} else {
navigate({
to: "/extractedResult/$pitchBook",
params: { pitchBook }
params: { pitchBook },
});
}
};
@ -113,7 +123,7 @@ function ExtractedResultsPage() {
setShowConfirmDialog(false);
navigate({
to: "/extractedResult/$pitchBook",
params: { pitchBook }
params: { pitchBook },
});
};
@ -127,9 +137,9 @@ function ExtractedResultsPage() {
return (
<Box p={4}>
<Box sx={{ display: 'flex', alignItems: 'center', mb: 3 }}>
<Box sx={{ display: "flex", alignItems: "center", mb: 3 }}>
<IconButton onClick={handleBackClick} sx={{ mr: 2 }}>
<ArrowBackIcon fontSize="large" sx={{ color: '#383838' }} />
<ArrowBackIcon fontSize="large" sx={{ color: "#383838" }} />
</IconButton>
<Typography variant="h5" fontWeight="bold">
Überprüfung der Kennzahl: {kpi}
@ -175,8 +185,8 @@ function ExtractedResultsPage() {
<TableRow
key={`${item.entity}_${item.page}_${index}`}
sx={{
'&:hover': { backgroundColor: '#f9f9f9' },
cursor: 'pointer'
"&:hover": { backgroundColor: "#f9f9f9" },
cursor: "pointer",
}}
onClick={() => handleRowClick(index)}
>
@ -184,14 +194,14 @@ function ExtractedResultsPage() {
<Box
sx={{
borderRadius: 1,
padding: '4px 8px',
display: 'flex',
alignItems: 'center',
width: '100%',
cursor: 'pointer',
'&:hover': {
borderColor: '#ccc'
}
padding: "4px 8px",
display: "flex",
alignItems: "center",
width: "100%",
cursor: "pointer",
"&:hover": {
borderColor: "#ccc",
},
}}
>
<Radio
@ -199,13 +209,13 @@ function ExtractedResultsPage() {
checked={selectedIndex === index}
onChange={handleRadioChange}
sx={{
color: '#383838',
'&.Mui-checked': { color: '#383838' },
padding: '4px',
color: "#383838",
"&.Mui-checked": { color: "#383838" },
padding: "4px",
marginRight: 1,
'&:focus': {
outline: 'none'
}
"&:focus": {
outline: "none",
},
}}
/>
<span>{item.entity}</span>
@ -218,7 +228,7 @@ function ExtractedResultsPage() {
e.stopPropagation();
setCurrentPage(item.page);
}}
sx={{ cursor: 'pointer' }}
sx={{ cursor: "pointer" }}
>
{item.page}
</Link>
@ -230,14 +240,14 @@ function ExtractedResultsPage() {
<TableCell>
<Box
sx={{
padding: '4px 8px',
display: 'flex',
alignItems: 'center',
width: '100%',
cursor: 'pointer',
'&:hover': {
borderColor: '#ccc'
}
padding: "4px 8px",
display: "flex",
alignItems: "center",
width: "100%",
cursor: "pointer",
"&:hover": {
borderColor: "#ccc",
},
}}
onClick={() => {
setSelectedIndex(-1);
@ -245,16 +255,16 @@ function ExtractedResultsPage() {
>
<Radio
value="custom"
checked={selectedIndex === -1 && customValue !== ''}
checked={selectedIndex === -1 && customValue !== ""}
onChange={handleRadioChange}
sx={{
color: '#383838',
'&.Mui-checked': { color: '#383838' },
padding: '4px',
color: "#383838",
"&.Mui-checked": { color: "#383838" },
padding: "4px",
marginRight: 1,
'&:focus': {
outline: 'none'
}
"&:focus": {
outline: "none",
},
}}
/>
<TextField
@ -267,9 +277,9 @@ function ExtractedResultsPage() {
disableUnderline: true,
}}
sx={{
'& .MuiInput-input': {
"& .MuiInput-input": {
padding: 0,
}
},
}}
onClick={(e: React.MouseEvent) => {
e.stopPropagation();
@ -300,13 +310,20 @@ function ExtractedResultsPage() {
display: "flex",
flexDirection: "column",
overflow: "auto",
padding: 2
padding: 2,
}}
>
<PDFViewer
pitchBookId={pitchBook}
currentPage={currentPage}
onPageChange={setCurrentPage}
highlight={Object.values(kpiValues)
.flat()
.map((k) => ({ page: k.page, text: k.entity }))}
focusHighlight={{
page: kpiValues.at(selectedIndex)?.page || -1,
text: kpiValues.at(selectedIndex)?.entity || "",
}}
/>
</Paper>
<Box mt={2} display="flex" justifyContent="flex-end" gap={2}>
@ -315,9 +332,9 @@ function ExtractedResultsPage() {
onClick={handleAcceptReview}
disabled={!selectedValue}
sx={{
backgroundColor: '#383838',
'&:hover': { backgroundColor: '#2e2e2e' },
'&.Mui-disabled': { backgroundColor: '#ccc' }
backgroundColor: "#383838",
"&:hover": { backgroundColor: "#2e2e2e" },
"&.Mui-disabled": { backgroundColor: "#ccc" },
}}
>
Überprüfung Annehmen
@ -332,11 +349,11 @@ function ExtractedResultsPage() {
maxWidth="sm"
fullWidth
>
<DialogTitle sx={{ fontSize: '1.25rem', fontWeight: 'bold' }}>
<DialogTitle sx={{ fontSize: "1.25rem", fontWeight: "bold" }}>
Achtung
</DialogTitle>
<DialogContent>
<DialogContentText sx={{ fontSize: '1rem' }}>
<DialogContentText sx={{ fontSize: "1rem" }}>
Alle vorgenommenen Änderungen werden verworfen.
</DialogContentText>
</DialogContent>
@ -345,9 +362,9 @@ function ExtractedResultsPage() {
onClick={handleCancelDiscard}
variant="outlined"
sx={{
color: '#666',
borderColor: '#ddd',
'&:hover': { backgroundColor: '#f5f5f5' }
color: "#666",
borderColor: "#ddd",
"&:hover": { backgroundColor: "#f5f5f5" },
}}
>
Abbrechen
@ -356,8 +373,8 @@ function ExtractedResultsPage() {
onClick={handleConfirmDiscard}
variant="contained"
sx={{
backgroundColor: '#383838',
'&:hover': { backgroundColor: '#2e2e2e' }
backgroundColor: "#383838",
"&:hover": { backgroundColor: "#2e2e2e" },
}}
>
Bestätigen

View File

@ -0,0 +1,12 @@
/**
 * Builds the HTML string for one PDF text item, wrapping it in a `<mark>`
 * when the item's position key is listed as highlighted.
 *
 * The text is HTML-escaped before being embedded, because the result is an
 * HTML string and the text comes from the PDF: raw `<`, `>` or `&` would
 * otherwise be interpreted as markup.
 *
 * @param text - String content of the text item.
 * @param widthHeight - Position key of the item; compared verbatim against
 *   the entries of `pos` and `posFocus`.
 * @param pos - Position keys that receive the default `<mark>` highlight.
 * @param posFocus - Position keys that receive the orange focus highlight;
 *   takes precedence over `pos`.
 * @returns The escaped text, wrapped in a `<mark>` element if highlighted.
 */
export const highlightPattern = (
  text: string,
  widthHeight: string,
  pos: string[],
  posFocus: string[],
): string => {
  // Minimal entity table for safely embedding text into HTML.
  const htmlEntities: Record<string, string> = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  const safeText = text.replace(/[&<>"']/g, (ch) => htmlEntities[ch] ?? ch);

  if (posFocus.includes(widthHeight)) {
    return `<mark style="background-color: orange;">${safeText}</mark>`;
  }
  return pos.includes(widthHeight) ? `<mark>${safeText}</mark>` : safeText;
};