浏览代码

feat: add Preview with react-pdf-highlighter (#89)

* feat: add selected style to chunk item

* feat: highlight pdf

* feat: add Preview with react-pdf-highlighter
tags/v0.1.0
balibabu 1年前
父节点
当前提交
7f174fb9d3
没有帐户链接到提交者的电子邮件

+ 96
- 2
web/package-lock.json 查看文件

@@ -24,6 +24,7 @@
"react-infinite-scroll-component": "^6.1.0",
"react-markdown": "^9.0.1",
"react-pdf": "^7.7.1",
"react-pdf-highlighter": "^6.1.0",
"react-string-replace": "^1.1.1",
"umi": "^4.0.90",
"umi-request": "^1.4.0",
@@ -7264,6 +7265,12 @@
"node": ">= 4"
}
},
"node_modules/dommatrix": {
"version": "1.0.3",
"resolved": "https://registry.npmmirror.com/dommatrix/-/dommatrix-1.0.3.tgz",
"integrity": "sha512-l32Xp/TLgWb8ReqbVJAFIvXmY7go4nTxxlWiAFyhoQw9RKEOHBZNnyGvJWqDVSPmq3Y9HlM4npqF/T6VMOXhww==",
"deprecated": "dommatrix is no longer maintained. Please use @thednp/dommatrix."
},
"node_modules/domutils": {
"version": "2.8.0",
"resolved": "https://registry.npmmirror.com/domutils/-/domutils-2.8.0.tgz",
@@ -8562,6 +8569,11 @@
"integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==",
"peer": true
},
"node_modules/fast-memoize": {
"version": "2.5.2",
"resolved": "https://registry.npmmirror.com/fast-memoize/-/fast-memoize-2.5.2.tgz",
"integrity": "sha512-Ue0LwpDYErFbmNnZSF0UH6eImUwDmogUO1jyE+JbN2gsQz/jICm1Ve7t9QT0rNSsfJt+Hs4/S3GnsDVjL4HVrw=="
},
"node_modules/fast-redact": {
"version": "3.3.0",
"resolved": "https://registry.npmmirror.com/fast-redact/-/fast-redact-3.3.0.tgz",
@@ -11209,8 +11221,7 @@
"node_modules/lodash.debounce": {
"version": "4.0.8",
"resolved": "https://registry.npmmirror.com/lodash.debounce/-/lodash.debounce-4.0.8.tgz",
"integrity": "sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow==",
"dev": true
"integrity": "sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow=="
},
"node_modules/lodash.merge": {
"version": "4.6.2",
@@ -14417,6 +14428,18 @@
"react-dom": "*"
}
},
"node_modules/re-resizable": {
"version": "6.9.6",
"resolved": "https://registry.npmmirror.com/re-resizable/-/re-resizable-6.9.6.tgz",
"integrity": "sha512-0xYKS5+Z0zk+vICQlcZW+g54CcJTTmHluA7JUUgvERDxnKAnytylcyPsA+BSFi759s5hPlHmBRegFrwXs2FuBQ==",
"dependencies": {
"fast-memoize": "^2.5.1"
},
"peerDependencies": {
"react": "^16.13.1 || ^17.0.0 || ^18.0.0",
"react-dom": "^16.13.1 || ^17.0.0 || ^18.0.0"
}
},
"node_modules/react": {
"version": "18.2.0",
"resolved": "https://registry.npmmirror.com/react/-/react-18.2.0.tgz",
@@ -14736,6 +14759,27 @@
"react": "^18.2.0"
}
},
"node_modules/react-draggable": {
"version": "4.4.5",
"resolved": "https://registry.npmmirror.com/react-draggable/-/react-draggable-4.4.5.tgz",
"integrity": "sha512-OMHzJdyJbYTZo4uQE393fHcqqPYsEtkjfMgvCHr6rejT+Ezn4OZbNyGH50vv+SunC1RMvwOTSWkEODQLzw1M9g==",
"dependencies": {
"clsx": "^1.1.1",
"prop-types": "^15.8.1"
},
"peerDependencies": {
"react": ">= 16.3.0",
"react-dom": ">= 16.3.0"
}
},
"node_modules/react-draggable/node_modules/clsx": {
"version": "1.2.1",
"resolved": "https://registry.npmmirror.com/clsx/-/clsx-1.2.1.tgz",
"integrity": "sha512-EcR6r5a8bj6pu3ycsa/E/cKVGuTgZJZdsyUYHOksG/UHIiKfjxzRxYJpyVBwYaQeOvghal9fcc4PidlgzugAQg==",
"engines": {
"node": ">=6"
}
},
"node_modules/react-error-overlay": {
"version": "6.0.9",
"resolved": "https://registry.npmmirror.com/react-error-overlay/-/react-error-overlay-6.0.9.tgz",
@@ -14872,6 +14916,37 @@
}
}
},
"node_modules/react-pdf-highlighter": {
"version": "6.1.0",
"resolved": "https://registry.npmmirror.com/react-pdf-highlighter/-/react-pdf-highlighter-6.1.0.tgz",
"integrity": "sha512-PD7l+0q1v+pZahLA/2AeWIb0n8d1amL6o+mOKnldIqtyChBHSE3gfnY5ZNMSFrhWXdlM6l4Eet+aydnYo6Skow==",
"dependencies": {
"lodash.debounce": "^4.0.8",
"pdfjs-dist": "2.16.105",
"react-rnd": "^10.1.10"
},
"peerDependencies": {
"react": ">=18.0.0",
"react-dom": ">=18.0.0"
}
},
"node_modules/react-pdf-highlighter/node_modules/pdfjs-dist": {
"version": "2.16.105",
"resolved": "https://registry.npmmirror.com/pdfjs-dist/-/pdfjs-dist-2.16.105.tgz",
"integrity": "sha512-J4dn41spsAwUxCpEoVf6GVoz908IAA3mYiLmNxg8J9kfRXc2jxpbUepcP0ocp0alVNLFthTAM8DZ1RaHh8sU0A==",
"dependencies": {
"dommatrix": "^1.0.3",
"web-streams-polyfill": "^3.2.1"
},
"peerDependencies": {
"worker-loader": "^3.0.8"
},
"peerDependenciesMeta": {
"worker-loader": {
"optional": true
}
}
},
"node_modules/react-refresh": {
"version": "0.14.0",
"resolved": "https://registry.npmmirror.com/react-refresh/-/react-refresh-0.14.0.tgz",
@@ -14880,6 +14955,25 @@
"node": ">=0.10.0"
}
},
"node_modules/react-rnd": {
"version": "10.4.1",
"resolved": "https://registry.npmmirror.com/react-rnd/-/react-rnd-10.4.1.tgz",
"integrity": "sha512-0m887AjQZr6p2ADLNnipquqsDq4XJu/uqVqI3zuoGD19tRm6uB83HmZWydtkilNp5EWsOHbLGF4IjWMdd5du8Q==",
"dependencies": {
"re-resizable": "6.9.6",
"react-draggable": "4.4.5",
"tslib": "2.3.1"
},
"peerDependencies": {
"react": ">=16.3.0",
"react-dom": ">=16.3.0"
}
},
"node_modules/react-rnd/node_modules/tslib": {
"version": "2.3.1",
"resolved": "https://registry.npmmirror.com/tslib/-/tslib-2.3.1.tgz",
"integrity": "sha512-77EbyPPpMz+FRFRuAFlWMtmgUWGe9UOG2Z25NqCwiIjRhOf5iKGuzSe5P2w1laq+FkRy4p+PCuVkJSGkzTEKVw=="
},
"node_modules/react-router": {
"version": "6.3.0",
"resolved": "https://registry.npmmirror.com/react-router/-/react-router-6.3.0.tgz",

+ 1
- 0
web/package.json 查看文件

@@ -28,6 +28,7 @@
"react-infinite-scroll-component": "^6.1.0",
"react-markdown": "^9.0.1",
"react-pdf": "^7.7.1",
"react-pdf-highlighter": "^6.1.0",
"react-string-replace": "^1.1.1",
"umi": "^4.0.90",
"umi-request": "^1.4.0",

+ 1
- 0
web/src/less/variable.less 查看文件

@@ -7,6 +7,7 @@
@gray8: rgba(165, 163, 169, 1);
@gray11: rgba(232, 232, 234, 1);
@purple: rgba(127, 86, 217, 1);
@selectedBackgroundColor: rgba(239, 248, 255, 1);

@fontSize12: 12px;
@fontSize14: 14px;

+ 4
- 0
web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-card/index.less 查看文件

@@ -14,3 +14,7 @@
font-style: normal;
}
}

// Background tint for a chunk card; the card component applies this class
// when its `selected` prop is true.
.cardSelected {
background-color: @selectedBackgroundColor;
}

+ 12
- 3
web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-card/index.tsx 查看文件

@@ -10,6 +10,8 @@ interface IProps {
switchChunk: (available?: number, chunkIds?: string[]) => void;
editChunk: (chunkId: string) => void;
handleCheckboxClick: (chunkId: string, checked: boolean) => void;
selected: boolean;
clickChunkCard: (chunkId: string) => void;
}

const ChunkCard = ({
@@ -18,6 +20,8 @@ const ChunkCard = ({
handleCheckboxClick,
editChunk,
switchChunk,
selected,
clickChunkCard,
}: IProps) => {
const available = Number(item.available_int);
const [enabled, setEnabled] = useState(available === 1);
@@ -31,13 +35,17 @@ const ChunkCard = ({
handleCheckboxClick(item.chunk_id, e.target.checked);
};

const handleContentClick = () => {
const handleContentDoubleClick = () => {
editChunk(item.chunk_id);
};

const handleContentClick = () => {
clickChunkCard(item.chunk_id);
};

return (
<div>
<Card>
<Card className={selected ? styles.cardSelected : ''}>
<Flex gap={'middle'} justify={'space-between'}>
<Checkbox onChange={handleCheck} checked={checked}></Checkbox>
{item.img_id && (
@@ -52,7 +60,8 @@ const ChunkCard = ({
)}

<section
onDoubleClick={handleContentClick}
onDoubleClick={handleContentDoubleClick}
onClick={handleContentClick}
className={styles.content}
dangerouslySetInnerHTML={{ __html: item.content_with_weight }}
>

+ 33
- 0
web/src/pages/add-knowledge/components/knowledge-chunk/components/document-preview/hightlights.ts 查看文件

@@ -0,0 +1,33 @@
/**
 * Builds the rectangle used by the sample highlight. A new object is returned
 * on every call so the bounding box and the entry in `rects` never alias.
 */
const createSampleRect = () => ({
  x1: 219.7,
  y1: 204.3,
  x2: 547.0,
  y2: 264.0,
  width: 849,
  height: 1200,
});

/**
 * Hard-coded sample highlight list used as the initial highlight state of the
 * PDF preview. It holds a single text highlight on page 9.
 */
export const testHighlights = [
  {
    content: {
      text: '实验证明,由氧氯化锆锂和高镍三元正极组成的全固态锂电池展示了极为优异的性能:在12 分钟快速充电的条件下,该电池仍然成功地在室温稳定循环2000 圈以上。',
    },
    position: {
      boundingRect: createSampleRect(),
      rects: [createSampleRect()],
      pageNumber: 9,
    },
    comment: {
      text: 'Flow or TypeScript?',
      emoji: '🔥',
    },
    id: '8245652131754351',
  },
];

+ 36
- 1
web/src/pages/add-knowledge/components/knowledge-chunk/components/document-preview/hooks.ts 查看文件

@@ -1,5 +1,8 @@
import { useGetKnowledgeSearchParams } from '@/hooks/knowledgeHook';
import { api_host } from '@/utils/api';
import { useSize } from 'ahooks';
import { useCallback, useEffect, useState } from 'react';
import { CustomTextRenderer } from 'node_modules/react-pdf/dist/esm/shared/types';
import { useCallback, useEffect, useMemo, useState } from 'react';

export const useDocumentResizeObserver = () => {
const [containerWidth, setContainerWidth] = useState<number>();
@@ -18,3 +21,35 @@ export const useDocumentResizeObserver = () => {

return { containerWidth, setContainerRef };
};

/**
 * Wraps the parts of one PDF text-layer item that relate to `pattern` in
 * `<mark>` tags and returns the resulting HTML string.
 *
 * Two situations produce a highlight:
 * - the item is a non-blank fragment contained in `pattern` (the pattern spans
 *   several text items): the whole item is marked;
 * - `pattern` occurs inside the item: every occurrence is marked.
 *
 * All comparisons are literal substring checks, so characters that are special
 * in regular expressions (`(`, `.`, `*`, ...) are matched verbatim and can
 * never raise a `SyntaxError`.
 *
 * @param text - Text of a single text-layer item.
 * @param pattern - Text to highlight; an empty pattern highlights nothing.
 * @param pageNumber - Page the item belongs to. Currently unused; kept so the
 *   existing call signature stays valid.
 * @returns `text` with the matches wrapped in `<mark>`, or `text` unchanged
 *   when nothing matches.
 */
function highlightPattern(
  text: string,
  pattern: string,
  pageNumber: number,
): string {
  // Nothing selected: an empty pattern would otherwise "match" everywhere.
  if (pattern === '') {
    return text;
  }

  // The item is a piece of the (longer) pattern: mark it entirely.
  if (text.trim() !== '' && pattern.includes(text)) {
    return `<mark>${text}</mark>`;
  }

  // The pattern sits inside the item: mark each literal occurrence.
  return text.split(pattern).join(`<mark>${pattern}</mark>`);
}

/**
 * Returns a memoized react-pdf custom text renderer that passes each text
 * item's string and page number, together with `searchText`, to
 * `highlightPattern`. The renderer identity changes only when `searchText`
 * changes.
 *
 * @param searchText - Text to highlight; defaults to an empty string.
 */
export const useHighlightText = (searchText: string = '') =>
  useCallback<CustomTextRenderer>(
    ({ str, pageNumber }) => highlightPattern(str, searchText, pageNumber),
    [searchText],
  );

/**
 * Builds the URL `${api_host}/document/get/${documentId}` for the document id
 * taken from the knowledge search params. The string is recomputed only when
 * `documentId` changes.
 */
export const useGetDocumentUrl = () => {
  const { documentId } = useGetKnowledgeSearchParams();

  return useMemo(
    () => `${api_host}/document/get/${documentId}`,
    [documentId],
  );
};

+ 7
- 2
web/src/pages/add-knowledge/components/knowledge-chunk/components/document-preview/index.less 查看文件

@@ -1,6 +1,11 @@
.documentContainer {
width: 100%;
height: calc(100vh - 284px);
overflow-y: auto;
overflow-x: hidden;
// overflow-y: auto;
// overflow-x: hidden;
position: relative;
:global(.PdfHighlighter) {
overflow-x: hidden;
// left: 0;
}
}

+ 12
- 28
web/src/pages/add-knowledge/components/knowledge-chunk/components/document-preview/index.tsx 查看文件

@@ -5,69 +5,53 @@ import { Document, Page, pdfjs } from 'react-pdf';

import 'react-pdf/dist/esm/Page/AnnotationLayer.css';
import 'react-pdf/dist/esm/Page/TextLayer.css';
import { useDocumentResizeObserver } from './hooks';
import { useDocumentResizeObserver, useHighlightText } from './hooks';

import { Spin } from 'antd';
import { useGetSelectedChunk } from '../../hooks';
import styles from './index.less';

// type PDFFile = string | File | null;

pdfjs.GlobalWorkerOptions.workerSrc = new URL(
'pdfjs-dist/build/pdf.worker.min.js',
import.meta.url,
).toString();

// const options = {
// cMapUrl: '/cmaps/',
// standardFontDataUrl: '/standard_fonts/',
// };
interface IProps {
selectedChunkId: string;
}

const DocumentPreview = () => {
const DocumentPreview = ({ selectedChunkId }: IProps) => {
const [numPages, setNumPages] = useState<number>();
const { documentId } = useGetKnowledgeSearchParams();
// const [file, setFile] = useState<PDFFile>(null);
const { containerWidth, setContainerRef } = useDocumentResizeObserver();
const selectedChunk = useGetSelectedChunk(selectedChunkId);
console.info(selectedChunk?.content_with_weight);
const textRenderer = useHighlightText(selectedChunk?.content_with_weight);

function onDocumentLoadSuccess({ numPages }: { numPages: number }): void {
setNumPages(numPages);
}

// const handleChange = (e: any) => {
// console.info(e.files);
// setFile(e.target.files[0] || null);
// };

const url = useMemo(() => {
return `${api_host}/document/get/${documentId}`;
}, [documentId]);

// const fetch_document_file = useCallback(async () => {
// const ret: Blob = await getDocumentFile(documentId);
// console.info(ret);
// const f = new File([ret], 'xx.pdf', { type: ret.type });
// setFile(f);
// }, [documentId]);

// useEffect(() => {
// // dispatch({ type: 'kFModel/fetch_document_file', payload: documentId });
// fetch_document_file();
// }, [fetch_document_file]);

return (
<div ref={setContainerRef} className={styles.documentContainer}>
<Document
file={url}
onLoadSuccess={onDocumentLoadSuccess}
// options={options}
loading={<Spin></Spin>}
>
{Array.from(new Array(numPages), (el, index) => (
<Page
key={`page_${index + 1}`}
pageNumber={index + 1}
width={containerWidth}
customTextRenderer={textRenderer}
/>
))}
</Document>
{/* <input type="file" onChange={handleChange} /> */}
</div>
);
};

+ 189
- 0
web/src/pages/add-knowledge/components/knowledge-chunk/components/document-preview/preview.tsx 查看文件

@@ -0,0 +1,189 @@
import { Spin } from 'antd';
import { useRef, useState } from 'react';
import type { NewHighlight } from 'react-pdf-highlighter';
import {
AreaHighlight,
Highlight,
PdfHighlighter,
PdfLoader,
Popup,
Tip,
} from 'react-pdf-highlighter';
import { useGetSelectedChunk } from '../../hooks';
import { testHighlights } from './hightlights';
import { useGetDocumentUrl } from './hooks';

import styles from './index.less';

/** Props accepted by the `Preview` component. */
interface IProps {
/** Id forwarded to `useGetSelectedChunk` to look up the selected chunk. */
selectedChunkId: string;
}

/**
 * Produces an id string: the decimal text of `Math.random()` with its first
 * two characters removed.
 */
const getNextId = () => Math.random().toString().slice(2);

/**
 * Popup body for a highlight: shows the comment's emoji followed by its text.
 * Renders nothing when the comment text is empty.
 */
const HighlightPopup = ({
  comment,
}: {
  comment: { text: string; emoji: string };
}) => {
  const { text, emoji } = comment;

  if (!text) {
    return null;
  }

  return (
    <div className="Highlight__popup">
      {emoji} {text}
    </div>
  );
};

/**
 * PDF preview built on react-pdf-highlighter.
 *
 * Loads the document from `useGetDocumentUrl()`, renders the highlights held
 * in local state (seeded with `testHighlights`), lets the user add text
 * highlights, and lets the user add or resize area highlights while the Alt
 * key is held.
 *
 * @param selectedChunkId - Id of the chunk selected in the chunk list.
 */
const Preview = ({ selectedChunkId }: IProps) => {
  const url = useGetDocumentUrl();
  // NOTE(review): not read anywhere below yet — presumably reserved for
  // scrolling the viewer to the selected chunk; confirm before removing.
  const selectedChunk = useGetSelectedChunk(selectedChunkId);

  const [state, setState] = useState<any>(testHighlights);
  // Holds the scroll function handed over through `scrollRef`; a no-op until
  // PdfHighlighter provides the real one.
  const ref = useRef((highlight: any) => {});

  /** Extracts the highlight id from a `#highlight-<id>` location hash. */
  const parseIdFromHash = () =>
    document.location.hash.slice('#highlight-'.length);

  /** Clears the location hash. */
  const resetHash = () => {
    document.location.hash = '';
  };

  /** Finds a highlight in the current state by id. */
  const getHighlightById = (id: string) =>
    state.find((highlight: any) => highlight.id === id);

  /** Scrolls the viewer to the highlight named in the location hash, if any. */
  const scrollToHighlightFromHash = () => {
    const highlight = getHighlightById(parseIdFromHash());

    if (highlight) {
      ref.current(highlight);
    }
  };

  /** Prepends a new highlight, giving it a freshly generated id. */
  const addHighlight = (highlight: NewHighlight) => {
    // Create the id outside the updater so the updater stays pure.
    const saved = { ...highlight, id: getNextId() };

    // Functional update: always builds on the latest list, even when several
    // updates are queued before the next render.
    setState((highlights: any) => [saved, ...highlights]);
  };

  /**
   * Merges `position` and `content` into the highlight with the given id;
   * all other highlights are returned untouched.
   */
  const updateHighlight = (
    highlightId: string,
    position: object,
    content: object,
  ) => {
    setState((highlights: any) =>
      highlights.map((h: any) =>
        h.id === highlightId
          ? {
              ...h,
              position: { ...h.position, ...position },
              content: { ...h.content, ...content },
            }
          : h,
      ),
    );
  };

  return (
    <div className={styles.documentContainer}>
      <PdfLoader url={url} beforeLoad={<Spin />}>
        {(pdfDocument) => (
          <PdfHighlighter
            pdfDocument={pdfDocument}
            // Area selection is active only while Alt is held.
            enableAreaSelection={(event) => event.altKey}
            onScrollChange={resetHash}
            scrollRef={(scrollTo) => {
              // Keep the viewer's scroll function, then honour any hash
              // that is already present.
              ref.current = scrollTo;

              scrollToHighlightFromHash();
            }}
            onSelectionFinished={(
              position,
              content,
              hideTipAndSelection,
              transformSelection,
            ) => (
              <Tip
                onOpen={transformSelection}
                onConfirm={(comment) => {
                  addHighlight({ content, position, comment });

                  hideTipAndSelection();
                }}
              />
            )}
            highlightTransform={(
              highlight,
              index,
              setTip,
              hideTip,
              viewportToScaled,
              screenshot,
              isScrolledTo,
            ) => {
              // A highlight without an image in its content is a text highlight.
              const isTextHighlight = !highlight.content?.image;

              const component = isTextHighlight ? (
                <Highlight
                  isScrolledTo={isScrolledTo}
                  position={highlight.position}
                  comment={highlight.comment}
                />
              ) : (
                <AreaHighlight
                  isScrolledTo={isScrolledTo}
                  highlight={highlight}
                  onChange={(boundingRect) => {
                    updateHighlight(
                      highlight.id,
                      { boundingRect: viewportToScaled(boundingRect) },
                      { image: screenshot(boundingRect) },
                    );
                  }}
                />
              );

              return (
                <Popup
                  popupContent={<HighlightPopup {...highlight} />}
                  onMouseOver={(popupContent) =>
                    setTip(highlight, () => popupContent)
                  }
                  onMouseOut={hideTip}
                  key={index}
                >
                  {component}
                </Popup>
              );
            }}
            highlights={state}
          />
        )}
      </PdfLoader>
    </div>
  );
};

export default Preview;

+ 24
- 1
web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts 查看文件

@@ -1,4 +1,5 @@
import { IKnowledgeFile } from '@/interfaces/database/knowledge';
import { IChunk, IKnowledgeFile } from '@/interfaces/database/knowledge';
import { useCallback, useState } from 'react';
import { useSelector } from 'umi';

export const useSelectDocumentInfo = () => {
@@ -7,3 +8,25 @@ export const useSelectDocumentInfo = () => {
);
return documentInfo;
};

/** Selects the chunk list stored at `chunkModel.data` in the store state. */
export const useSelectChunkList = () => {
  const selectChunks = (state: any): IChunk[] => state.chunkModel.data;

  return useSelector(selectChunks);
};

/**
 * Keeps the id of the selected chunk in local state (initially an empty
 * string) and exposes a stable click handler that stores the id it is given.
 */
export const useHandleChunkCardClick = () => {
  const [selectedChunkId, storeSelectedChunkId] = useState<string>('');

  const handleChunkCardClick = useCallback(
    (chunkId: string) => storeSelectedChunkId(chunkId),
    [],
  );

  return { handleChunkCardClick, selectedChunkId };
};

/**
 * Looks up the chunk whose `chunk_id` equals `selectedChunkId` in the chunk
 * list from the store.
 *
 * @returns The matching chunk, or `undefined` when none matches.
 */
export const useGetSelectedChunk = (selectedChunkId: string) => {
  const chunks: IChunk[] = useSelectChunkList();
  const isSelected = ({ chunk_id }: IChunk) => chunk_id === selectedChunkId;

  return chunks.find(isSelected);
};

+ 9
- 3
web/src/pages/add-knowledge/components/knowledge-chunk/index.tsx 查看文件

@@ -8,8 +8,9 @@ import CreatingModal from './components/chunk-creating-modal';
import { useDeleteChunkByIds } from '@/hooks/knowledgeHook';
import ChunkCard from './components/chunk-card';
import ChunkToolBar from './components/chunk-toolbar';
import DocumentPreview from './components/document-preview';
import { useSelectDocumentInfo } from './hooks';
// import DocumentPreview from './components/document-preview';
import DocumentPreview from './components/document-preview/preview';
import { useHandleChunkCardClick, useSelectDocumentInfo } from './hooks';
import styles from './index.less';
import { ChunkModelState } from './model';

@@ -36,6 +37,7 @@ const Chunk = () => {
const [chunkId, setChunkId] = useState<string | undefined>();
const { removeChunk } = useDeleteChunkByIds();
const documentInfo = useSelectDocumentInfo();
const { handleChunkCardClick, selectedChunkId } = useHandleChunkCardClick();

const getChunkList = useCallback(() => {
const payload: PayloadType = {
@@ -180,6 +182,8 @@ const Chunk = () => {
)}
handleCheckboxClick={handleSingleCheckboxClick}
switchChunk={switchChunk}
clickChunkCard={handleChunkCardClick}
selected={item.chunk_id === selectedChunkId}
></ChunkCard>
))}
</Space>
@@ -202,7 +206,9 @@ const Chunk = () => {

{documentInfo.type === 'pdf' && (
<section className={styles.documentPreview}>
<DocumentPreview></DocumentPreview>
<DocumentPreview
selectedChunkId={selectedChunkId}
></DocumentPreview>
</section>
)}
</Flex>

+ 2
- 2
web/src/pages/add-knowledge/components/knowledge-chunk/model.ts 查看文件

@@ -1,5 +1,5 @@
import { BaseState } from '@/interfaces/common';
import { IKnowledgeFile } from '@/interfaces/database/knowledge';
import { IChunk, IKnowledgeFile } from '@/interfaces/database/knowledge';
import kbService from '@/services/kbService';
import { message } from 'antd';
import { pick } from 'lodash';
@@ -7,7 +7,7 @@ import { pick } from 'lodash';
import { DvaModel } from 'umi';

export interface ChunkModelState extends BaseState {
data: any[];
data: IChunk[];
total: number;
isShowCreateModal: boolean;
chunk_id: string;

正在加载...
取消
保存