### What problem does this PR solve?

Adds preview support for txt, md, Excel, CSV, PPT, image, doc, and other file types. [#3221](https://github.com/infiniflow/ragflow/issues/3221)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

tags/v0.20.0
| @@ -81,6 +81,7 @@ | |||
| "mammoth": "^1.7.2", | |||
| "next-themes": "^0.4.6", | |||
| "openai-speech-stream-player": "^1.0.8", | |||
| "pptx-preview": "^1.0.5", | |||
| "rc-tween-one": "^3.0.6", | |||
| "react-copy-to-clipboard": "^5.1.0", | |||
| "react-dropzone": "^14.3.5", | |||
| @@ -0,0 +1,114 @@ | |||
| import message from '@/components/ui/message'; | |||
| import { Spin } from '@/components/ui/spin'; | |||
| import request from '@/utils/request'; | |||
| import classNames from 'classnames'; | |||
| import React, { useEffect, useRef, useState } from 'react'; | |||
| import { useGetDocumentUrl } from './hooks'; | |||
/** Parsed CSV content: the first record is the header row, the rest are data rows. */
interface CSVData {
  rows: string[][];
  headers: string[];
}

interface FileViewerProps {
  /** Extra class names merged onto the viewer's root element. */
  className?: string;
}

/**
 * Parses CSV text into a header row plus data rows.
 *
 * Handles double-quoted fields (embedded commas, line breaks and `""` escapes),
 * LF / CRLF / CR line endings and a leading byte-order mark. Blank lines are
 * skipped so a trailing newline does not yield a phantom row. Every cell is
 * trimmed.
 *
 * @param csvText Raw CSV text.
 * @returns The header cells and the remaining rows; both empty for empty input.
 */
const parseCSV = (csvText: string): CSVData => {
  const records: string[][] = [];
  let record: string[] = [];
  let field = '';
  let inQuotes = false;

  const endField = () => {
    record.push(field.trim());
    field = '';
  };
  const endRecord = () => {
    endField();
    // A record consisting of one empty cell is a blank line: drop it.
    if (record.length > 1 || record[0] !== '') {
      records.push(record);
    }
    record = [];
  };

  // Drop a UTF-8 byte-order mark so it does not leak into the first header.
  const text = csvText.charCodeAt(0) === 0xfeff ? csvText.slice(1) : csvText;

  for (let i = 0; i < text.length; i++) {
    const ch = text.charAt(i);
    if (inQuotes) {
      if (ch !== '"') {
        field += ch;
      } else if (text.charAt(i + 1) === '"') {
        // `""` inside a quoted field is an escaped quote character.
        field += '"';
        i++;
      } else {
        inQuotes = false;
      }
    } else if (ch === '"' && field.trim() === '') {
      // A quote at the start of a field (ignoring leading blanks) opens it.
      field = '';
      inQuotes = true;
    } else if (ch === ',') {
      endField();
    } else if (ch === '\n') {
      endRecord();
    } else if (ch === '\r') {
      // Treat CRLF as a single line break.
      if (text.charAt(i + 1) === '\n') {
        i++;
      }
      endRecord();
    } else {
      field += ch;
    }
  }
  // Flush the last record when the text does not end with a line break.
  if (field !== '' || record.length > 0) {
    endRecord();
  }

  const [headers = [], ...rows] = records;
  return { headers, rows };
};

/**
 * Downloads the current document as a blob, parses it as CSV and renders it
 * as a table. A spinner is shown until the download and parsing have finished.
 */
const CSVFileViewer: React.FC<FileViewerProps> = ({ className }) => {
  const [data, setData] = useState<CSVData | null>(null);
  const [isLoading, setIsLoading] = useState<boolean>(true);
  const containerRef = useRef<HTMLDivElement>(null);
  const url = useGetDocumentUrl();

  useEffect(() => {
    if (!url) {
      setData(null);
      setIsLoading(false);
      return;
    }

    // Set by the cleanup so a response for a previous url (or one arriving
    // after unmount) never touches state.
    let cancelled = false;

    const loadCSV = async () => {
      setIsLoading(true);
      setData(null);
      try {
        const res = await request(url, {
          method: 'GET',
          responseType: 'blob',
          onError: () => {
            message.error('file load failed');
          },
        });
        if (cancelled) {
          return;
        }
        if (!res?.data) {
          // No payload: the onError callback above is expected to have
          // reported the failure already.
          return;
        }
        const csvText: string = await res.data.text();
        if (cancelled) {
          return;
        }
        setData(parseCSV(csvText));
      } catch (error) {
        if (cancelled) {
          return;
        }
        message.error('CSV file parse failed');
        console.error('Error loading CSV file:', error);
      } finally {
        // Runs only after the text has actually been read and parsed, so the
        // spinner is not dropped while work is still pending.
        if (!cancelled) {
          setIsLoading(false);
        }
      }
    };

    loadCSV();
    return () => {
      cancelled = true;
    };
  }, [url]);

  return (
    <div
      ref={containerRef}
      className={classNames(
        'relative w-full h-full p-4 bg-background-paper border border-border-normal rounded-md',
        'overflow-auto max-h-[80vh] p-2',
        className,
      )}
    >
      {isLoading ? (
        <div className="absolute inset-0 flex items-center justify-center">
          <Spin />
        </div>
      ) : data ? (
        <table className="min-w-full divide-y divide-border-normal">
          <thead className="bg-background-header-bar">
            <tr>
              {data.headers.map((header, index) => (
                <th
                  key={`header-${index}`}
                  className="px-6 py-3 text-left text-sm font-medium text-text-title"
                >
                  {header}
                </th>
              ))}
            </tr>
          </thead>
          <tbody className="bg-background-paper divide-y divide-border-normal">
            {data.rows.map((row, rowIndex) => (
              <tr key={`row-${rowIndex}`}>
                {row.map((cell, cellIndex) => (
                  <td
                    key={`cell-${rowIndex}-${cellIndex}`}
                    className="px-6 py-4 whitespace-nowrap text-sm text-text-secondary"
                  >
                    {/* Empty cells are shown as a dash placeholder. */}
                    {cell || '-'}
                  </td>
                ))}
              </tr>
            ))}
          </tbody>
        </table>
      ) : null}
    </div>
  );
};
| export default CSVFileViewer; | |||
| @@ -0,0 +1,67 @@ | |||
| import message from '@/components/ui/message'; | |||
| import { Spin } from '@/components/ui/spin'; | |||
| import request from '@/utils/request'; | |||
| import classNames from 'classnames'; | |||
| import mammoth from 'mammoth'; | |||
| import { useEffect, useState } from 'react'; | |||
| import { useGetDocumentUrl } from './hooks'; | |||
interface DocPreviewerProps {
  /** Extra class names merged onto the previewer's root element. */
  className?: string;
}

/**
 * Downloads the current document as a blob, converts it to HTML with mammoth
 * and renders the result. A spinner is shown while the conversion is pending.
 */
export const DocPreviewer: React.FC<DocPreviewerProps> = ({ className }) => {
  const url = useGetDocumentUrl();
  const [htmlContent, setHtmlContent] = useState<string>('');
  const [loading, setLoading] = useState(false);

  useEffect(() => {
    if (!url) {
      return;
    }

    // Set by the cleanup so a response for a previous url (or one arriving
    // after unmount) never touches state.
    let cancelled = false;

    const fetchDocument = async () => {
      setLoading(true);
      try {
        const res = await request(url, {
          method: 'GET',
          responseType: 'blob',
          onError: () => {
            message.error('Document parsing failed');
            console.error('Error loading document:', url);
          },
        });
        if (cancelled) {
          return;
        }
        const arrayBuffer = await res.data.arrayBuffer();
        const result = await mammoth.convertToHtml(
          { arrayBuffer },
          { includeDefaultStyleMap: true },
        );
        if (cancelled) {
          return;
        }
        // Attach spacing classes to the bare paragraph and heading tags
        // found in the converted HTML.
        const styledContent = result.value
          .replace(/<p>/g, '<p class="mb-2">')
          .replace(/<h(\d)>/g, '<h$1 class="font-semibold mt-4 mb-2">');
        setHtmlContent(styledContent);
      } catch (err) {
        if (cancelled) {
          return;
        }
        message.error('Document parsing failed');
        console.error('Error parsing document:', err);
        // Do not keep showing the previous document after a failure.
        setHtmlContent('');
      } finally {
        // Always clear the spinner, including when the request itself rejects.
        if (!cancelled) {
          setLoading(false);
        }
      }
    };

    fetchDocument();
    return () => {
      cancelled = true;
    };
  }, [url]);

  return (
    <div
      className={classNames(
        'relative w-full h-full p-4 bg-background-paper border border-border-normal rounded-md',
        className,
      )}
    >
      {loading && (
        <div className="absolute inset-0 flex items-center justify-center">
          <Spin />
        </div>
      )}
      {/*
        NOTE(review): the converted HTML is injected as-is. mammoth documents
        that it performs no sanitisation, so documents from untrusted sources
        should be passed through an HTML sanitiser before being rendered here.
      */}
      {!loading && <div dangerouslySetInnerHTML={{ __html: htmlContent }} />}
    </div>
  );
};
| @@ -0,0 +1,24 @@ | |||
| import { useFetchExcel } from '@/pages/document-viewer/hooks'; | |||
| import classNames from 'classnames'; | |||
| import { useGetDocumentUrl } from './hooks'; | |||
interface ExcelCsvPreviewerProps {
  /** Extra class names merged onto the previewer's root element. */
  className?: string;
}

/**
 * Host element for the spreadsheet preview: hands the current document url to
 * `useFetchExcel` and attaches the container ref that hook returns.
 */
export const ExcelCsvPreviewer: React.FC<ExcelCsvPreviewerProps> = (props) => {
  const documentUrl = useGetDocumentUrl();
  const excel = useFetchExcel(documentUrl);
  const rootClassName = classNames(
    'relative w-full h-full p-4 bg-background-paper border border-border-normal rounded-md excel-csv-previewer',
    props.className,
  );

  return <div ref={excel.containerRef} className={rootClassName} />;
};
| @@ -0,0 +1,72 @@ | |||
| import message from '@/components/ui/message'; | |||
| import { Spin } from '@/components/ui/spin'; | |||
| import request from '@/utils/request'; | |||
| import classNames from 'classnames'; | |||
| import { useEffect, useState } from 'react'; | |||
| import { useGetDocumentUrl } from './hooks'; | |||
interface ImagePreviewerProps {
  /** Extra class names merged onto the previewer's root element. */
  className?: string;
}

/**
 * Downloads the current document as a blob and displays it through an object
 * URL. A spinner is shown while the download is pending.
 */
export const ImagePreviewer: React.FC<ImagePreviewerProps> = ({
  className,
}) => {
  const url = useGetDocumentUrl();
  const [imageSrc, setImageSrc] = useState<string | null>(null);
  const [isLoading, setIsLoading] = useState<boolean>(true);

  useEffect(() => {
    if (!url) {
      return;
    }

    // Set by the cleanup so a response for a previous url (or one arriving
    // after unmount) never touches state.
    let cancelled = false;
    // Object URL created by this effect run; released in the cleanup below.
    let objectUrl: string | null = null;

    const fetchImage = async () => {
      setIsLoading(true);
      try {
        const res = await request(url, {
          method: 'GET',
          responseType: 'blob',
          onError: () => {
            message.error('Failed to load image');
          },
        });
        if (cancelled) {
          return;
        }
        if (!res?.data) {
          // No payload: the onError callback above is expected to have
          // reported the failure already.
          setImageSrc(null);
          return;
        }
        objectUrl = URL.createObjectURL(res.data);
        setImageSrc(objectUrl);
      } catch (err) {
        if (cancelled) {
          return;
        }
        message.error('Failed to load image');
        console.error('Error loading image:', err);
        setImageSrc(null);
      } finally {
        if (!cancelled) {
          setIsLoading(false);
        }
      }
    };

    fetchImage();
    return () => {
      cancelled = true;
      // Release the blob only when this url is replaced or the component
      // unmounts, so the src stays valid for as long as it is displayed.
      if (objectUrl) {
        URL.revokeObjectURL(objectUrl);
      }
    };
  }, [url]);

  return (
    <div
      className={classNames(
        'relative w-full h-full p-4 bg-background-paper border border-border-normal rounded-md image-previewer',
        className,
      )}
    >
      {isLoading && (
        <div className="absolute inset-0 flex items-center justify-center">
          <Spin />
        </div>
      )}
      {!isLoading && imageSrc && (
        <div className="max-h-[80vh] overflow-auto p-2">
          <img
            src={imageSrc}
            alt={'image'}
            className="w-full h-auto max-w-full object-contain"
          />
        </div>
      )}
    </div>
  );
};
| @@ -0,0 +1,65 @@ | |||
| import { memo } from 'react'; | |||
| import CSVFileViewer from './csv-preview'; | |||
| import { DocPreviewer } from './doc-preview'; | |||
| import { ExcelCsvPreviewer } from './excel-preview'; | |||
| import { ImagePreviewer } from './image-preview'; | |||
| import styles from './index.less'; | |||
| import PdfPreviewer, { IProps } from './pdf-preview'; | |||
| import { PptPreviewer } from './ppt-preview'; | |||
| import { TxtPreviewer } from './txt-preview'; | |||
type PreviewProps = {
  /** Selects which previewer is rendered ('pdf', 'docx', 'txt', 'visual', ...). */
  fileType: string;
  /** Forwarded to every previewer except the PDF one. */
  className?: string;
};

/**
 * Renders the previewer that matches `fileType`. The PDF previewer is only
 * rendered when both `highlights` and `setWidthAndHeight` are supplied; an
 * unrecognised file type renders nothing.
 */
const Preview = ({
  fileType,
  className,
  highlights,
  setWidthAndHeight,
}: PreviewProps & Partial<IProps>) => {
  const isWordDocument = ['doc', 'docx'].includes(fileType);
  const isPlainText = ['txt', 'md'].includes(fileType);
  const isImage = fileType === 'visual';
  const isSlides = fileType === 'pptx';
  const isSpreadsheet = fileType === 'xlsx';
  const isCsv = fileType === 'csv';

  return (
    <>
      {fileType === 'pdf' && highlights && setWidthAndHeight && (
        <section className={styles.documentPreview}>
          <PdfPreviewer
            highlights={highlights}
            setWidthAndHeight={setWidthAndHeight}
          ></PdfPreviewer>
        </section>
      )}
      {isWordDocument && (
        <section>
          <DocPreviewer className={className} />
        </section>
      )}
      {isPlainText && (
        <section>
          <TxtPreviewer className={className} />
        </section>
      )}
      {isImage && (
        <section>
          <ImagePreviewer className={className} />
        </section>
      )}
      {isSlides && (
        <section>
          <PptPreviewer className={className} />
        </section>
      )}
      {isSpreadsheet && (
        <section>
          <ExcelCsvPreviewer className={className} />
        </section>
      )}
      {isCsv && (
        <section>
          <CSVFileViewer className={className} />
        </section>
      )}
    </>
  );
};
| export default memo(Preview); | |||
| @@ -14,7 +14,7 @@ import { useCatchDocumentError } from '@/components/pdf-previewer/hooks'; | |||
| import FileError from '@/pages/document-viewer/file-error'; | |||
| import styles from './index.less'; | |||
| interface IProps { | |||
| export interface IProps { | |||
| highlights: IHighlight[]; | |||
| setWidthAndHeight: (width: number, height: number) => void; | |||
| } | |||
| @@ -30,7 +30,7 @@ const HighlightPopup = ({ | |||
| ) : null; | |||
| // TODO: merge with DocumentPreviewer | |||
| const Preview = ({ highlights: state, setWidthAndHeight }: IProps) => { | |||
| const PdfPreview = ({ highlights: state, setWidthAndHeight }: IProps) => { | |||
| const url = useGetDocumentUrl(); | |||
| const ref = useRef<(highlight: IHighlight) => void>(() => {}); | |||
| @@ -120,4 +120,4 @@ const Preview = ({ highlights: state, setWidthAndHeight }: IProps) => { | |||
| ); | |||
| }; | |||
| export default memo(Preview); | |||
| export default memo(PdfPreview); | |||
| @@ -0,0 +1,67 @@ | |||
| import message from '@/components/ui/message'; | |||
| import request from '@/utils/request'; | |||
| import classNames from 'classnames'; | |||
| import { init } from 'pptx-preview'; | |||
| import { useEffect, useRef } from 'react'; | |||
| import { useGetDocumentUrl } from './hooks'; | |||
interface PptPreviewerProps {
  /** Extra class names merged onto the previewer's root element. */
  className?: string;
}

/** Slide size used when the container has no measurable size yet. */
const DEFAULT_SLIDE_WIDTH = 500;
const DEFAULT_SLIDE_HEIGHT = 900;
/** Space subtracted from the container's size before it is used as the slide size. */
const SLIDE_MARGIN = 50;

/**
 * Downloads the current document as a blob and renders it with `pptx-preview`
 * into the component's root element.
 */
export const PptPreviewer: React.FC<PptPreviewerProps> = ({ className }) => {
  const url = useGetDocumentUrl();
  const wrapper = useRef<HTMLDivElement>(null);
  const containerRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    if (!url) {
      return;
    }

    // Set by the cleanup so a response for a previous url (or one arriving
    // after unmount) is not rendered.
    let cancelled = false;

    const fetchDocument = async () => {
      try {
        const res = await request(url, {
          method: 'GET',
          responseType: 'blob',
          onError: () => {
            message.error('Document parsing failed');
            console.error('Error loading document:', url);
          },
        });
        const container = containerRef.current;
        if (cancelled || !container) {
          return;
        }
        const arrayBuffer = await res.data.arrayBuffer();
        if (cancelled) {
          return;
        }

        // Size the slides from the container, falling back to the defaults
        // when the container has not been laid out (a non-positive size).
        const measuredWidth = container.clientWidth - SLIDE_MARGIN;
        const measuredHeight = container.clientHeight - SLIDE_MARGIN;
        const pptxPreviewer = init(container, {
          width: measuredWidth > 0 ? measuredWidth : DEFAULT_SLIDE_WIDTH,
          height: measuredHeight > 0 ? measuredHeight : DEFAULT_SLIDE_HEIGHT,
        });
        // Awaited so a rendering failure is reported by the catch below
        // instead of surfacing as an unhandled rejection.
        await pptxPreviewer.preview(arrayBuffer);
      } catch (err) {
        if (cancelled) {
          return;
        }
        message.error('ppt parse failed');
        console.error('Error parsing ppt:', err);
      }
    };

    fetchDocument();
    return () => {
      cancelled = true;
    };
  }, [url]);

  return (
    <div
      ref={containerRef}
      className={classNames(
        'relative w-full h-full p-4 bg-background-paper border border-border-normal rounded-md ppt-previewer',
        className,
      )}
    >
      <div className="overflow-auto p-2">
        <div className="flex flex-col gap-4">
          <div ref={wrapper} />
        </div>
      </div>
    </div>
  );
};
| @@ -0,0 +1,57 @@ | |||
| import message from '@/components/ui/message'; | |||
| import request from '@/utils/request'; | |||
| import { Spin } from 'antd'; | |||
| import classNames from 'classnames'; | |||
| import { useEffect, useState } from 'react'; | |||
| import { useGetDocumentUrl } from './hooks'; | |||
type TxtPreviewerProps = {
  /** Extra class names merged onto the previewer's root element. */
  className?: string;
};

/**
 * Downloads the current document as a blob and shows its text content in a
 * `<pre>` element. A spinner is shown while the download is pending.
 */
export const TxtPreviewer = ({ className }: TxtPreviewerProps) => {
  const url = useGetDocumentUrl();
  const [loading, setLoading] = useState(false);
  const [data, setData] = useState<string>('');

  useEffect(() => {
    if (!url) {
      setLoading(false);
      setData('');
      return;
    }

    // Set by the cleanup so a response for a previous url (or one arriving
    // after unmount) never touches state.
    let cancelled = false;

    const fetchTxt = async () => {
      setLoading(true);
      try {
        const res = await request(url, {
          method: 'GET',
          responseType: 'blob',
          onError: (err: any) => {
            message.error('Failed to load file');
            console.error('Error loading file:', err);
          },
        });
        if (cancelled) {
          return;
        }
        if (!res?.data) {
          // No payload: the onError callback above is expected to have
          // reported the failure already.
          setData('');
          return;
        }
        const text: string = await res.data.text();
        if (cancelled) {
          return;
        }
        setData(text);
      } catch (err: unknown) {
        if (cancelled) {
          return;
        }
        message.error('Failed to load file');
        console.error('Error loading file:', err);
        setData('');
      } finally {
        // Clear the spinner on every outcome; previously a failed request
        // left it spinning forever.
        if (!cancelled) {
          setLoading(false);
        }
      }
    };

    fetchTxt();
    return () => {
      cancelled = true;
    };
  }, [url]);

  return (
    <div
      className={classNames(
        'relative w-full h-full p-4 bg-background-paper border border-border-normal rounded-md',
        className,
      )}
    >
      {loading && (
        <div className="absolute inset-0 flex items-center justify-center">
          <Spin />
        </div>
      )}
      {!loading && <pre className="whitespace-pre-wrap p-2 ">{data}</pre>}
    </div>
  );
};
| @@ -35,7 +35,8 @@ | |||
| .documentPreview { | |||
| // width: 40%; | |||
| height: 100%; | |||
| height: calc(100vh - 130px); | |||
| overflow: auto; | |||
| } | |||
| .chunkContainer { | |||
| @@ -3,11 +3,11 @@ import { | |||
| useSwitchChunk, | |||
| } from '@/hooks/use-chunk-request'; | |||
| import classNames from 'classnames'; | |||
| import { useCallback, useEffect, useState } from 'react'; | |||
| import { useCallback, useEffect, useMemo, useState } from 'react'; | |||
| import { useTranslation } from 'react-i18next'; | |||
| import ChunkCard from './components/chunk-card'; | |||
| import CreatingModal from './components/chunk-creating-modal'; | |||
| import DocumentPreview from './components/document-preview/preview'; | |||
| import DocumentPreview from './components/document-preview'; | |||
| import { | |||
| useChangeChunkTextMode, | |||
| useDeleteChunkByIds, | |||
| @@ -143,6 +143,20 @@ const Chunk = () => { | |||
| const { highlights, setWidthAndHeight } = | |||
| useGetChunkHighlights(selectedChunkId); | |||
| const fileType = useMemo(() => { | |||
| switch (documentInfo?.type) { | |||
| case 'doc': | |||
| return documentInfo?.name.split('.').pop() || 'doc'; | |||
| case 'visual': | |||
| case 'docx': | |||
| case 'txt': | |||
| case 'md': | |||
| case 'pdf': | |||
| return documentInfo?.type; | |||
| } | |||
| return 'unknown'; | |||
| }, [documentInfo]); | |||
| return ( | |||
| <> | |||
| <div className={styles.chunkPage}> | |||
| @@ -151,14 +165,14 @@ const Chunk = () => { | |||
| <div className="h-[100px] flex flex-col justify-end pb-[5px]"> | |||
| <DocumentHeader {...documentInfo} /> | |||
| </div> | |||
| {isPdf && ( | |||
| <section className={styles.documentPreview}> | |||
| <DocumentPreview | |||
| highlights={highlights} | |||
| setWidthAndHeight={setWidthAndHeight} | |||
| ></DocumentPreview> | |||
| </section> | |||
| )} | |||
| <section className={styles.documentPreview}> | |||
| <DocumentPreview | |||
| className={styles.documentPreview} | |||
| fileType={fileType} | |||
| highlights={highlights} | |||
| setWidthAndHeight={setWidthAndHeight} | |||
| ></DocumentPreview> | |||
| </section> | |||
| </div> | |||
| <div | |||
| className={classNames( | |||