Browse Source

Fix:Added support for preview of txt, md, excel, csv, ppt, image, doc and other files (#8712)

### What problem does this PR solve?

Added support for preview of txt, md, excel, csv, ppt, image, doc and
other files [#3221](https://github.com/infiniflow/ragflow/issues/3221)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
tags/v0.20.0
chanx 3 months ago
parent
commit
01f81b24f6
No account linked to committer's email address

+ 1
- 0
web/package.json View File

@@ -81,6 +81,7 @@
"mammoth": "^1.7.2",
"next-themes": "^0.4.6",
"openai-speech-stream-player": "^1.0.8",
"pptx-preview": "^1.0.5",
"rc-tween-one": "^3.0.6",
"react-copy-to-clipboard": "^5.1.0",
"react-dropzone": "^14.3.5",

+ 114
- 0
web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/csv-preview.tsx View File

@@ -0,0 +1,114 @@
import message from '@/components/ui/message';
import { Spin } from '@/components/ui/spin';
import request from '@/utils/request';
import classNames from 'classnames';
import React, { useEffect, useRef, useState } from 'react';
import { useGetDocumentUrl } from './hooks';

interface CSVData {
rows: string[][];
headers: string[];
}

interface FileViewerProps {
className?: string;
}

const CSVFileViewer: React.FC<FileViewerProps> = () => {
const [data, setData] = useState<CSVData | null>(null);
const [isLoading, setIsLoading] = useState<boolean>(true);
const containerRef = useRef<HTMLDivElement>(null);
const url = useGetDocumentUrl();
const parseCSV = (csvText: string): CSVData => {
console.log('Parsing CSV data:', csvText);
const lines = csvText.split('\n');
const headers = lines[0].split(',').map((header) => header.trim());
const rows = lines
.slice(1)
.map((line) => line.split(',').map((cell) => cell.trim()));

return { headers, rows };
};

useEffect(() => {
const loadCSV = async () => {
try {
const res = await request(url, {
method: 'GET',
responseType: 'blob',
onError: (err) => {
message.error('file load failed');
setIsLoading(false);
},
});

// parse CSV file
const reader = new FileReader();
reader.readAsText(res.data);
reader.onload = () => {
const parsedData = parseCSV(reader.result as string);
console.log('file loaded successfully', reader.result);
setData(parsedData);
};
} catch (error) {
message.error('CSV file parse failed');
console.error('Error loading CSV file:', error);
} finally {
setIsLoading(false);
}
};

loadCSV();

return () => {
setData(null);
};
}, [url]);

return (
<div
ref={containerRef}
className={classNames(
'relative w-full h-full p-4 bg-background-paper border border-border-normal rounded-md',
'overflow-auto max-h-[80vh] p-2',
)}
>
{isLoading ? (
<div className="absolute inset-0 flex items-center justify-center">
<Spin />
</div>
) : data ? (
<table className="min-w-full divide-y divide-border-normal">
<thead className="bg-background-header-bar">
<tr>
{data.headers.map((header, index) => (
<th
key={`header-${index}`}
className="px-6 py-3 text-left text-sm font-medium text-text-title"
>
{header}
</th>
))}
</tr>
</thead>
<tbody className="bg-background-paper divide-y divide-border-normal">
{data.rows.map((row, rowIndex) => (
<tr key={`row-${rowIndex}`}>
{row.map((cell, cellIndex) => (
<td
key={`cell-${rowIndex}-${cellIndex}`}
className="px-6 py-4 whitespace-nowrap text-sm text-text-secondary"
>
{cell || '-'}
</td>
))}
</tr>
))}
</tbody>
</table>
) : null}
</div>
);
};

export default CSVFileViewer;

+ 67
- 0
web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/doc-preview.tsx View File

@@ -0,0 +1,67 @@
import message from '@/components/ui/message';
import { Spin } from '@/components/ui/spin';
import request from '@/utils/request';
import classNames from 'classnames';
import mammoth from 'mammoth';
import { useEffect, useState } from 'react';
import { useGetDocumentUrl } from './hooks';

interface DocPreviewerProps {
className?: string;
}

export const DocPreviewer: React.FC<DocPreviewerProps> = ({ className }) => {
const url = useGetDocumentUrl();
const [htmlContent, setHtmlContent] = useState<string>('');
const [loading, setLoading] = useState(false);
const fetchDocument = async () => {
setLoading(true);
const res = await request(url, {
method: 'GET',
responseType: 'blob',
onError: () => {
message.error('Document parsing failed');
console.error('Error loading document:', url);
},
});
try {
const arrayBuffer = await res.data.arrayBuffer();
const result = await mammoth.convertToHtml(
{ arrayBuffer },
{ includeDefaultStyleMap: true },
);

const styledContent = result.value
.replace(/<p>/g, '<p class="mb-2">')
.replace(/<h(\d)>/g, '<h$1 class="font-semibold mt-4 mb-2">');

setHtmlContent(styledContent);
} catch (err) {
message.error('Document parsing failed');
console.error('Error parsing document:', err);
}
setLoading(false);
};

useEffect(() => {
if (url) {
fetchDocument();
}
}, [url]);
return (
<div
className={classNames(
'relative w-full h-full p-4 bg-background-paper border border-border-normal rounded-md',
className,
)}
>
{loading && (
<div className="absolute inset-0 flex items-center justify-center">
<Spin />
</div>
)}

{!loading && <div dangerouslySetInnerHTML={{ __html: htmlContent }} />}
</div>
);
};

+ 24
- 0
web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/excel-preview.tsx View File

@@ -0,0 +1,24 @@
import { useFetchExcel } from '@/pages/document-viewer/hooks';
import classNames from 'classnames';
import { useGetDocumentUrl } from './hooks';

interface ExcelCsvPreviewerProps {
className?: string;
}

export const ExcelCsvPreviewer: React.FC<ExcelCsvPreviewerProps> = ({
className,
}) => {
const url = useGetDocumentUrl();
const { containerRef } = useFetchExcel(url);

return (
<div
ref={containerRef}
className={classNames(
'relative w-full h-full p-4 bg-background-paper border border-border-normal rounded-md excel-csv-previewer',
className,
)}
></div>
);
};

+ 72
- 0
web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/image-preview.tsx View File

@@ -0,0 +1,72 @@
import message from '@/components/ui/message';
import { Spin } from '@/components/ui/spin';
import request from '@/utils/request';
import classNames from 'classnames';
import { useEffect, useState } from 'react';
import { useGetDocumentUrl } from './hooks';

interface ImagePreviewerProps {
className?: string;
}

export const ImagePreviewer: React.FC<ImagePreviewerProps> = ({
className,
}) => {
const url = useGetDocumentUrl();
const [imageSrc, setImageSrc] = useState<string | null>(null);
const [isLoading, setIsLoading] = useState<boolean>(true);

const fetchImage = async () => {
setIsLoading(true);
const res = await request(url, {
method: 'GET',
responseType: 'blob',
onError: () => {
message.error('Failed to load image');
setIsLoading(false);
},
});
const objectUrl = URL.createObjectURL(res.data);
setImageSrc(objectUrl);
setIsLoading(false);
};
useEffect(() => {
if (url) {
fetchImage();
}
}, [url]);

useEffect(() => {
return () => {
if (imageSrc) {
URL.revokeObjectURL(imageSrc);
}
};
}, [imageSrc]);

return (
<div
className={classNames(
'relative w-full h-full p-4 bg-background-paper border border-border-normal rounded-md image-previewer',
className,
)}
>
{isLoading && (
<div className="absolute inset-0 flex items-center justify-center">
<Spin />
</div>
)}

{!isLoading && imageSrc && (
<div className="max-h-[80vh] overflow-auto p-2">
<img
src={imageSrc}
alt={'image'}
className="w-full h-auto max-w-full object-contain"
onLoad={() => URL.revokeObjectURL(imageSrc!)}
/>
</div>
)}
</div>
);
};

+ 65
- 0
web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/index.tsx View File

@@ -0,0 +1,65 @@
import { memo } from 'react';

import CSVFileViewer from './csv-preview';
import { DocPreviewer } from './doc-preview';
import { ExcelCsvPreviewer } from './excel-preview';
import { ImagePreviewer } from './image-preview';
import styles from './index.less';
import PdfPreviewer, { IProps } from './pdf-preview';
import { PptPreviewer } from './ppt-preview';
import { TxtPreviewer } from './txt-preview';

type PreviewProps = {
fileType: string;
className?: string;
};
const Preview = ({
fileType,
className,
highlights,
setWidthAndHeight,
}: PreviewProps & Partial<IProps>) => {
return (
<>
{fileType === 'pdf' && highlights && setWidthAndHeight && (
<section className={styles.documentPreview}>
<PdfPreviewer
highlights={highlights}
setWidthAndHeight={setWidthAndHeight}
></PdfPreviewer>
</section>
)}
{['doc', 'docx'].indexOf(fileType) > -1 && (
<section>
<DocPreviewer className={className} />
</section>
)}
{['txt', 'md'].indexOf(fileType) > -1 && (
<section>
<TxtPreviewer className={className} />
</section>
)}
{['visual'].indexOf(fileType) > -1 && (
<section>
<ImagePreviewer className={className} />
</section>
)}
{['pptx'].indexOf(fileType) > -1 && (
<section>
<PptPreviewer className={className} />
</section>
)}
{['xlsx'].indexOf(fileType) > -1 && (
<section>
<ExcelCsvPreviewer className={className} />
</section>
)}
{['csv'].indexOf(fileType) > -1 && (
<section>
<CSVFileViewer className={className} />
</section>
)}
</>
);
};
export default memo(Preview);

web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/preview.tsx → web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/pdf-preview.tsx View File

@@ -14,7 +14,7 @@ import { useCatchDocumentError } from '@/components/pdf-previewer/hooks';
import FileError from '@/pages/document-viewer/file-error';
import styles from './index.less';

interface IProps {
export interface IProps {
highlights: IHighlight[];
setWidthAndHeight: (width: number, height: number) => void;
}
@@ -30,7 +30,7 @@ const HighlightPopup = ({
) : null;

// TODO: merge with DocumentPreviewer
const Preview = ({ highlights: state, setWidthAndHeight }: IProps) => {
const PdfPreview = ({ highlights: state, setWidthAndHeight }: IProps) => {
const url = useGetDocumentUrl();

const ref = useRef<(highlight: IHighlight) => void>(() => {});
@@ -120,4 +120,4 @@ const Preview = ({ highlights: state, setWidthAndHeight }: IProps) => {
);
};

export default memo(Preview);
export default memo(PdfPreview);

+ 67
- 0
web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/ppt-preview.tsx View File

@@ -0,0 +1,67 @@
import message from '@/components/ui/message';
import request from '@/utils/request';
import classNames from 'classnames';
import { init } from 'pptx-preview';
import { useEffect, useRef } from 'react';
import { useGetDocumentUrl } from './hooks';
interface PptPreviewerProps {
className?: string;
}

export const PptPreviewer: React.FC<PptPreviewerProps> = ({ className }) => {
const url = useGetDocumentUrl();
const wrapper = useRef<HTMLDivElement>(null);
const containerRef = useRef<HTMLDivElement>(null);
const fetchDocument = async () => {
const res = await request(url, {
method: 'GET',
responseType: 'blob',
onError: () => {
message.error('Document parsing failed');
console.error('Error loading document:', url);
},
});
console.log(res);
try {
const arrayBuffer = await res.data.arrayBuffer();

if (containerRef.current) {
let width = 500;
let height = 900;
if (containerRef.current) {
width = containerRef.current.clientWidth - 50;
height = containerRef.current.clientHeight - 50;
}
let pptxPrviewer = init(containerRef.current, {
width: width,
height: height,
});
pptxPrviewer.preview(arrayBuffer);
}
} catch (err) {
message.error('ppt parse failed');
}
};

useEffect(() => {
if (url) {
fetchDocument();
}
}, [url]);

return (
<div
ref={containerRef}
className={classNames(
'relative w-full h-full p-4 bg-background-paper border border-border-normal rounded-md ppt-previewer',
className,
)}
>
<div className="overflow-auto p-2">
<div className="flex flex-col gap-4">
<div ref={wrapper} />
</div>
</div>
</div>
);
};

+ 57
- 0
web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/txt-preview.tsx View File

@@ -0,0 +1,57 @@
import message from '@/components/ui/message';
import request from '@/utils/request';
import { Spin } from 'antd';
import classNames from 'classnames';
import { useEffect, useState } from 'react';
import { useGetDocumentUrl } from './hooks';

type TxtPreviewerProps = { className?: string };
export const TxtPreviewer = ({ className }: TxtPreviewerProps) => {
const url = useGetDocumentUrl();
const [loading, setLoading] = useState(false);
const [data, setData] = useState<string>('');
const fetchTxt = async () => {
setLoading(true);
const res = await request(url, {
method: 'GET',
responseType: 'blob',
onError: (err: any) => {
message.error('Failed to load file');
console.error('Error loading file:', err);
},
});
// blob to string
const reader = new FileReader();
reader.readAsText(res.data);
reader.onload = () => {
setData(reader.result as string);
setLoading(false);
console.log('file loaded successfully', reader.result);
};
console.log('file data:', res);
};
useEffect(() => {
if (url) {
fetchTxt();
} else {
setLoading(false);
setData('');
}
}, [url]);
return (
<div
className={classNames(
'relative w-full h-full p-4 bg-background-paper border border-border-normal rounded-md',
className,
)}
>
{loading && (
<div className="absolute inset-0 flex items-center justify-center">
<Spin />
</div>
)}

{!loading && <pre className="whitespace-pre-wrap p-2 ">{data}</pre>}
</div>
);
};

+ 2
- 1
web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.less View File

@@ -35,7 +35,8 @@

.documentPreview {
// width: 40%;
height: 100%;
height: calc(100vh - 130px);
overflow: auto;
}

.chunkContainer {

+ 24
- 10
web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.tsx View File

@@ -3,11 +3,11 @@ import {
useSwitchChunk,
} from '@/hooks/use-chunk-request';
import classNames from 'classnames';
import { useCallback, useEffect, useState } from 'react';
import { useCallback, useEffect, useMemo, useState } from 'react';
import { useTranslation } from 'react-i18next';
import ChunkCard from './components/chunk-card';
import CreatingModal from './components/chunk-creating-modal';
import DocumentPreview from './components/document-preview/preview';
import DocumentPreview from './components/document-preview';
import {
useChangeChunkTextMode,
useDeleteChunkByIds,
@@ -143,6 +143,20 @@ const Chunk = () => {
const { highlights, setWidthAndHeight } =
useGetChunkHighlights(selectedChunkId);

const fileType = useMemo(() => {
switch (documentInfo?.type) {
case 'doc':
return documentInfo?.name.split('.').pop() || 'doc';
case 'visual':
case 'docx':
case 'txt':
case 'md':
case 'pdf':
return documentInfo?.type;
}
return 'unknown';
}, [documentInfo]);

return (
<>
<div className={styles.chunkPage}>
@@ -151,14 +165,14 @@ const Chunk = () => {
<div className="h-[100px] flex flex-col justify-end pb-[5px]">
<DocumentHeader {...documentInfo} />
</div>
{isPdf && (
<section className={styles.documentPreview}>
<DocumentPreview
highlights={highlights}
setWidthAndHeight={setWidthAndHeight}
></DocumentPreview>
</section>
)}
<section className={styles.documentPreview}>
<DocumentPreview
className={styles.documentPreview}
fileType={fileType}
highlights={highlights}
setWidthAndHeight={setWidthAndHeight}
></DocumentPreview>
</section>
</div>
<div
className={classNames(

Loading…
Cancel
Save