Browse Source

Feat: Modify the parsing method string to an enumeration type. #5467 (#5468)

### What problem does this PR solve?

Feat: Change the parsing method from a plain string to an enumeration type (`DocumentParserType`). #5467

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
tags/v0.17.0
balibabu 8 months ago
parent
commit
b2a5482d2c
No account linked to committer's email address

+ 79
- 48
web/src/components/chunk-method-modal/hooks.ts View File

import { DocumentParserType } from '@/constants/knowledge';
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks'; import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
import { useSelectParserList } from '@/hooks/user-setting-hooks'; import { useSelectParserList } from '@/hooks/user-setting-hooks';
import { FormInstance } from 'antd'; import { FormInstance } from 'antd';
[ [
['pdf'], ['pdf'],
[ [
'naive',
'resume',
'manual',
'paper',
'book',
'laws',
'presentation',
'one',
'qa',
'knowledge_graph',
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Manual,
DocumentParserType.Paper,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.Presentation,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.KnowledgeGraph,
], ],
], ],
[ [
['doc', 'docx'], ['doc', 'docx'],
[ [
'naive',
'resume',
'book',
'laws',
'one',
'qa',
'manual',
'knowledge_graph',
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.Manual,
DocumentParserType.KnowledgeGraph,
], ],
], ],
[ [
['xlsx', 'xls'], ['xlsx', 'xls'],
['naive', 'qa', 'table', 'one', 'knowledge_graph'],
[
DocumentParserType.Naive,
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.One,
DocumentParserType.KnowledgeGraph,
],
], ],
[['ppt', 'pptx'], ['presentation']],
[['ppt', 'pptx'], [DocumentParserType.Presentation]],
[ [
['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tif', 'tiff', 'webp', 'svg', 'ico'], ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tif', 'tiff', 'webp', 'svg', 'ico'],
['picture'],
[DocumentParserType.Picture],
], ],
[ [
['txt'], ['txt'],
[ [
'naive',
'resume',
'book',
'laws',
'one',
'qa',
'table',
'knowledge_graph',
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
], ],
], ],
[ [
['csv'], ['csv'],
[ [
'naive',
'resume',
'book',
'laws',
'one',
'qa',
'table',
'knowledge_graph',
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
],
],
[
['md'],
[
DocumentParserType.Naive,
DocumentParserType.Qa,
DocumentParserType.KnowledgeGraph,
], ],
], ],
[['md'], ['naive', 'qa', 'knowledge_graph']],
[['json'], ['naive', 'knowledge_graph']],
[['eml'], ['email']],
[['json'], [DocumentParserType.Naive, DocumentParserType.KnowledgeGraph]],
[['eml'], [DocumentParserType.Email]],
]); ]);


const getParserList = ( const getParserList = (


export const useFetchParserListOnMount = ( export const useFetchParserListOnMount = (
documentId: string, documentId: string,
parserId: string,
parserId: DocumentParserType,
documentExtension: string, documentExtension: string,
form: FormInstance, form: FormInstance,
) => { ) => {
const [selectedTag, setSelectedTag] = useState('');
const [selectedTag, setSelectedTag] = useState<DocumentParserType>();
const parserList = useSelectParserList(); const parserList = useSelectParserList();
const handleChunkMethodSelectChange = useHandleChunkMethodSelectChange(form); const handleChunkMethodSelectChange = useHandleChunkMethodSelectChange(form);


} }


return getParserList( return getParserList(
['naive', 'resume', 'book', 'laws', 'one', 'qa', 'table'],
[
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.Table,
],
parserList, parserList,
); );
}, [parserList, documentExtension]); }, [parserList, documentExtension]);


const handleChange = (tag: string) => { const handleChange = (tag: string) => {
handleChunkMethodSelectChange(tag); handleChunkMethodSelectChange(tag);
setSelectedTag(tag);
setSelectedTag(tag as DocumentParserType);
}; };


return { parserList: nextParserList, handleChange, selectedTag }; return { parserList: nextParserList, handleChange, selectedTag };
}; };


const hideAutoKeywords = ['qa', 'table', 'resume', 'knowledge_graph', 'tag'];
const hideAutoKeywords = [
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.Resume,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Tag,
];


export const useShowAutoKeywords = () => { export const useShowAutoKeywords = () => {
const showAutoKeywords = useCallback((selectedTag: string) => {
return hideAutoKeywords.every((x) => selectedTag !== x);
}, []);
const showAutoKeywords = useCallback(
(selectedTag: DocumentParserType | undefined) => {
return hideAutoKeywords.every((x) => selectedTag !== x);
},
[],
);


return showAutoKeywords; return showAutoKeywords;
}; };

+ 19
- 13
web/src/components/chunk-method-modal/index.tsx View File

import React, { useEffect, useMemo } from 'react'; import React, { useEffect, useMemo } from 'react';
import { useFetchParserListOnMount, useShowAutoKeywords } from './hooks'; import { useFetchParserListOnMount, useShowAutoKeywords } from './hooks';


import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks'; import { useTranslate } from '@/hooks/common-hooks';
import { IParserConfig } from '@/interfaces/database/document'; import { IParserConfig } from '@/interfaces/database/document';
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document'; import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
interface IProps extends Omit<IModalManagerChildrenProps, 'showModal'> { interface IProps extends Omit<IModalManagerChildrenProps, 'showModal'> {
loading: boolean; loading: boolean;
onOk: ( onOk: (
parserId: string,
parserId: DocumentParserType | undefined,
parserConfig: IChangeParserConfigRequestBody, parserConfig: IChangeParserConfigRequestBody,
) => void; ) => void;
showModal?(): void; showModal?(): void;
parserId: string;
parserId: DocumentParserType;
parserConfig: IParserConfig; parserConfig: IParserConfig;
documentExtension: string; documentExtension: string;
documentId: string; documentId: string;
} }


const hidePagesChunkMethods = [ const hidePagesChunkMethods = [
'qa',
'table',
'picture',
'resume',
'one',
'knowledge_graph',
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.Picture,
DocumentParserType.Resume,
DocumentParserType.One,
DocumentParserType.KnowledgeGraph,
]; ];


const ChunkMethodModal: React.FC<IProps> = ({ const ChunkMethodModal: React.FC<IProps> = ({
return ( return (
isPdf && isPdf &&
hidePagesChunkMethods hidePagesChunkMethods
.filter((x) => x !== 'one')
.filter((x) => x !== DocumentParserType.One)
.every((x) => x !== selectedTag) .every((x) => x !== selectedTag)
); );
}, [selectedTag, isPdf]); }, [selectedTag, isPdf]);


const showMaxTokenNumber = const showMaxTokenNumber =
selectedTag === 'naive' || selectedTag === 'knowledge_graph';
selectedTag === DocumentParserType.Naive ||
selectedTag === DocumentParserType.KnowledgeGraph;


const hideDivider = [showPages, showOne, showMaxTokenNumber].every( const hideDivider = [showPages, showOne, showMaxTokenNumber].every(
(x) => x === false, (x) => x === false,
); );


const showEntityTypes = selectedTag === 'knowledge_graph';
const showEntityTypes = selectedTag === DocumentParserType.KnowledgeGraph;


const showExcelToHtml = const showExcelToHtml =
selectedTag === 'naive' && documentExtension === 'xlsx';
selectedTag === DocumentParserType.Naive && documentExtension === 'xlsx';


const showAutoKeywords = useShowAutoKeywords(); const showAutoKeywords = useShowAutoKeywords();


{showMaxTokenNumber && ( {showMaxTokenNumber && (
<> <>
<MaxTokenNumber <MaxTokenNumber
max={selectedTag === 'knowledge_graph' ? 8192 * 2 : 2048}
max={
selectedTag === DocumentParserType.KnowledgeGraph
? 8192 * 2
: 2048
}
></MaxTokenNumber> ></MaxTokenNumber>
<Delimiter></Delimiter> <Delimiter></Delimiter>
</> </>

+ 15
- 10
web/src/components/parse-configuration/graph-rag-items.tsx View File

import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks'; import { useTranslate } from '@/hooks/common-hooks';
import { Form, Select, Switch } from 'antd'; import { Form, Select, Switch } from 'antd';
import { upperFirst } from 'lodash'; import { upperFirst } from 'lodash';
import { useCallback, useMemo } from 'react'; import { useCallback, useMemo } from 'react';
import EntityTypesItem from '../entity-types-item'; import EntityTypesItem from '../entity-types-item';


const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag'];
const excludedTagParseMethods = [
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Tag,
];


export const showTagItems = (parserId: string) => {
export const showTagItems = (parserId: DocumentParserType) => {
return !excludedTagParseMethods.includes(parserId); return !excludedTagParseMethods.includes(parserId);
}; };


} }


export const excludedParseMethods = [ export const excludedParseMethods = [
'table',
'resume',
'picture',
'knowledge_graph',
'qa',
'tag',
DocumentParserType.Table,
DocumentParserType.Resume,
DocumentParserType.Picture,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Qa,
DocumentParserType.Tag,
]; ];


export const showGraphRagItems = (parserId: string) => {
return !excludedParseMethods.includes(parserId);
export const showGraphRagItems = (parserId: DocumentParserType | undefined) => {
return !excludedParseMethods.some((x) => x === parserId);
}; };


// The three types "table", "resume" and "one" do not display this configuration. // The three types "table", "resume" and "one" do not display this configuration.

+ 18
- 11
web/src/components/parse-configuration/index.tsx View File

import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks'; import { useTranslate } from '@/hooks/common-hooks';
import { PlusOutlined } from '@ant-design/icons'; import { PlusOutlined } from '@ant-design/icons';
import { import {
import random from 'lodash/random'; import random from 'lodash/random';


export const excludedParseMethods = [ export const excludedParseMethods = [
'table',
'resume',
'one',
'picture',
'knowledge_graph',
'qa',
'tag',
DocumentParserType.Table,
DocumentParserType.Resume,
DocumentParserType.One,
DocumentParserType.Picture,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Qa,
DocumentParserType.Tag,
]; ];


export const showRaptorParseConfiguration = (parserId: string) => {
return !excludedParseMethods.includes(parserId);
export const showRaptorParseConfiguration = (
parserId: DocumentParserType | undefined,
) => {
return !excludedParseMethods.some((x) => x === parserId);
}; };


export const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag'];
export const excludedTagParseMethods = [
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Tag,
];


export const showTagItems = (parserId: string) => {
export const showTagItems = (parserId: DocumentParserType) => {
return !excludedTagParseMethods.includes(parserId); return !excludedTagParseMethods.includes(parserId);
}; };



+ 1
- 0
web/src/constants/knowledge.ts View File

Audio = 'audio', Audio = 'audio',
Email = 'email', Email = 'email',
Tag = 'tag', Tag = 'tag',
KnowledgeGraph = 'knowledge_graph',
} }

+ 3
- 2
web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx View File

import GraphRagItems, { import GraphRagItems, {
showGraphRagItems, showGraphRagItems,
} from '@/components/parse-configuration/graph-rag-items'; } from '@/components/parse-configuration/graph-rag-items';
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks'; import { useTranslate } from '@/hooks/common-hooks';
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks'; import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
import { normFile } from '@/utils/file-util'; import { normFile } from '@/utils/file-util';


return ( return (
<> <>
{parserId === 'knowledge_graph' && (
{parserId === DocumentParserType.KnowledgeGraph && (
<> <>
<EntityTypesItem></EntityTypesItem> <EntityTypesItem></EntityTypesItem>
<MaxTokenNumber max={8192 * 2}></MaxTokenNumber> <MaxTokenNumber max={8192 * 2}></MaxTokenNumber>
<AutoQuestionsItem></AutoQuestionsItem> <AutoQuestionsItem></AutoQuestionsItem>
</> </>
)} )}
{parserId === 'naive' && (
{parserId === DocumentParserType.Naive && (
<> <>
<MaxTokenNumber></MaxTokenNumber> <MaxTokenNumber></MaxTokenNumber>
<Delimiter></Delimiter> <Delimiter></Delimiter>

Loading…
Cancel
Save