ソースを参照

Feat: Change the parsing method from a string to an enumeration type. #5467 (#5468)

### What problem does this PR solve?

Feat: Change the parsing method from a string to an enumeration type. #5467

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
tags/v0.17.0
balibabu 8ヶ月前
コミット
b2a5482d2c
コミッターのメールアドレスに関連付けられたアカウントが存在しません

+ 79
- 48
web/src/components/chunk-method-modal/hooks.ts ファイルの表示

@@ -1,3 +1,4 @@
import { DocumentParserType } from '@/constants/knowledge';
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
import { useSelectParserList } from '@/hooks/user-setting-hooks';
import { FormInstance } from 'antd';
@@ -7,69 +8,82 @@ const ParserListMap = new Map([
[
['pdf'],
[
'naive',
'resume',
'manual',
'paper',
'book',
'laws',
'presentation',
'one',
'qa',
'knowledge_graph',
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Manual,
DocumentParserType.Paper,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.Presentation,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.KnowledgeGraph,
],
],
[
['doc', 'docx'],
[
'naive',
'resume',
'book',
'laws',
'one',
'qa',
'manual',
'knowledge_graph',
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.Manual,
DocumentParserType.KnowledgeGraph,
],
],
[
['xlsx', 'xls'],
['naive', 'qa', 'table', 'one', 'knowledge_graph'],
[
DocumentParserType.Naive,
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.One,
DocumentParserType.KnowledgeGraph,
],
],
[['ppt', 'pptx'], ['presentation']],
[['ppt', 'pptx'], [DocumentParserType.Presentation]],
[
['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tif', 'tiff', 'webp', 'svg', 'ico'],
['picture'],
[DocumentParserType.Picture],
],
[
['txt'],
[
'naive',
'resume',
'book',
'laws',
'one',
'qa',
'table',
'knowledge_graph',
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
],
],
[
['csv'],
[
'naive',
'resume',
'book',
'laws',
'one',
'qa',
'table',
'knowledge_graph',
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
],
],
[
['md'],
[
DocumentParserType.Naive,
DocumentParserType.Qa,
DocumentParserType.KnowledgeGraph,
],
],
[['md'], ['naive', 'qa', 'knowledge_graph']],
[['json'], ['naive', 'knowledge_graph']],
[['eml'], ['email']],
[['json'], [DocumentParserType.Naive, DocumentParserType.KnowledgeGraph]],
[['eml'], [DocumentParserType.Email]],
]);

const getParserList = (
@@ -84,11 +98,11 @@ const getParserList = (

export const useFetchParserListOnMount = (
documentId: string,
parserId: string,
parserId: DocumentParserType,
documentExtension: string,
form: FormInstance,
) => {
const [selectedTag, setSelectedTag] = useState('');
const [selectedTag, setSelectedTag] = useState<DocumentParserType>();
const parserList = useSelectParserList();
const handleChunkMethodSelectChange = useHandleChunkMethodSelectChange(form);

@@ -102,7 +116,15 @@ export const useFetchParserListOnMount = (
}

return getParserList(
['naive', 'resume', 'book', 'laws', 'one', 'qa', 'table'],
[
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.Table,
],
parserList,
);
}, [parserList, documentExtension]);
@@ -113,18 +135,27 @@ export const useFetchParserListOnMount = (

const handleChange = (tag: string) => {
handleChunkMethodSelectChange(tag);
setSelectedTag(tag);
setSelectedTag(tag as DocumentParserType);
};

return { parserList: nextParserList, handleChange, selectedTag };
};

const hideAutoKeywords = ['qa', 'table', 'resume', 'knowledge_graph', 'tag'];
const hideAutoKeywords = [
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.Resume,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Tag,
];

export const useShowAutoKeywords = () => {
const showAutoKeywords = useCallback((selectedTag: string) => {
return hideAutoKeywords.every((x) => selectedTag !== x);
}, []);
const showAutoKeywords = useCallback(
(selectedTag: DocumentParserType | undefined) => {
return hideAutoKeywords.every((x) => selectedTag !== x);
},
[],
);

return showAutoKeywords;
};

+ 19
- 13
web/src/components/chunk-method-modal/index.tsx ファイルの表示

@@ -19,6 +19,7 @@ import omit from 'lodash/omit';
import React, { useEffect, useMemo } from 'react';
import { useFetchParserListOnMount, useShowAutoKeywords } from './hooks';

import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import { IParserConfig } from '@/interfaces/database/document';
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
@@ -38,23 +39,23 @@ import styles from './index.less';
interface IProps extends Omit<IModalManagerChildrenProps, 'showModal'> {
loading: boolean;
onOk: (
parserId: string,
parserId: DocumentParserType | undefined,
parserConfig: IChangeParserConfigRequestBody,
) => void;
showModal?(): void;
parserId: string;
parserId: DocumentParserType;
parserConfig: IParserConfig;
documentExtension: string;
documentId: string;
}

const hidePagesChunkMethods = [
'qa',
'table',
'picture',
'resume',
'one',
'knowledge_graph',
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.Picture,
DocumentParserType.Resume,
DocumentParserType.One,
DocumentParserType.KnowledgeGraph,
];

const ChunkMethodModal: React.FC<IProps> = ({
@@ -95,22 +96,23 @@ const ChunkMethodModal: React.FC<IProps> = ({
return (
isPdf &&
hidePagesChunkMethods
.filter((x) => x !== 'one')
.filter((x) => x !== DocumentParserType.One)
.every((x) => x !== selectedTag)
);
}, [selectedTag, isPdf]);

const showMaxTokenNumber =
selectedTag === 'naive' || selectedTag === 'knowledge_graph';
selectedTag === DocumentParserType.Naive ||
selectedTag === DocumentParserType.KnowledgeGraph;

const hideDivider = [showPages, showOne, showMaxTokenNumber].every(
(x) => x === false,
);

const showEntityTypes = selectedTag === 'knowledge_graph';
const showEntityTypes = selectedTag === DocumentParserType.KnowledgeGraph;

const showExcelToHtml =
selectedTag === 'naive' && documentExtension === 'xlsx';
selectedTag === DocumentParserType.Naive && documentExtension === 'xlsx';

const showAutoKeywords = useShowAutoKeywords();

@@ -284,7 +286,11 @@ const ChunkMethodModal: React.FC<IProps> = ({
{showMaxTokenNumber && (
<>
<MaxTokenNumber
max={selectedTag === 'knowledge_graph' ? 8192 * 2 : 2048}
max={
selectedTag === DocumentParserType.KnowledgeGraph
? 8192 * 2
: 2048
}
></MaxTokenNumber>
<Delimiter></Delimiter>
</>

+ 15
- 10
web/src/components/parse-configuration/graph-rag-items.tsx ファイルの表示

@@ -1,12 +1,17 @@
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import { Form, Select, Switch } from 'antd';
import { upperFirst } from 'lodash';
import { useCallback, useMemo } from 'react';
import EntityTypesItem from '../entity-types-item';

const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag'];
const excludedTagParseMethods = [
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Tag,
];

export const showTagItems = (parserId: string) => {
export const showTagItems = (parserId: DocumentParserType) => {
return !excludedTagParseMethods.includes(parserId);
};

@@ -16,16 +21,16 @@ const enum MethodValue {
}

export const excludedParseMethods = [
'table',
'resume',
'picture',
'knowledge_graph',
'qa',
'tag',
DocumentParserType.Table,
DocumentParserType.Resume,
DocumentParserType.Picture,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Qa,
DocumentParserType.Tag,
];

export const showGraphRagItems = (parserId: string) => {
return !excludedParseMethods.includes(parserId);
export const showGraphRagItems = (parserId: DocumentParserType | undefined) => {
return !excludedParseMethods.some((x) => x === parserId);
};

// The three types "table", "resume" and "one" do not display this configuration.

+ 18
- 11
web/src/components/parse-configuration/index.tsx ファイルの表示

@@ -1,3 +1,4 @@
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import { PlusOutlined } from '@ant-design/icons';
import {
@@ -13,22 +14,28 @@ import {
import random from 'lodash/random';

export const excludedParseMethods = [
'table',
'resume',
'one',
'picture',
'knowledge_graph',
'qa',
'tag',
DocumentParserType.Table,
DocumentParserType.Resume,
DocumentParserType.One,
DocumentParserType.Picture,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Qa,
DocumentParserType.Tag,
];

export const showRaptorParseConfiguration = (parserId: string) => {
return !excludedParseMethods.includes(parserId);
export const showRaptorParseConfiguration = (
parserId: DocumentParserType | undefined,
) => {
return !excludedParseMethods.some((x) => x === parserId);
};

export const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag'];
export const excludedTagParseMethods = [
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Tag,
];

export const showTagItems = (parserId: string) => {
export const showTagItems = (parserId: DocumentParserType) => {
return !excludedTagParseMethods.includes(parserId);
};


+ 1
- 0
web/src/constants/knowledge.ts ファイルの表示

@@ -79,4 +79,5 @@ export enum DocumentParserType {
Audio = 'audio',
Email = 'email',
Tag = 'tag',
KnowledgeGraph = 'knowledge_graph',
}

+ 3
- 2
web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx ファイルの表示

@@ -16,6 +16,7 @@ import ParseConfiguration, {
import GraphRagItems, {
showGraphRagItems,
} from '@/components/parse-configuration/graph-rag-items';
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
import { normFile } from '@/utils/file-util';
@@ -127,7 +128,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {

return (
<>
{parserId === 'knowledge_graph' && (
{parserId === DocumentParserType.KnowledgeGraph && (
<>
<EntityTypesItem></EntityTypesItem>
<MaxTokenNumber max={8192 * 2}></MaxTokenNumber>
@@ -140,7 +141,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
<AutoQuestionsItem></AutoQuestionsItem>
</>
)}
{parserId === 'naive' && (
{parserId === DocumentParserType.Naive && (
<>
<MaxTokenNumber></MaxTokenNumber>
<Delimiter></Delimiter>

読み込み中…
キャンセル
保存