Ver código fonte

fix: fix document list overlap and optimize document list fetching (#15377)

tags/1.0.1
Wu Tianwei 7 meses atrás
pai
commit
2cf0cb471f
Nenhuma conta vinculada ao e-mail do autor do commit

+ 10
- 5
web/app/components/datasets/documents/detail/index.tsx Ver arquivo

import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
import { LayoutRight2LineMod } from '@/app/components/base/icons/src/public/knowledge' import { LayoutRight2LineMod } from '@/app/components/base/icons/src/public/knowledge'
import { useCheckSegmentBatchImportProgress, useChildSegmentListKey, useSegmentBatchImport, useSegmentListKey } from '@/service/knowledge/use-segment' import { useCheckSegmentBatchImportProgress, useChildSegmentListKey, useSegmentBatchImport, useSegmentListKey } from '@/service/knowledge/use-segment'
import { useDocumentDetail, useDocumentMetadata } from '@/service/knowledge/use-document'
import { useDocumentDetail, useDocumentMetadata, useInvalidDocumentList } from '@/service/knowledge/use-document'
import { useInvalid } from '@/service/use-base' import { useInvalid } from '@/service/use-base'


type DocumentContextValue = { type DocumentContextValue = {


const invalidChunkList = useInvalid(useSegmentListKey) const invalidChunkList = useInvalid(useSegmentListKey)
const invalidChildChunkList = useInvalid(useChildSegmentListKey) const invalidChildChunkList = useInvalid(useChildSegmentListKey)
const invalidDocumentList = useInvalidDocumentList(datasetId)


const handleOperate = (operateName?: string) => { const handleOperate = (operateName?: string) => {
invalidDocumentList()
if (operateName === 'delete') { if (operateName === 'delete') {
backToPrev() backToPrev()
} }
else { else {
detailMutate() detailMutate()
setTimeout(() => {
invalidChunkList()
invalidChildChunkList()
}, 5000)
// If operation is not rename, refresh the chunk list after 5 seconds
if (operateName) {
setTimeout(() => {
invalidChunkList()
invalidChildChunkList()
}, 5000)
}
} }
} }



+ 27
- 30
web/app/components/datasets/documents/index.tsx Ver arquivo

'use client' 'use client'
import type { FC } from 'react' import type { FC } from 'react'
import React, { useCallback, useEffect, useMemo, useState } from 'react' import React, { useCallback, useEffect, useMemo, useState } from 'react'
import useSWR from 'swr'
import { useTranslation } from 'react-i18next' import { useTranslation } from 'react-i18next'
import { useRouter } from 'next/navigation' import { useRouter } from 'next/navigation'
import { useDebounce, useDebounceFn } from 'ahooks' import { useDebounce, useDebounceFn } from 'ahooks'
import { groupBy, omit } from 'lodash-es'
import { groupBy } from 'lodash-es'
import { PlusIcon } from '@heroicons/react/24/solid' import { PlusIcon } from '@heroicons/react/24/solid'
import { RiExternalLinkLine } from '@remixicon/react' import { RiExternalLinkLine } from '@remixicon/react'
import AutoDisabledDocument from '../common/document-status-with-action/auto-disabled-document' import AutoDisabledDocument from '../common/document-status-with-action/auto-disabled-document'
import Button from '@/app/components/base/button' import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input' import Input from '@/app/components/base/input'
import { get } from '@/service/base' import { get } from '@/service/base'
import { createDocument, fetchDocuments } from '@/service/datasets'
import { createDocument } from '@/service/datasets'
import { useDatasetDetailContext } from '@/context/dataset-detail' import { useDatasetDetailContext } from '@/context/dataset-detail'
import { NotionPageSelectorModal } from '@/app/components/base/notion-page-selector' import { NotionPageSelectorModal } from '@/app/components/base/notion-page-selector'
import type { NotionPage } from '@/models/common' import type { NotionPage } from '@/models/common'
import type { CreateDocumentReq } from '@/models/datasets' import type { CreateDocumentReq } from '@/models/datasets'
import { DataSourceType } from '@/models/datasets'
import { DataSourceType, ProcessMode } from '@/models/datasets'
import IndexFailed from '@/app/components/datasets/common/document-status-with-action/index-failed' import IndexFailed from '@/app/components/datasets/common/document-status-with-action/index-failed'
import { useProviderContext } from '@/context/provider-context' import { useProviderContext } from '@/context/provider-context'
import cn from '@/utils/classnames' import cn from '@/utils/classnames'
import { useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'
import { useDocumentList, useInvalidDocumentDetailKey, useInvalidDocumentList } from '@/service/knowledge/use-document'
import { useInvalid } from '@/service/use-base' import { useInvalid } from '@/service/use-base'
import { useChildSegmentListKey, useSegmentListKey } from '@/service/knowledge/use-segment' import { useChildSegmentListKey, useSegmentListKey } from '@/service/knowledge/use-segment'


</div> </div>
} }


interface IDocumentsProps {
type IDocumentsProps = {
datasetId: string datasetId: string
} }


export const fetcher = (url: string) => get(url, {}, {}) export const fetcher = (url: string) => get(url, {}, {})
const DEFAULT_LIMIT = 15
const DEFAULT_LIMIT = 10


const Documents: FC<IDocumentsProps> = ({ datasetId }) => { const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
const { t } = useTranslation() const { t } = useTranslation()


const debouncedSearchValue = useDebounce(searchValue, { wait: 500 }) const debouncedSearchValue = useDebounce(searchValue, { wait: 500 })


const query = useMemo(() => {
return { page: currPage + 1, limit, keyword: debouncedSearchValue, fetch: isDataSourceNotion ? true : '' }
}, [currPage, debouncedSearchValue, isDataSourceNotion, limit])

const { data: documentsRes, mutate, isLoading: isListLoading } = useSWR(
{
action: 'fetchDocuments',
datasetId,
params: query,
const { data: documentsRes, isFetching: isListLoading } = useDocumentList({
datasetId,
query: {
page: currPage + 1,
limit,
keyword: debouncedSearchValue,
}, },
apiParams => fetchDocuments(omit(apiParams, 'action')),
{ refreshInterval: (isDataSourceNotion && timerCanRun) ? 2500 : 0 },
)
refetchInterval: (isDataSourceNotion && timerCanRun) ? 2500 : 0,
})

const invalidDocumentList = useInvalidDocumentList(datasetId)


const [isMuting, setIsMuting] = useState(false)
useEffect(() => { useEffect(() => {
if (!isListLoading && isMuting)
setIsMuting(false)
}, [isListLoading, isMuting])
if (documentsRes) {
const totalPages = Math.ceil(documentsRes.total / limit)
if (totalPages < currPage + 1)
setCurrPage(totalPages === 0 ? 0 : totalPages - 1)
}
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [documentsRes])


const invalidDocumentDetail = useInvalidDocumentDetailKey() const invalidDocumentDetail = useInvalidDocumentDetailKey()
const invalidChunkList = useInvalid(useSegmentListKey) const invalidChunkList = useInvalid(useSegmentListKey)
const invalidChildChunkList = useInvalid(useChildSegmentListKey) const invalidChildChunkList = useInvalid(useChildSegmentListKey)


const handleUpdate = useCallback(() => { const handleUpdate = useCallback(() => {
setIsMuting(true)
mutate()
invalidDocumentList()
invalidDocumentDetail() invalidDocumentDetail()
setTimeout(() => { setTimeout(() => {
invalidChunkList() invalidChunkList()
router.push(`/datasets/${datasetId}/documents/create`) router.push(`/datasets/${datasetId}/documents/create`)
} }


const isLoading = isListLoading // !documentsRes && !error

const handleSaveNotionPageSelected = async (selectedPages: NotionPage[]) => { const handleSaveNotionPageSelected = async (selectedPages: NotionPage[]) => {
const workspacesMap = groupBy(selectedPages, 'workspace_id') const workspacesMap = groupBy(selectedPages, 'workspace_id')
const workspaces = Object.keys(workspacesMap).map((workspaceId) => { const workspaces = Object.keys(workspacesMap).map((workspaceId) => {
indexing_technique: dataset?.indexing_technique, indexing_technique: dataset?.indexing_technique,
process_rule: { process_rule: {
rules: {}, rules: {},
mode: 'automatic',
mode: ProcessMode.general,
}, },
} as CreateDocumentReq } as CreateDocumentReq


datasetId, datasetId,
body: params, body: params,
}) })
mutate()
invalidDocumentList()
setTimerCanRun(true) setTimerCanRun(true)
// mutateDatasetIndexingStatus(undefined, { revalidate: true }) // mutateDatasetIndexingStatus(undefined, { revalidate: true })
setNotionPageSelectorModalVisible(false) setNotionPageSelectorModalVisible(false)
)} )}
</div> </div>
</div> </div>
{(isLoading && !isMuting)
{isListLoading
? <Loading type='app' /> ? <Loading type='app' />
: total > 0 : total > 0
? <List ? <List

+ 108
- 106
web/app/components/datasets/documents/list.tsx Ver arquivo

} }


return ( return (
<div className='relative w-full h-full overflow-x-auto'>
<table className={`min-w-[700px] max-w-full w-full border-collapse border-0 text-sm mt-3 ${s.documentTable}`}>
<thead className="h-8 leading-8 border-b border-divider-subtle text-text-tertiary font-medium text-xs uppercase">
<tr>
<td className='w-12'>
<div className='flex items-center' onClick={e => e.stopPropagation()}>
<Checkbox
className='shrink-0 mr-2'
checked={isAllSelected}
mixed={!isAllSelected && isSomeSelected}
onCheck={onSelectedAll}
/>
#
</div>
</td>
<td>
<div className='flex'>
{t('datasetDocuments.list.table.header.fileName')}
</div>
</td>
<td className='w-[130px]'>{t('datasetDocuments.list.table.header.chunkingMode')}</td>
<td className='w-24'>{t('datasetDocuments.list.table.header.words')}</td>
<td className='w-44'>{t('datasetDocuments.list.table.header.hitCount')}</td>
<td className='w-44'>
<div className='flex items-center' onClick={onClickSort}>
{t('datasetDocuments.list.table.header.uploadTime')}
<ArrowDownIcon className={cn('ml-0.5 h-3 w-3 stroke-current stroke-2 cursor-pointer', enableSort ? 'text-text-tertiary' : 'text-text-disabled')} />
</div>
</td>
<td className='w-40'>{t('datasetDocuments.list.table.header.status')}</td>
<td className='w-20'>{t('datasetDocuments.list.table.header.action')}</td>
</tr>
</thead>
<tbody className="text-text-secondary">
{localDocs.map((doc, index) => {
const isFile = doc.data_source_type === DataSourceType.FILE
const fileType = isFile ? doc.data_source_detail_dict?.upload_file?.extension : ''
return <tr
key={doc.id}
className={'border-b border-divider-subtle h-8 hover:bg-background-default-hover cursor-pointer'}
onClick={() => {
router.push(`/datasets/${datasetId}/documents/${doc.id}`)
}}>
<td className='text-left align-middle text-text-tertiary text-xs'>
<div className='flex flex-col relative w-full h-full'>
<div className='grow overflow-x-auto'>
<table className={`min-w-[700px] max-w-full w-full border-collapse border-0 text-sm mt-3 ${s.documentTable}`}>
<thead className="h-8 leading-8 border-b border-divider-subtle text-text-tertiary font-medium text-xs uppercase">
<tr>
<td className='w-12'>
<div className='flex items-center' onClick={e => e.stopPropagation()}> <div className='flex items-center' onClick={e => e.stopPropagation()}>
<Checkbox <Checkbox
className='shrink-0 mr-2' className='shrink-0 mr-2'
checked={selectedIds.includes(doc.id)}
onCheck={() => {
onSelectedIdChange(
selectedIds.includes(doc.id)
? selectedIds.filter(id => id !== doc.id)
: [...selectedIds, doc.id],
)
}}
checked={isAllSelected}
mixed={!isAllSelected && isSomeSelected}
onCheck={onSelectedAll}
/> />
{/* {doc.position} */}
{index + 1}
#
</div> </div>
</td> </td>
<td> <td>
<div className={'group flex items-center mr-6 hover:mr-0 max-w-[460px]'}>
<div className='shrink-0'>
{doc?.data_source_type === DataSourceType.NOTION && <NotionIcon className='inline-flex -mt-[3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />}
{doc?.data_source_type === DataSourceType.FILE && <FileTypeIcon type={extensionToFileType(doc?.data_source_info?.upload_file?.extension ?? fileType)} className='mr-1.5' />}
{doc?.data_source_type === DataSourceType.WEB && <Globe01 className='inline-flex -mt-[3px] mr-1.5 align-middle' />}
</div>
<span className='text-sm truncate grow-1'>{doc.name}</span>
<div className='group-hover:flex group-hover:ml-auto hidden shrink-0'>
<Tooltip
popupContent={t('datasetDocuments.list.table.rename')}
>
<div
className='p-1 rounded-md cursor-pointer hover:bg-state-base-hover'
onClick={(e) => {
e.stopPropagation()
handleShowRenameModal(doc)
}}
>
<Edit03 className='w-4 h-4 text-text-tertiary' />
</div>
</Tooltip>
</div>
<div className='flex'>
{t('datasetDocuments.list.table.header.fileName')}
</div> </div>
</td> </td>
<td>
<ChunkingModeLabel
isGeneralMode={isGeneralMode}
isQAMode={isQAMode}
/>
</td>
<td>{renderCount(doc.word_count)}</td>
<td>{renderCount(doc.hit_count)}</td>
<td className='text-text-secondary text-[13px]'>
{formatTime(doc.created_at, t('datasetHitTesting.dateTimeFormat') as string)}
</td>
<td>
{
(['indexing', 'splitting', 'parsing', 'cleaning'].includes(doc.indexing_status) && doc?.data_source_type === DataSourceType.NOTION)
? <ProgressBar percent={doc.percent || 0} />
: <StatusItem status={doc.display_status} />
}
</td>
<td>
<OperationAction
embeddingAvailable={embeddingAvailable}
datasetId={datasetId}
detail={pick(doc, ['name', 'enabled', 'archived', 'id', 'data_source_type', 'doc_form'])}
onUpdate={onUpdate}
/>
<td className='w-[130px]'>{t('datasetDocuments.list.table.header.chunkingMode')}</td>
<td className='w-24'>{t('datasetDocuments.list.table.header.words')}</td>
<td className='w-44'>{t('datasetDocuments.list.table.header.hitCount')}</td>
<td className='w-44'>
<div className='flex items-center' onClick={onClickSort}>
{t('datasetDocuments.list.table.header.uploadTime')}
<ArrowDownIcon className={cn('ml-0.5 h-3 w-3 stroke-current stroke-2 cursor-pointer', enableSort ? 'text-text-tertiary' : 'text-text-disabled')} />
</div>
</td> </td>
<td className='w-40'>{t('datasetDocuments.list.table.header.status')}</td>
<td className='w-20'>{t('datasetDocuments.list.table.header.action')}</td>
</tr> </tr>
})}
</tbody>
</table>
</thead>
<tbody className="text-text-secondary">
{localDocs.map((doc, index) => {
const isFile = doc.data_source_type === DataSourceType.FILE
const fileType = isFile ? doc.data_source_detail_dict?.upload_file?.extension : ''
return <tr
key={doc.id}
className={'border-b border-divider-subtle h-8 hover:bg-background-default-hover cursor-pointer'}
onClick={() => {
router.push(`/datasets/${datasetId}/documents/${doc.id}`)
}}>
<td className='text-left align-middle text-text-tertiary text-xs'>
<div className='flex items-center' onClick={e => e.stopPropagation()}>
<Checkbox
className='shrink-0 mr-2'
checked={selectedIds.includes(doc.id)}
onCheck={() => {
onSelectedIdChange(
selectedIds.includes(doc.id)
? selectedIds.filter(id => id !== doc.id)
: [...selectedIds, doc.id],
)
}}
/>
{/* {doc.position} */}
{index + 1}
</div>
</td>
<td>
<div className={'group flex items-center mr-6 hover:mr-0 max-w-[460px]'}>
<div className='shrink-0'>
{doc?.data_source_type === DataSourceType.NOTION && <NotionIcon className='inline-flex mt-[-3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />}
{doc?.data_source_type === DataSourceType.FILE && <FileTypeIcon type={extensionToFileType(doc?.data_source_info?.upload_file?.extension ?? fileType)} className='mr-1.5' />}
{doc?.data_source_type === DataSourceType.WEB && <Globe01 className='inline-flex mt-[-3px] mr-1.5 align-middle' />}
</div>
<span className='text-sm truncate grow-1'>{doc.name}</span>
<div className='group-hover:flex group-hover:ml-auto hidden shrink-0'>
<Tooltip
popupContent={t('datasetDocuments.list.table.rename')}
>
<div
className='p-1 rounded-md cursor-pointer hover:bg-state-base-hover'
onClick={(e) => {
e.stopPropagation()
handleShowRenameModal(doc)
}}
>
<Edit03 className='w-4 h-4 text-text-tertiary' />
</div>
</Tooltip>
</div>
</div>
</td>
<td>
<ChunkingModeLabel
isGeneralMode={isGeneralMode}
isQAMode={isQAMode}
/>
</td>
<td>{renderCount(doc.word_count)}</td>
<td>{renderCount(doc.hit_count)}</td>
<td className='text-text-secondary text-[13px]'>
{formatTime(doc.created_at, t('datasetHitTesting.dateTimeFormat') as string)}
</td>
<td>
{
(['indexing', 'splitting', 'parsing', 'cleaning'].includes(doc.indexing_status) && doc?.data_source_type === DataSourceType.NOTION)
? <ProgressBar percent={doc.percent || 0} />
: <StatusItem status={doc.display_status} />
}
</td>
<td>
<OperationAction
embeddingAvailable={embeddingAvailable}
datasetId={datasetId}
detail={pick(doc, ['name', 'enabled', 'archived', 'id', 'data_source_type', 'doc_form'])}
onUpdate={onUpdate}
/>
</td>
</tr>
})}
</tbody>
</table>
</div>
{(selectedIds.length > 0) && ( {(selectedIds.length > 0) && (
<BatchAction <BatchAction
className='absolute left-0 bottom-16 z-20' className='absolute left-0 bottom-16 z-20'
/> />
)} )}
{/* Show Pagination only if the total is more than the limit */} {/* Show Pagination only if the total is more than the limit */}
{pagination.total && pagination.total > (pagination.limit || 10) && (
{pagination.total && (
<Pagination <Pagination
{...pagination} {...pagination}
className='absolute bottom-0 left-0 w-full px-0 pb-0'
className='shrink-0 w-full px-0 pb-0'
/> />
)} )}



+ 0
- 5
web/service/datasets.ts Ver arquivo

CreateDocumentReq, CreateDocumentReq,
DataSet, DataSet,
DataSetListResponse, DataSetListResponse,
DocumentListResponse,
ErrorDocsResponse, ErrorDocsResponse,
ExternalAPIDeleteResponse, ExternalAPIDeleteResponse,
ExternalAPIItem, ExternalAPIItem,
return get<ProcessRuleResponse>('/datasets/process-rule', { params: { document_id: documentId } }) return get<ProcessRuleResponse>('/datasets/process-rule', { params: { document_id: documentId } })
} }


export const fetchDocuments: Fetcher<DocumentListResponse, { datasetId: string; params: { keyword: string; page: number; limit: number; sort?: SortType } }> = ({ datasetId, params }) => {
return get<DocumentListResponse>(`/datasets/${datasetId}/documents`, { params })
}

export const createFirstDocument: Fetcher<createDocumentResponse, { body: CreateDocumentReq }> = ({ body }) => { export const createFirstDocument: Fetcher<createDocumentResponse, { body: CreateDocumentReq }> = ({ body }) => {
return post<createDocumentResponse>('/datasets/init', { body }) return post<createDocumentResponse>('/datasets/init', { body })
} }

+ 13
- 9
web/service/knowledge/use-document.ts Ver arquivo

} from '@tanstack/react-query' } from '@tanstack/react-query'
import { del, get, patch } from '../base' import { del, get, patch } from '../base'
import { useInvalid } from '../use-base' import { useInvalid } from '../use-base'
import type { MetadataType } from '../datasets'
import type { DocumentDetailResponse, SimpleDocumentDetail, UpdateDocumentBatchParams } from '@/models/datasets'
import type { MetadataType, SortType } from '../datasets'
import type { DocumentDetailResponse, DocumentListResponse, UpdateDocumentBatchParams } from '@/models/datasets'
import { DocumentActionType } from '@/models/datasets' import { DocumentActionType } from '@/models/datasets'
import type { CommonResponse } from '@/models/common' import type { CommonResponse } from '@/models/common'


keyword: string keyword: string
page: number page: number
limit: number limit: number
}
sort?: SortType
},
refetchInterval?: number | false
}) => { }) => {
const { query, datasetId } = payload
return useQuery<{ data: SimpleDocumentDetail[] }>({
queryKey: [...useDocumentListKey, datasetId, query],
queryFn: () => get<{ data: SimpleDocumentDetail[] }>(`/datasets/${datasetId}/documents`, {
const { query, datasetId, refetchInterval } = payload
const { keyword, page, limit, sort } = query
return useQuery<DocumentListResponse>({
queryKey: [...useDocumentListKey, datasetId, keyword, page, limit, sort],
queryFn: () => get<DocumentListResponse>(`/datasets/${datasetId}/documents`, {
params: query, params: query,
}), }),
refetchInterval,
}) })
} }


export const useInvalidDocumentList = () => {
return useInvalid(useDocumentListKey)
export const useInvalidDocumentList = (datasetId?: string) => {
return useInvalid(datasetId ? [...useDocumentListKey, datasetId] : useDocumentListKey)
} }


const useAutoDisabledDocumentKey = [NAME_SPACE, 'autoDisabledDocument'] const useAutoDisabledDocumentKey = [NAME_SPACE, 'autoDisabledDocument']

Carregando…
Cancelar
Salvar