| @@ -534,7 +534,7 @@ class IndexingRunner: | |||
| # chunk nodes by chunk size | |||
| indexing_start_at = time.perf_counter() | |||
| tokens = 0 | |||
| if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX: | |||
| if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy": | |||
| # create keyword index | |||
| create_keyword_thread = threading.Thread( | |||
| target=self._process_keyword_index, | |||
| @@ -572,7 +572,7 @@ class IndexingRunner: | |||
| for future in futures: | |||
| tokens += future.result() | |||
| if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX: | |||
| if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy": | |||
| create_keyword_thread.join() | |||
| indexing_end_at = time.perf_counter() | |||
| @@ -76,6 +76,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor): | |||
| if dataset.indexing_technique == "high_quality": | |||
| vector = Vector(dataset) | |||
| vector.create(documents) | |||
| with_keywords = False | |||
| if with_keywords: | |||
| keywords_list = kwargs.get("keywords_list") | |||
| keyword = Keyword(dataset) | |||
| @@ -91,6 +92,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor): | |||
| vector.delete_by_ids(node_ids) | |||
| else: | |||
| vector.delete() | |||
| with_keywords = False | |||
| if with_keywords: | |||
| keyword = Keyword(dataset) | |||
| if node_ids: | |||
| @@ -97,16 +97,16 @@ class VectorService: | |||
| vector = Vector(dataset=dataset) | |||
| vector.delete_by_ids([segment.index_node_id]) | |||
| vector.add_texts([document], duplicate_check=True) | |||
| # update keyword index | |||
| keyword = Keyword(dataset) | |||
| keyword.delete_by_ids([segment.index_node_id]) | |||
| # save keyword index | |||
| if keywords and len(keywords) > 0: | |||
| keyword.add_texts([document], keywords_list=[keywords]) | |||
| else: | |||
| keyword.add_texts([document]) | |||
| # update keyword index | |||
| keyword = Keyword(dataset) | |||
| keyword.delete_by_ids([segment.index_node_id]) | |||
| # save keyword index | |||
| if keywords and len(keywords) > 0: | |||
| keyword.add_texts([document], keywords_list=[keywords]) | |||
| else: | |||
| keyword.add_texts([document]) | |||
| @classmethod | |||
| def generate_child_chunks( | |||
| @@ -1,4 +1,4 @@ | |||
| import React, { type FC, useMemo, useState } from 'react' | |||
| import React, { type FC, useCallback, useMemo, useState } from 'react' | |||
| import { useTranslation } from 'react-i18next' | |||
| import { | |||
| RiCloseLine, | |||
| @@ -16,8 +16,10 @@ import { useSegmentListContext } from './index' | |||
| import { ChunkingMode, type SegmentDetailModel } from '@/models/datasets' | |||
| import { useEventEmitterContextContext } from '@/context/event-emitter' | |||
| import { formatNumber } from '@/utils/format' | |||
| import classNames from '@/utils/classnames' | |||
| import cn from '@/utils/classnames' | |||
| import Divider from '@/app/components/base/divider' | |||
| import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' | |||
| import { IndexingType } from '../../../create/step-two' | |||
| type ISegmentDetailProps = { | |||
| segInfo?: Partial<SegmentDetailModel> & { id: string } | |||
| @@ -48,6 +50,7 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({ | |||
| const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen) | |||
| const mode = useDocumentContext(s => s.mode) | |||
| const parentMode = useDocumentContext(s => s.parentMode) | |||
| const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique) | |||
| eventEmitter?.useSubscription((v) => { | |||
| if (v === 'update-segment') | |||
| @@ -56,56 +59,41 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({ | |||
| setLoading(false) | |||
| }) | |||
| const handleCancel = () => { | |||
| const handleCancel = useCallback(() => { | |||
| onCancel() | |||
| } | |||
| }, [onCancel]) | |||
| const handleSave = () => { | |||
| const handleSave = useCallback(() => { | |||
| onUpdate(segInfo?.id || '', question, answer, keywords) | |||
| } | |||
| }, [onUpdate, segInfo?.id, question, answer, keywords]) | |||
| const handleRegeneration = () => { | |||
| const handleRegeneration = useCallback(() => { | |||
| setShowRegenerationModal(true) | |||
| } | |||
| }, []) | |||
| const onCancelRegeneration = () => { | |||
| const onCancelRegeneration = useCallback(() => { | |||
| setShowRegenerationModal(false) | |||
| } | |||
| }, []) | |||
| const onConfirmRegeneration = () => { | |||
| const onConfirmRegeneration = useCallback(() => { | |||
| onUpdate(segInfo?.id || '', question, answer, keywords, true) | |||
| } | |||
| const isParentChildMode = useMemo(() => { | |||
| return mode === 'hierarchical' | |||
| }, [mode]) | |||
| const isFullDocMode = useMemo(() => { | |||
| return mode === 'hierarchical' && parentMode === 'full-doc' | |||
| }, [mode, parentMode]) | |||
| const titleText = useMemo(() => { | |||
| return isEditMode ? t('datasetDocuments.segment.editChunk') : t('datasetDocuments.segment.chunkDetail') | |||
| }, [isEditMode, t]) | |||
| const isQAModel = useMemo(() => { | |||
| return docForm === ChunkingMode.qa | |||
| }, [docForm]) | |||
| }, [onUpdate, segInfo?.id, question, answer, keywords]) | |||
| const wordCountText = useMemo(() => { | |||
| const contentLength = isQAModel ? (question.length + answer.length) : question.length | |||
| const contentLength = docForm === ChunkingMode.qa ? (question.length + answer.length) : question.length | |||
| const total = formatNumber(isEditMode ? contentLength : segInfo!.word_count as number) | |||
| const count = isEditMode ? contentLength : segInfo!.word_count as number | |||
| return `${total} ${t('datasetDocuments.segment.characters', { count })}` | |||
| }, [isEditMode, question.length, answer.length, isQAModel, segInfo, t]) | |||
| }, [isEditMode, question.length, answer.length, docForm, segInfo, t]) | |||
| const labelPrefix = useMemo(() => { | |||
| return isParentChildMode ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk') | |||
| }, [isParentChildMode, t]) | |||
| const isFullDocMode = mode === 'hierarchical' && parentMode === 'full-doc' | |||
| const titleText = isEditMode ? t('datasetDocuments.segment.editChunk') : t('datasetDocuments.segment.chunkDetail') | |||
| const labelPrefix = mode === 'hierarchical' ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk') | |||
| const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL | |||
| return ( | |||
| <div className={'flex h-full flex-col'}> | |||
| <div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}> | |||
| <div className={cn('flex items-center justify-between', fullScreen ? 'border border-divider-subtle py-3 pl-6 pr-4' : 'pl-4 pr-3 pt-3')}> | |||
| <div className='flex flex-col'> | |||
| <div className='system-xl-semibold text-text-primary'>{titleText}</div> | |||
| <div className='flex items-center gap-x-2'> | |||
| @@ -134,12 +122,12 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({ | |||
| </div> | |||
| </div> | |||
| </div> | |||
| <div className={classNames( | |||
| <div className={cn( | |||
| 'flex grow', | |||
| fullScreen ? 'w-full flex-row justify-center px-6 pt-6 gap-x-8' : 'flex-col gap-y-1 py-3 px-4', | |||
| !isEditMode && 'pb-0 overflow-hidden', | |||
| fullScreen ? 'w-full flex-row justify-center gap-x-8 px-6 pt-6' : 'flex-col gap-y-1 px-4 py-3', | |||
| !isEditMode && 'overflow-hidden pb-0', | |||
| )}> | |||
| <div className={classNames(isEditMode ? 'break-all whitespace-pre-line overflow-hidden' : 'overflow-y-auto', fullScreen ? 'w-1/2' : 'grow')}> | |||
| <div className={cn(isEditMode ? 'overflow-hidden whitespace-pre-line break-all' : 'overflow-y-auto', fullScreen ? 'w-1/2' : 'grow')}> | |||
| <ChunkContent | |||
| docForm={docForm} | |||
| question={question} | |||
| @@ -149,7 +137,7 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({ | |||
| isEditMode={isEditMode} | |||
| /> | |||
| </div> | |||
| {mode === 'custom' && <Keywords | |||
| {isECOIndexing && <Keywords | |||
| className={fullScreen ? 'w-1/5' : ''} | |||
| actionType={isEditMode ? 'edit' : 'view'} | |||
| segInfo={segInfo} | |||
| @@ -1,4 +1,4 @@ | |||
| import { memo, useMemo, useRef, useState } from 'react' | |||
| import { memo, useCallback, useMemo, useRef, useState } from 'react' | |||
| import type { FC } from 'react' | |||
| import { useTranslation } from 'react-i18next' | |||
| import { useContext } from 'use-context-selector' | |||
| @@ -12,7 +12,6 @@ import Keywords from './completed/common/keywords' | |||
| import ChunkContent from './completed/common/chunk-content' | |||
| import AddAnother from './completed/common/add-another' | |||
| import Dot from './completed/common/dot' | |||
| import { useDocumentContext } from './index' | |||
| import { useStore as useAppStore } from '@/app/components/app/store' | |||
| import { ToastContext } from '@/app/components/base/toast' | |||
| import { ChunkingMode, type SegmentUpdater } from '@/models/datasets' | |||
| @@ -20,6 +19,8 @@ import classNames from '@/utils/classnames' | |||
| import { formatNumber } from '@/utils/format' | |||
| import Divider from '@/app/components/base/divider' | |||
| import { useAddSegment } from '@/service/knowledge/use-segment' | |||
| import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' | |||
| import { IndexingType } from '../../create/step-two' | |||
| type NewSegmentModalProps = { | |||
| onCancel: () => void | |||
| @@ -44,39 +45,37 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({ | |||
| const [addAnother, setAddAnother] = useState(true) | |||
| const fullScreen = useSegmentListContext(s => s.fullScreen) | |||
| const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen) | |||
| const mode = useDocumentContext(s => s.mode) | |||
| const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique) | |||
| const { appSidebarExpand } = useAppStore(useShallow(state => ({ | |||
| appSidebarExpand: state.appSidebarExpand, | |||
| }))) | |||
| const refreshTimer = useRef<any>(null) | |||
| const CustomButton = <> | |||
| <Divider type='vertical' className='mx-1 h-3 bg-divider-regular' /> | |||
| <button | |||
| type='button' | |||
| className='system-xs-semibold text-text-accent' | |||
| onClick={() => { | |||
| clearTimeout(refreshTimer.current) | |||
| viewNewlyAddedChunk() | |||
| }}> | |||
| {t('common.operation.view')} | |||
| </button> | |||
| </> | |||
| const CustomButton = useMemo(() => ( | |||
| <> | |||
| <Divider type='vertical' className='mx-1 h-3 bg-divider-regular' /> | |||
| <button | |||
| type='button' | |||
| className='system-xs-semibold text-text-accent' | |||
| onClick={() => { | |||
| clearTimeout(refreshTimer.current) | |||
| viewNewlyAddedChunk() | |||
| }}> | |||
| {t('common.operation.view')} | |||
| </button> | |||
| </> | |||
| ), [viewNewlyAddedChunk, t]) | |||
| const isQAModel = useMemo(() => { | |||
| return docForm === ChunkingMode.qa | |||
| }, [docForm]) | |||
| const handleCancel = (actionType: 'esc' | 'add' = 'esc') => { | |||
| const handleCancel = useCallback((actionType: 'esc' | 'add' = 'esc') => { | |||
| if (actionType === 'esc' || !addAnother) | |||
| onCancel() | |||
| } | |||
| }, [onCancel, addAnother]) | |||
| const { mutateAsync: addSegment } = useAddSegment() | |||
| const handleSave = async () => { | |||
| const handleSave = useCallback(async () => { | |||
| const params: SegmentUpdater = { content: '' } | |||
| if (isQAModel) { | |||
| if (docForm === ChunkingMode.qa) { | |||
| if (!question.trim()) { | |||
| return notify({ | |||
| type: 'error', | |||
| @@ -129,21 +128,27 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({ | |||
| setLoading(false) | |||
| }, | |||
| }) | |||
| } | |||
| }, [docForm, keywords, addSegment, datasetId, documentId, question, answer, notify, t, appSidebarExpand, CustomButton, handleCancel, onSave]) | |||
| const wordCountText = useMemo(() => { | |||
| const count = isQAModel ? (question.length + answer.length) : question.length | |||
| const count = docForm === ChunkingMode.qa ? (question.length + answer.length) : question.length | |||
| return `${formatNumber(count)} ${t('datasetDocuments.segment.characters', { count })}` | |||
| // eslint-disable-next-line react-hooks/exhaustive-deps | |||
| }, [question.length, answer.length, isQAModel]) | |||
| }, [question.length, answer.length, docForm, t]) | |||
| const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL | |||
| return ( | |||
| <div className={'flex h-full flex-col'}> | |||
| <div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}> | |||
| <div | |||
| className={classNames( | |||
| 'flex items-center justify-between', | |||
| fullScreen ? 'border border-divider-subtle py-3 pl-6 pr-4' : 'pl-4 pr-3 pt-3', | |||
| )} | |||
| > | |||
| <div className='flex flex-col'> | |||
| <div className='system-xl-semibold text-text-primary'>{ | |||
| t('datasetDocuments.segment.addChunk') | |||
| }</div> | |||
| <div className='system-xl-semibold text-text-primary'> | |||
| {t('datasetDocuments.segment.addChunk')} | |||
| </div> | |||
| <div className='flex items-center gap-x-2'> | |||
| <SegmentIndexTag label={t('datasetDocuments.segment.newChunk')!} /> | |||
| <Dot /> | |||
| @@ -171,8 +176,8 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({ | |||
| </div> | |||
| </div> | |||
| </div> | |||
| <div className={classNames('flex grow', fullScreen ? 'w-full flex-row justify-center px-6 pt-6 gap-x-8' : 'flex-col gap-y-1 py-3 px-4')}> | |||
| <div className={classNames('break-all overflow-hidden whitespace-pre-line', fullScreen ? 'w-1/2' : 'grow')}> | |||
| <div className={classNames('flex grow', fullScreen ? 'w-full flex-row justify-center gap-x-8 px-6 pt-6' : 'flex-col gap-y-1 px-4 py-3')}> | |||
| <div className={classNames('overflow-hidden whitespace-pre-line break-all', fullScreen ? 'w-1/2' : 'grow')}> | |||
| <ChunkContent | |||
| docForm={docForm} | |||
| question={question} | |||
| @@ -182,7 +187,7 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({ | |||
| isEditMode={true} | |||
| /> | |||
| </div> | |||
| {mode === 'custom' && <Keywords | |||
| {isECOIndexing && <Keywords | |||
| className={fullScreen ? 'w-1/5' : ''} | |||
| actionType='add' | |||
| keywords={keywords} | |||
| @@ -213,7 +213,7 @@ export default combine( | |||
| settings: { | |||
| tailwindcss: { | |||
| // Mostly the plugin defaults, but `callees` also lists the project's `cn` / `classNames` helpers; feel free to customize | |||
| callees: ['classnames', 'clsx', 'ctl', 'cn'], | |||
| callees: ['classnames', 'clsx', 'ctl', 'cn', 'classNames'], | |||
| config: 'tailwind.config.js', // returned from `loadConfig()` utility if not provided | |||
| cssFiles: [ | |||
| '**/*.css', | |||