| import logging | |||||
| import time | import time | ||||
| from collections.abc import Generator, Mapping, Sequence | from collections.abc import Generator, Mapping, Sequence | ||||
| from typing import TYPE_CHECKING, Any, Optional, Union | from typing import TYPE_CHECKING, Any, Optional, Union | ||||
| if TYPE_CHECKING: | if TYPE_CHECKING: | ||||
| from core.file.models import File | from core.file.models import File | ||||
| _logger = logging.getLogger(__name__) | |||||
| class AppRunner: | class AppRunner: | ||||
| def get_pre_calculate_rest_tokens( | def get_pre_calculate_rest_tokens( | ||||
| ) | ) | ||||
| def _handle_invoke_result_stream( | def _handle_invoke_result_stream( | ||||
| self, invoke_result: Generator, queue_manager: AppQueueManager, agent: bool | |||||
| self, invoke_result: Generator[LLMResultChunk, None, None], queue_manager: AppQueueManager, agent: bool | |||||
| ) -> None: | ) -> None: | ||||
| """ | """ | ||||
| Handle invoke result | Handle invoke result | ||||
| else: | else: | ||||
| queue_manager.publish(QueueAgentMessageEvent(chunk=result), PublishFrom.APPLICATION_MANAGER) | queue_manager.publish(QueueAgentMessageEvent(chunk=result), PublishFrom.APPLICATION_MANAGER) | ||||
| text += result.delta.message.content | |||||
| message = result.delta.message | |||||
| if isinstance(message.content, str): | |||||
| text += message.content | |||||
| elif isinstance(message.content, list): | |||||
| for content in message.content: | |||||
| if not isinstance(content, str): | |||||
| # TODO(QuantumGhost): Add multimodal output support for easy ui. | |||||
| _logger.warning("received multimodal output, type=%s", type(content)) | |||||
| text += content.data | |||||
| else: | |||||
| text += content # failback to str | |||||
| if not model: | if not model: | ||||
| model = result.model | model = result.model | ||||
| if not prompt_messages: | if not prompt_messages: | ||||
| prompt_messages = result.prompt_messages | |||||
| prompt_messages = list(result.prompt_messages) | |||||
| if result.delta.usage: | if result.delta.usage: | ||||
| usage = result.delta.usage | usage = result.delta.usage | ||||
| if not usage: | |||||
| if usage is None: | |||||
| usage = LLMUsage.empty_usage() | usage = LLMUsage.empty_usage() | ||||
| llm_result = LLMResult( | llm_result = LLMResult( | 
| from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage | from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage | ||||
| from core.model_runtime.entities.message_entities import ( | from core.model_runtime.entities.message_entities import ( | ||||
| AssistantPromptMessage, | AssistantPromptMessage, | ||||
| TextPromptMessageContent, | |||||
| ) | ) | ||||
| from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel | from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel | ||||
| from core.ops.entities.trace_entity import TraceTaskName | from core.ops.entities.trace_entity import TraceTaskName | ||||
| delta_text = chunk.delta.message.content | delta_text = chunk.delta.message.content | ||||
| if delta_text is None: | if delta_text is None: | ||||
| continue | continue | ||||
| if isinstance(chunk.delta.message.content, list): | |||||
| delta_text = "" | |||||
| for content in chunk.delta.message.content: | |||||
| logger.debug( | |||||
| "The content type %s in LLM chunk delta message content.: %r", type(content), content | |||||
| ) | |||||
| if isinstance(content, TextPromptMessageContent): | |||||
| delta_text += content.data | |||||
| elif isinstance(content, str): | |||||
| delta_text += content # failback to str | |||||
| else: | |||||
| logger.warning( | |||||
| "Unsupported content type %s in LLM chunk delta message content.: %r", | |||||
| type(content), | |||||
| content, | |||||
| ) | |||||
| continue | |||||
| if not self._task_state.llm_result.prompt_messages: | if not self._task_state.llm_result.prompt_messages: | ||||
| self._task_state.llm_result.prompt_messages = chunk.prompt_messages | self._task_state.llm_result.prompt_messages = chunk.prompt_messages | 
| # Set appropriate response format based on model capabilities | # Set appropriate response format based on model capabilities | ||||
| self._set_response_format(completion_params, model_schema.parameter_rules) | self._set_response_format(completion_params, model_schema.parameter_rules) | ||||
| model_config_with_cred.parameters = completion_params | model_config_with_cred.parameters = completion_params | ||||
| # NOTE(-LAN-): This line modify the `self.node_data.model`, which is used in `_invoke_llm()`. | |||||
| node_data_model.completion_params = completion_params | |||||
| return model, model_config_with_cred | return model, model_config_with_cred | ||||
| def _fetch_prompt_messages( | def _fetch_prompt_messages( | 
| size={size} | size={size} | ||||
| className={cn('!h-auto min-h-8 select-none whitespace-normal !px-3')} | className={cn('!h-auto min-h-8 select-none whitespace-normal !px-3')} | ||||
| onClick={() => { | onClick={() => { | ||||
| if (isValidUrl(link)) { | |||||
| if (link && isValidUrl(link)) { | |||||
| window.open(link, '_blank') | window.open(link, '_blank') | ||||
| return | return | ||||
| } | } | 
| const { t } = useTranslation() | const { t } = useTranslation() | ||||
| const plan = useProviderContextSelector(state => state.plan) | const plan = useProviderContextSelector(state => state.plan) | ||||
| const enableBilling = useProviderContextSelector(state => state.enableBilling) | const enableBilling = useProviderContextSelector(state => state.enableBilling) | ||||
| const datasetId = useDatasetDetailContextWithSelector(s => s.dataset?.id) | |||||
| const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id) | const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id) | ||||
| const indexingType = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique) | |||||
| const retrievalMethod = useDatasetDetailContextWithSelector(s => s.dataset?.retrieval_model_dict.search_method) | |||||
| const [datasource, setDatasource] = useState<Datasource>() | const [datasource, setDatasource] = useState<Datasource>() | ||||
| const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined) | const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined) | ||||
| const [batchId, setBatchId] = useState('') | const [batchId, setBatchId] = useState('') | ||||
| { | { | ||||
| currentStep === 3 && ( | currentStep === 3 && ( | ||||
| <Processing | <Processing | ||||
| datasetId={datasetId!} | |||||
| batchId={batchId} | batchId={batchId} | ||||
| documents={documents} | documents={documents} | ||||
| indexingType={indexingType!} | |||||
| retrievalMethod={retrievalMethod!} | |||||
| /> | /> | ||||
| ) | ) | ||||
| } | } | ||||
| currentStep === 2 && ( | currentStep === 2 && ( | ||||
| <div className='flex h-full w-[752px] shrink-0 pl-2 pt-2'> | <div className='flex h-full w-[752px] shrink-0 pl-2 pt-2'> | ||||
| <ChunkPreview | <ChunkPreview | ||||
| datasource={datasource!} | |||||
| dataSourceType={datasource!.type} | |||||
| files={fileList.map(file => file.file)} | files={fileList.map(file => file.file)} | ||||
| onlineDocuments={onlineDocuments} | onlineDocuments={onlineDocuments} | ||||
| websitePages={websitePages} | websitePages={websitePages} | ||||
| isIdle={isIdle} | isIdle={isIdle} | ||||
| isPending={isPending} | |||||
| isPending={isPending && isPreview.current} | |||||
| estimateData={estimateData} | estimateData={estimateData} | ||||
| onPreview={onClickPreview} | onPreview={onClickPreview} | ||||
| handlePreviewFileChange={handlePreviewFileChange} | handlePreviewFileChange={handlePreviewFileChange} | 
| import { useTranslation } from 'react-i18next' | import { useTranslation } from 'react-i18next' | ||||
| import { PreviewContainer } from '../../../preview/container' | import { PreviewContainer } from '../../../preview/container' | ||||
| import { PreviewHeader } from '../../../preview/header' | import { PreviewHeader } from '../../../preview/header' | ||||
| import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types' | |||||
| import type { CrawlResultItem, CustomFile, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets' | import type { CrawlResultItem, CustomFile, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets' | ||||
| import { ChunkingMode } from '@/models/datasets' | import { ChunkingMode } from '@/models/datasets' | ||||
| import type { NotionPage } from '@/models/common' | import type { NotionPage } from '@/models/common' | ||||
| import { DatasourceType } from '@/models/pipeline' | import { DatasourceType } from '@/models/pipeline' | ||||
| type ChunkPreviewProps = { | type ChunkPreviewProps = { | ||||
| datasource: Datasource | |||||
| dataSourceType: DatasourceType | |||||
| files: CustomFile[] | files: CustomFile[] | ||||
| onlineDocuments: NotionPage[] | onlineDocuments: NotionPage[] | ||||
| websitePages: CrawlResultItem[] | websitePages: CrawlResultItem[] | ||||
| } | } | ||||
| const ChunkPreview = ({ | const ChunkPreview = ({ | ||||
| datasource, | |||||
| dataSourceType, | |||||
| files, | files, | ||||
| onlineDocuments, | onlineDocuments, | ||||
| websitePages, | websitePages, | ||||
| const [previewOnlineDocument, setPreviewOnlineDocument] = useState<NotionPage>(onlineDocuments[0]) | const [previewOnlineDocument, setPreviewOnlineDocument] = useState<NotionPage>(onlineDocuments[0]) | ||||
| const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem>(websitePages[0]) | const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem>(websitePages[0]) | ||||
| const dataSourceType = datasource?.type | |||||
| return ( | return ( | ||||
| <PreviewContainer | <PreviewContainer | ||||
| header={<PreviewHeader | header={<PreviewHeader | ||||
| <p className='text-sm text-text-tertiary'> | <p className='text-sm text-text-tertiary'> | ||||
| {t('datasetCreation.stepTwo.previewChunkTip')} | {t('datasetCreation.stepTwo.previewChunkTip')} | ||||
| </p> | </p> | ||||
| <Button | |||||
| onClick={onPreview} | |||||
| > | |||||
| <Button onClick={onPreview}> | |||||
| {t('datasetPipeline.addDocuments.stepTwo.previewChunks')} | {t('datasetPipeline.addDocuments.stepTwo.previewChunks')} | ||||
| </Button> | </Button> | ||||
| </div> | </div> | 
| import { RiBookOpenLine } from '@remixicon/react' | import { RiBookOpenLine } from '@remixicon/react' | ||||
| import { useGetDocLanguage } from '@/context/i18n' | import { useGetDocLanguage } from '@/context/i18n' | ||||
| import EmbeddingProcess from './embedding-process' | import EmbeddingProcess from './embedding-process' | ||||
| import type { IndexingType } from '../../../create/step-two' | |||||
| import type { RETRIEVE_METHOD } from '@/types/app' | |||||
| import type { InitialDocumentDetail } from '@/models/pipeline' | import type { InitialDocumentDetail } from '@/models/pipeline' | ||||
| import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' | |||||
| type ProcessingProps = { | type ProcessingProps = { | ||||
| datasetId: string | |||||
| indexingType: IndexingType | |||||
| retrievalMethod: RETRIEVE_METHOD | |||||
| batchId: string | batchId: string | ||||
| documents: InitialDocumentDetail[] | documents: InitialDocumentDetail[] | ||||
| } | } | ||||
| const Processing = ({ | const Processing = ({ | ||||
| datasetId, | |||||
| batchId, | batchId, | ||||
| documents, | documents, | ||||
| indexingType, | |||||
| retrievalMethod, | |||||
| }: ProcessingProps) => { | }: ProcessingProps) => { | ||||
| const { t } = useTranslation() | const { t } = useTranslation() | ||||
| const docLanguage = useGetDocLanguage() | const docLanguage = useGetDocLanguage() | ||||
| const datasetId = useDatasetDetailContextWithSelector(s => s.dataset?.id) | |||||
| const indexingType = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique) | |||||
| const retrievalMethod = useDatasetDetailContextWithSelector(s => s.dataset?.retrieval_model_dict.search_method) | |||||
| return ( | return ( | ||||
| <div className='flex h-full w-full justify-center overflow-hidden'> | <div className='flex h-full w-full justify-center overflow-hidden'> | ||||
| <div className='h-full w-3/5 overflow-y-auto pb-8 pt-10'> | <div className='h-full w-3/5 overflow-y-auto pb-8 pt-10'> | ||||
| <div className='max-w-[640px]'> | <div className='max-w-[640px]'> | ||||
| <EmbeddingProcess | <EmbeddingProcess | ||||
| datasetId={datasetId} | |||||
| datasetId={datasetId!} | |||||
| batchId={batchId} | batchId={batchId} | ||||
| documents={documents} | documents={documents} | ||||
| indexingType={indexingType} | indexingType={indexingType} | 
| import { formatNumber } from '@/utils/format' | import { formatNumber } from '@/utils/format' | ||||
| import classNames from '@/utils/classnames' | import classNames from '@/utils/classnames' | ||||
| import Divider from '@/app/components/base/divider' | import Divider from '@/app/components/base/divider' | ||||
| import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' | |||||
| import { IndexingType } from '../../../create/step-two' | |||||
| type ISegmentDetailProps = { | type ISegmentDetailProps = { | ||||
| segInfo?: Partial<SegmentDetailModel> & { id: string } | segInfo?: Partial<SegmentDetailModel> & { id: string } | ||||
| const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen) | const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen) | ||||
| const mode = useDocumentContext(s => s.mode) | const mode = useDocumentContext(s => s.mode) | ||||
| const parentMode = useDocumentContext(s => s.parentMode) | const parentMode = useDocumentContext(s => s.parentMode) | ||||
| const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique) | |||||
| eventEmitter?.useSubscription((v) => { | eventEmitter?.useSubscription((v) => { | ||||
| if (v === 'update-segment') | if (v === 'update-segment') | ||||
| return isParentChildMode ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk') | return isParentChildMode ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk') | ||||
| }, [isParentChildMode, t]) | }, [isParentChildMode, t]) | ||||
| const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL | |||||
| return ( | return ( | ||||
| <div className={'flex h-full flex-col'}> | <div className={'flex h-full flex-col'}> | ||||
| <div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}> | <div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}> | ||||
| isEditMode={isEditMode} | isEditMode={isEditMode} | ||||
| /> | /> | ||||
| </div> | </div> | ||||
| {mode === 'custom' && <Keywords | |||||
| {isECOIndexing && <Keywords | |||||
| className={fullScreen ? 'w-1/5' : ''} | className={fullScreen ? 'w-1/5' : ''} | ||||
| actionType={isEditMode ? 'edit' : 'view'} | actionType={isEditMode ? 'edit' : 'view'} | ||||
| segInfo={segInfo} | segInfo={segInfo} | 
| import ChunkContent from './completed/common/chunk-content' | import ChunkContent from './completed/common/chunk-content' | ||||
| import AddAnother from './completed/common/add-another' | import AddAnother from './completed/common/add-another' | ||||
| import Dot from './completed/common/dot' | import Dot from './completed/common/dot' | ||||
| import { useDocumentContext } from './index' | |||||
| import { useStore as useAppStore } from '@/app/components/app/store' | import { useStore as useAppStore } from '@/app/components/app/store' | ||||
| import { ToastContext } from '@/app/components/base/toast' | import { ToastContext } from '@/app/components/base/toast' | ||||
| import { ChunkingMode, type SegmentUpdater } from '@/models/datasets' | import { ChunkingMode, type SegmentUpdater } from '@/models/datasets' | ||||
| import { formatNumber } from '@/utils/format' | import { formatNumber } from '@/utils/format' | ||||
| import Divider from '@/app/components/base/divider' | import Divider from '@/app/components/base/divider' | ||||
| import { useAddSegment } from '@/service/knowledge/use-segment' | import { useAddSegment } from '@/service/knowledge/use-segment' | ||||
| import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' | |||||
| import { IndexingType } from '../../create/step-two' | |||||
| type NewSegmentModalProps = { | type NewSegmentModalProps = { | ||||
| onCancel: () => void | onCancel: () => void | ||||
| const [addAnother, setAddAnother] = useState(true) | const [addAnother, setAddAnother] = useState(true) | ||||
| const fullScreen = useSegmentListContext(s => s.fullScreen) | const fullScreen = useSegmentListContext(s => s.fullScreen) | ||||
| const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen) | const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen) | ||||
| const mode = useDocumentContext(s => s.mode) | |||||
| const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique) | |||||
| const { appSidebarExpand } = useAppStore(useShallow(state => ({ | const { appSidebarExpand } = useAppStore(useShallow(state => ({ | ||||
| appSidebarExpand: state.appSidebarExpand, | appSidebarExpand: state.appSidebarExpand, | ||||
| }))) | }))) | ||||
| // eslint-disable-next-line react-hooks/exhaustive-deps | // eslint-disable-next-line react-hooks/exhaustive-deps | ||||
| }, [question.length, answer.length, isQAModel]) | }, [question.length, answer.length, isQAModel]) | ||||
| const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL | |||||
| return ( | return ( | ||||
| <div className={'flex h-full flex-col'}> | <div className={'flex h-full flex-col'}> | ||||
| <div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}> | <div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}> | ||||
| isEditMode={true} | isEditMode={true} | ||||
| /> | /> | ||||
| </div> | </div> | ||||
| {mode === 'custom' && <Keywords | |||||
| {isECOIndexing && <Keywords | |||||
| className={fullScreen ? 'w-1/5' : ''} | className={fullScreen ? 'w-1/5' : ''} | ||||
| actionType='add' | actionType='add' | ||||
| keywords={keywords} | keywords={keywords} | 
| import React, { useMemo } from 'react' | |||||
| import { useTranslation } from 'react-i18next' | |||||
| import { useBoolean } from 'ahooks' | |||||
| import { useContext } from 'use-context-selector' | |||||
| import { useRouter } from 'next/navigation' | |||||
| import DatasetDetailContext from '@/context/dataset-detail' | |||||
| import type { CrawlOptions, CustomFile, DataSourceType } from '@/models/datasets' | |||||
| import Loading from '@/app/components/base/loading' | |||||
| import StepTwo from '@/app/components/datasets/create/step-two' | |||||
| import AccountSetting from '@/app/components/header/account-setting' | |||||
| import AppUnavailable from '@/app/components/base/app-unavailable' | |||||
| import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks' | |||||
| import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' | |||||
| import type { NotionPage } from '@/models/common' | |||||
| import { useDocumentDetail, useInvalidDocumentDetailKey } from '@/service/knowledge/use-document' | |||||
| type DocumentSettingsProps = { | |||||
| datasetId: string | |||||
| documentId: string | |||||
| } | |||||
/**
 * Settings view for a single document of a dataset.
 *
 * Fetches the document detail (without metadata) and, once both the dataset
 * from `DatasetDetailContext` and the document detail are available, renders
 * `StepTwo` in `isSetting` mode pre-filled from the document's data source
 * info. Shows `Loading` while the document detail is missing and
 * `AppUnavailable` when the detail request fails.
 *
 * @param datasetId  Id of the dataset that owns the document.
 * @param documentId Id of the document whose settings are edited.
 */
const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
  const { t } = useTranslation()
  const router = useRouter()
  const [isShowSetAPIKey, { setTrue: showSetAPIKey, setFalse: hideSetAPIkey }] = useBoolean()
  const { indexingTechnique, dataset } = useContext(DatasetDetailContext)
  const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding)

  const invalidDocumentDetail = useInvalidDocumentDetailKey()

  // After saving, invalidate the cached detail and go back to the document page.
  const saveHandler = () => {
    invalidDocumentDetail()
    router.push(`/datasets/${datasetId}/documents/${documentId}`)
  }

  const cancelHandler = () => router.back()

  const { data: documentDetail, error } = useDocumentDetail({
    datasetId,
    documentId,
    params: { metadata: 'without' },
  })

  // `data_source_info` is read with optional chaining everywhere in this
  // component, so a document lacking it cannot throw during render.
  const dataSourceInfo = documentDetail?.data_source_info

  // Notion-page shaped object built from the document detail; it is cast to
  // `NotionPage` where it is handed to `StepTwo`.
  const currentPage = useMemo(() => {
    return {
      workspace_id: dataSourceInfo?.notion_workspace_id,
      page_id: dataSourceInfo?.notion_page_id,
      page_name: documentDetail?.name,
      page_icon: dataSourceInfo?.notion_page_icon,
      type: documentDetail?.data_source_type,
    }
  }, [documentDetail, dataSourceInfo])

  if (error)
    return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />

  return (
    <div className='flex' style={{ height: 'calc(100vh - 56px)' }}>
      <div className='grow'>
        {!documentDetail && <Loading type='app' />}
        {dataset && documentDetail && (
          <StepTwo
            isAPIKeySet={!!embeddingsDefaultModel}
            onSetting={showSetAPIKey}
            datasetId={datasetId}
            dataSourceType={documentDetail.data_source_type as DataSourceType}
            notionPages={[currentPage as unknown as NotionPage]}
            websitePages={[
              {
                title: documentDetail.name,
                source_url: dataSourceInfo?.url,
                markdown: '',
                description: '',
              },
            ]}
            websiteCrawlProvider={dataSourceInfo?.provider}
            websiteCrawlJobId={dataSourceInfo?.job_id}
            crawlOptions={dataSourceInfo as unknown as CrawlOptions}
            indexingType={indexingTechnique}
            isSetting
            documentDetail={documentDetail}
            files={[dataSourceInfo?.upload_file as CustomFile]}
            onSave={saveHandler}
            onCancel={cancelHandler}
          />
        )}
      </div>
      {isShowSetAPIKey && <AccountSetting activeTab='provider' onCancel={async () => {
        hideSetAPIkey()
      }} />}
    </div>
  )
}
| export default DocumentSettings | 
| 'use client' | 'use client' | ||||
| import React, { useMemo } from 'react' | |||||
| import { useTranslation } from 'react-i18next' | |||||
| import { useBoolean } from 'ahooks' | |||||
| import { useContext } from 'use-context-selector' | |||||
| import { useRouter } from 'next/navigation' | |||||
| import DatasetDetailContext from '@/context/dataset-detail' | |||||
| import type { CrawlOptions, CustomFile } from '@/models/datasets' | |||||
| import React from 'react' | |||||
| import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' | |||||
| import DocumentSettings from './document-settings' | |||||
| import PipelineSettings from './pipeline-settings' | |||||
| import Loading from '@/app/components/base/loading' | |||||
| import StepTwo from '@/app/components/datasets/create/step-two' | |||||
| import AccountSetting from '@/app/components/header/account-setting' | |||||
| import AppUnavailable from '@/app/components/base/app-unavailable' | |||||
| import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks' | |||||
| import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' | |||||
| import type { NotionPage } from '@/models/common' | |||||
| import { useDocumentDetail, useInvalidDocumentDetailKey } from '@/service/knowledge/use-document' | |||||
| type DocumentSettingsProps = { | |||||
| type SettingsProps = { | |||||
| datasetId: string | datasetId: string | ||||
| documentId: string | documentId: string | ||||
| } | } | ||||
| const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => { | |||||
| const { t } = useTranslation() | |||||
| const router = useRouter() | |||||
| const [isShowSetAPIKey, { setTrue: showSetAPIKey, setFalse: hideSetAPIkey }] = useBoolean() | |||||
| const { indexingTechnique, dataset } = useContext(DatasetDetailContext) | |||||
| const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding) | |||||
| const invalidDocumentDetail = useInvalidDocumentDetailKey() | |||||
| const saveHandler = () => { | |||||
| invalidDocumentDetail() | |||||
| router.push(`/datasets/${datasetId}/documents/${documentId}`) | |||||
| const Settings = ({ | |||||
| datasetId, | |||||
| documentId, | |||||
| }: SettingsProps) => { | |||||
| const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id) | |||||
| if (!pipelineId) { | |||||
| return ( | |||||
| <DocumentSettings | |||||
| datasetId={datasetId} | |||||
| documentId={documentId} | |||||
| /> | |||||
| ) | |||||
| } | } | ||||
| const cancelHandler = () => router.back() | |||||
| const { data: documentDetail, error } = useDocumentDetail({ | |||||
| datasetId, | |||||
| documentId, | |||||
| params: { metadata: 'without' }, | |||||
| }) | |||||
| const currentPage = useMemo(() => { | |||||
| return { | |||||
| workspace_id: documentDetail?.data_source_info.notion_workspace_id, | |||||
| page_id: documentDetail?.data_source_info.notion_page_id, | |||||
| page_name: documentDetail?.name, | |||||
| page_icon: documentDetail?.data_source_info.notion_page_icon, | |||||
| type: documentDetail?.data_source_type, | |||||
| } | |||||
| }, [documentDetail]) | |||||
| if (error) | |||||
| return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} /> | |||||
| return ( | return ( | ||||
| <div className='flex' style={{ height: 'calc(100vh - 56px)' }}> | |||||
| <div className="grow"> | |||||
| {!documentDetail && <Loading type='app' />} | |||||
| {dataset && documentDetail && ( | |||||
| <StepTwo | |||||
| isAPIKeySet={!!embeddingsDefaultModel} | |||||
| onSetting={showSetAPIKey} | |||||
| datasetId={datasetId} | |||||
| dataSourceType={documentDetail.data_source_type} | |||||
| notionPages={[currentPage as unknown as NotionPage]} | |||||
| websitePages={[ | |||||
| { | |||||
| title: documentDetail.name, | |||||
| source_url: documentDetail.data_source_info?.url, | |||||
| markdown: '', | |||||
| description: '', | |||||
| }, | |||||
| ]} | |||||
| websiteCrawlProvider={documentDetail.data_source_info?.provider} | |||||
| websiteCrawlJobId={documentDetail.data_source_info?.job_id} | |||||
| crawlOptions={documentDetail.data_source_info as unknown as CrawlOptions} | |||||
| indexingType={indexingTechnique} | |||||
| isSetting | |||||
| documentDetail={documentDetail} | |||||
| files={[documentDetail.data_source_info.upload_file as CustomFile]} | |||||
| onSave={saveHandler} | |||||
| onCancel={cancelHandler} | |||||
| /> | |||||
| )} | |||||
| </div> | |||||
| {isShowSetAPIKey && <AccountSetting activeTab="provider" onCancel={async () => { | |||||
| hideSetAPIkey() | |||||
| }} />} | |||||
| </div> | |||||
| <PipelineSettings | |||||
| datasetId={datasetId} | |||||
| documentId={documentId} | |||||
| /> | |||||
| ) | ) | ||||
| } | } | ||||
| export default DocumentSettings | |||||
| export default Settings | 
| import { useCallback, useRef, useState } from 'react' | |||||
| import type { CrawlResultItem, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets' | |||||
| import type { NotionPage } from '@/models/common' | |||||
| import { useTranslation } from 'react-i18next' | |||||
| import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' | |||||
| import { useDocumentDetail } from '@/service/knowledge/use-document' | |||||
| import AppUnavailable from '@/app/components/base/app-unavailable' | |||||
| import ChunkPreview from '../../../create-from-pipeline/preview/chunk-preview' | |||||
| import Loading from '@/app/components/base/loading' | |||||
| import type { DatasourceType } from '@/models/pipeline' | |||||
| import ProcessDocuments from './process-documents' | |||||
| import LeftHeader from './left-header' | |||||
| type PipelineSettingsProps = { | |||||
| datasetId: string | |||||
| documentId: string | |||||
| } | |||||
/**
 * Settings view for a document that belongs to a pipeline-backed dataset.
 *
 * Left column: header plus the `ProcessDocuments` form. Right column: a
 * `ChunkPreview` panel. The "preview" and "process" buttons both submit the
 * same form; `isPreview` records which one triggered the submit so that
 * `handleSubmit` can dispatch to the matching handler.
 *
 * NOTE: the preview/process handlers and the `files` passed to `ChunkPreview`
 * are still placeholders (see the todo markers below).
 *
 * @param datasetId  Id of the dataset that owns the document.
 * @param documentId Id of the document whose settings are edited.
 */
const PipelineSettings = ({
  datasetId,
  documentId,
}: PipelineSettingsProps) => {
  const { t } = useTranslation()
  const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
  const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined)

  // Remembers whether the pending form submit came from "preview" or "process".
  const isPreview = useRef(false)
  const formRef = useRef<any>(null)

  const { data: documentDetail, error, isFetching: isFetchingDocumentDetail } = useDocumentDetail({
    datasetId,
    documentId,
    params: { metadata: 'without' },
  })

  const handlePreviewChunks = useCallback(async (data: Record<string, any>) => {
    // todo: Preview
  }, [])

  const handleProcess = useCallback(async (data: Record<string, any>) => {
    // todo: Process
  }, [])

  const onClickProcess = useCallback(() => {
    isPreview.current = false
    formRef.current?.submit()
  }, [])

  const onClickPreview = useCallback(() => {
    isPreview.current = true
    formRef.current?.submit()
  }, [])

  // Route the submitted form data to the handler chosen by the last click.
  const handleSubmit = useCallback((data: Record<string, any>) => {
    if (isPreview.current)
      handlePreviewChunks(data)
    else
      handleProcess(data)
  }, [handlePreviewChunks, handleProcess])

  // Selecting a different file/page/website in the preview re-runs the preview.
  const handlePreviewFileChange = useCallback((file: DocumentItem) => {
    onClickPreview()
  }, [onClickPreview])

  const handlePreviewOnlineDocumentChange = useCallback((page: NotionPage) => {
    onClickPreview()
  }, [onClickPreview])

  const handlePreviewWebsiteChange = useCallback((website: CrawlResultItem) => {
    onClickPreview()
  }, [onClickPreview])

  if (isFetchingDocumentDetail) {
    return (
      <Loading type='app' />
    )
  }

  if (error)
    return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />

  // The query can be idle/paused with neither data nor error; keep showing the
  // loader instead of dereferencing an undefined detail below.
  if (!documentDetail) {
    return (
      <Loading type='app' />
    )
  }

  return (
    <div
      className='relative flex h-[calc(100vh-56px)] overflow-x-auto rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle'
    >
      <div className='flex h-full flex-1 flex-col px-14'>
        <LeftHeader title={t('datasetPipeline.documentSettings.title')} />
        <div className='grow overflow-y-auto'>
          <ProcessDocuments
            ref={formRef}
            documentId={documentId}
            onProcess={onClickProcess}
            onPreview={onClickPreview}
            onSubmit={handleSubmit}
          />
        </div>
      </div>
      {/* Preview */}
      <div className='flex h-full flex-1 shrink-0 pl-2 pt-2'>
        <ChunkPreview
          dataSourceType={documentDetail.data_source_type as DatasourceType}
          // @ts-expect-error mock data // todo: remove mock data
          files={[{
            id: '12345678',
            name: 'test-file',
            extension: 'txt',
          }]}
          onlineDocuments={[]}
          websitePages={[]}
          isIdle={true}
          isPending={true}
          estimateData={estimateData}
          onPreview={onClickPreview}
          handlePreviewFileChange={handlePreviewFileChange}
          handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange}
          handlePreviewWebsitePageChange={handlePreviewWebsiteChange}
        />
      </div>
    </div>
  )
}
| export default PipelineSettings | 
| import React, { useCallback } from 'react' | |||||
| import { RiArrowLeftLine } from '@remixicon/react' | |||||
| import Button from '@/app/components/base/button' | |||||
| import { useRouter } from 'next/navigation' | |||||
| import Effect from '@/app/components/base/effect' | |||||
| import { useTranslation } from 'react-i18next' | |||||
/** Props accepted by `LeftHeader`. */
type LeftHeaderProps = {
  /** Text shown in the small uppercase line at the top of the header. */
  title: string
}

/**
 * Header of the left-hand panel.
 *
 * Renders the supplied title, the translated "process documents" step
 * label, a button that calls the router's `back()`, and the `Effect`
 * component.
 */
const LeftHeader = (props: LeftHeaderProps) => {
  const { title } = props
  const { t } = useTranslation()
  const { back: goBack } = useRouter()

  // Memoized so the button receives a stable click handler between renders.
  const navigateBack = useCallback(() => {
    goBack()
  }, [goBack])

  const stepLabel = t('datasetPipeline.addDocuments.steps.processDocuments')

  return (
    <div className='relative flex flex-col gap-y-0.5 pb-2 pt-4'>
      <div className='system-2xs-semibold-uppercase bg-pipeline-add-documents-title-bg bg-clip-text text-transparent'>
        {title}
      </div>
      <div className='system-md-semibold text-text-primary'>
        {stepLabel}
      </div>
      <Button
        variant='secondary-accent'
        className='absolute -left-11 top-3.5 size-9 rounded-full p-0'
        onClick={navigateBack}
      >
        <RiArrowLeftLine className='size-5 ' />
      </Button>
      <Effect className='left-8 top-[-34px] opacity-20' />
    </div>
  )
}
| export default React.memo(LeftHeader) | 
| import React from 'react' | |||||
| import Button from '@/app/components/base/button' | |||||
| import { useTranslation } from 'react-i18next' | |||||
/** Props accepted by `Actions`. */
type ActionsProps = {
  /** Invoked when the primary button is clicked. */
  onProcess: () => void
}

/**
 * Right-aligned action row containing a single primary button labelled
 * with the translated "save and process" text.
 */
const Actions = (props: ActionsProps) => {
  const { onProcess } = props
  const { t } = useTranslation()
  const buttonLabel = t('datasetPipeline.operations.saveAndProcess')

  return (
    <div className='flex items-center justify-end'>
      <Button variant='primary' onClick={onProcess}>
        {buttonLabel}
      </Button>
    </div>
  )
}
| export default React.memo(Actions) | 
| import type { BaseConfiguration } from '@/app/components/base/form/form-scenarios/base/types' | |||||
/**
 * Provide the form configuration for a document's processing settings.
 *
 * NOTE: this is currently a stub. The document id is accepted but not
 * used, and the hook always returns an empty initial-data record and an
 * empty configuration list. Both are created anew on every call.
 *
 * @param documentId - Id of the document whose settings are being edited.
 * @returns `initialData` (initial form values) and `configurations`
 *          (field definitions), both empty for now.
 */
export const useConfigurations = (documentId: string) => {
  const initialData: Record<string, any> = {}
  const configurations: BaseConfiguration[] = []

  return {
    initialData,
    configurations,
  }
}
| import { generateZodSchema } from '@/app/components/base/form/form-scenarios/base/utils' | |||||
| import { useConfigurations } from './hooks' | |||||
| import Actions from './actions' | |||||
| import Form from '../../../../create-from-pipeline/process-documents/form' | |||||
/** Props accepted by `ProcessDocuments`. */
type ProcessDocumentsProps = {
  /** Id passed to `useConfigurations` to obtain the form definition. */
  documentId: string
  /** Ref forwarded to the underlying `Form`. */
  ref: React.RefObject<any>
  /** Invoked when the action button is clicked. */
  onProcess: () => void
  /** Forwarded to the form's `onPreview` prop. */
  onPreview: () => void
  /** Forwarded to the form's `onSubmit` prop; receives the form data. */
  onSubmit: (data: Record<string, any>) => void
}

/**
 * Renders the processing form for a document followed by the action row.
 *
 * The form's initial data and field configurations come from
 * `useConfigurations`; its schema is generated from those configurations
 * on every render.
 */
const ProcessDocuments = (props: ProcessDocumentsProps) => {
  const { documentId, ref, onProcess, onPreview, onSubmit } = props
  const formSetup = useConfigurations(documentId)
  const schema = generateZodSchema(formSetup.configurations)

  return (
    <div className='flex flex-col gap-y-4 pt-4'>
      <Form
        ref={ref}
        initialData={formSetup.initialData}
        configurations={formSetup.configurations}
        schema={schema}
        onSubmit={onSubmit}
        onPreview={onPreview}
      />
      <Actions onProcess={onProcess} />
    </div>
  )
}
| export default ProcessDocuments | 
| export const categoryKeys = [ | export const categoryKeys = [ | ||||
| 'model', | 'model', | ||||
| 'tool', | 'tool', | ||||
| 'datasource', | |||||
| 'agent-strategy', | 'agent-strategy', | ||||
| 'extension', | 'extension', | ||||
| 'bundle', | 'bundle', | ||||
| 'datasource', | |||||
| ] | ] | 
| <span className='relative z-[2] lowercase'>{t('category.tools')}</span> | <span className='relative z-[2] lowercase'>{t('category.tools')}</span> | ||||
| </span> | </span> | ||||
| , | , | ||||
| <span className="body-md-medium relative z-[1] ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']"> | |||||
| <span className='relative z-[2] lowercase'>{t('category.datasources')}</span> | |||||
| </span> | |||||
| , | |||||
| <span className="body-md-medium relative z-[1] ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']"> | <span className="body-md-medium relative z-[1] ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']"> | ||||
| <span className='relative z-[2] lowercase'>{t('category.agents')}</span> | <span className='relative z-[2] lowercase'>{t('category.agents')}</span> | ||||
| </span> | </span> | 
| tool: PluginType.tool, | tool: PluginType.tool, | ||||
| agent: PluginType.agent, | agent: PluginType.agent, | ||||
| extension: PluginType.extension, | extension: PluginType.extension, | ||||
| datasource: PluginType.datasource, | |||||
| bundle: 'bundle', | bundle: 'bundle', | ||||
| } | } | ||||
| type PluginTypeSwitchProps = { | type PluginTypeSwitchProps = { | ||||
| text: t('plugin.category.tools'), | text: t('plugin.category.tools'), | ||||
| icon: <RiHammerLine className='mr-1.5 h-4 w-4' />, | icon: <RiHammerLine className='mr-1.5 h-4 w-4' />, | ||||
| }, | }, | ||||
| { | |||||
| value: PLUGIN_TYPE_SEARCH_MAP.datasource, | |||||
| text: t('plugin.category.datasources'), | |||||
| icon: <RiHammerLine className='mr-1.5 h-4 w-4' />, | |||||
| }, | |||||
| { | { | ||||
| value: PLUGIN_TYPE_SEARCH_MAP.agent, | value: PLUGIN_TYPE_SEARCH_MAP.agent, | ||||
| text: t('plugin.category.agents'), | text: t('plugin.category.agents'), | 
| model = 'model', | model = 'model', | ||||
| extension = 'extension', | extension = 'extension', | ||||
| agent = 'agent-strategy', | agent = 'agent-strategy', | ||||
| datasource = 'datasource', | |||||
| } | } | ||||
| export enum PluginSource { | export enum PluginSource { | 
| {t('plugin.category.tools')} | {t('plugin.category.tools')} | ||||
| </span> | </span> | ||||
| , | , | ||||
| <span className="body-md-medium relative ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']"> | |||||
| {t('plugin.category.datasources')} | |||||
| </span> | |||||
| , | |||||
| <span className="body-md-medium relative ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']"> | <span className="body-md-medium relative ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']"> | ||||
| {t('plugin.category.agents')} | {t('plugin.category.agents')} | ||||
| </span> | </span> | 
| }, | }, | ||||
| characters: 'characters', | characters: 'characters', | ||||
| }, | }, | ||||
| documentSettings: { | |||||
| title: 'Document Settings', | |||||
| }, | |||||
| } | } | ||||
| export default translation | export default translation | 
| }, | }, | ||||
| characters: '字符', | characters: '字符', | ||||
| }, | }, | ||||
| documentSettings: { | |||||
| title: '文档设置', | |||||
| }, | |||||
| } | } | ||||
| export default translation | export default translation | 
| import type { MetadataItemWithValue } from '@/app/components/datasets/metadata/types' | import type { MetadataItemWithValue } from '@/app/components/datasets/metadata/types' | ||||
| import { ExternalKnowledgeBase, General, ParentChild, Qa } from '@/app/components/base/icons/src/public/knowledge/dataset-card' | import { ExternalKnowledgeBase, General, ParentChild, Qa } from '@/app/components/base/icons/src/public/knowledge/dataset-card' | ||||
| import { GeneralChunk, ParentChildChunk, QuestionAndAnswer } from '@/app/components/base/icons/src/vender/knowledge' | import { GeneralChunk, ParentChildChunk, QuestionAndAnswer } from '@/app/components/base/icons/src/vender/knowledge' | ||||
| import type { DatasourceType } from './pipeline' | |||||
| export enum DataSourceType { | export enum DataSourceType { | ||||
| FILE = 'upload_file', | FILE = 'upload_file', | ||||
| batch: string | batch: string | ||||
| position: number | position: number | ||||
| dataset_id: string | dataset_id: string | ||||
| data_source_type: DataSourceType | |||||
| data_source_type: DataSourceType | DatasourceType | |||||
| data_source_info: DataSourceInfo | data_source_info: DataSourceInfo | ||||
| dataset_process_rule_id: string | dataset_process_rule_id: string | ||||
| name: string | name: string |