Преглед изворни кода

Merge branch 'feat/rag-pipeline' into deploy/rag-dev

tags/2.0.0-beta.1
twwu пре 4 месеци
родитељ
комит
d3dbfbe8b3
24 измењених фајлова са 439 додато и 120 уклоњено
  1. 17
    4
      api/core/app/apps/base_app_runner.py
  2. 18
    0
      api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py
  3. 2
    0
      api/core/workflow/nodes/llm/node.py
  4. 1
    1
      web/app/components/base/markdown-blocks/button.tsx
  5. 2
    8
      web/app/components/datasets/documents/create-from-pipeline/index.tsx
  6. 3
    8
      web/app/components/datasets/documents/create-from-pipeline/preview/chunk-preview.tsx
  7. 5
    9
      web/app/components/datasets/documents/create-from-pipeline/processing/index.tsx
  8. 6
    1
      web/app/components/datasets/documents/detail/completed/segment-detail.tsx
  9. 6
    3
      web/app/components/datasets/documents/detail/new-segment.tsx
  10. 94
    0
      web/app/components/datasets/documents/detail/settings/document-settings.tsx
  11. 23
    84
      web/app/components/datasets/documents/detail/settings/index.tsx
  12. 120
    0
      web/app/components/datasets/documents/detail/settings/pipeline-settings/index.tsx
  13. 42
    0
      web/app/components/datasets/documents/detail/settings/pipeline-settings/left-header.tsx
  14. 26
    0
      web/app/components/datasets/documents/detail/settings/pipeline-settings/process-documents/actions.tsx
  15. 11
    0
      web/app/components/datasets/documents/detail/settings/pipeline-settings/process-documents/hooks.ts
  16. 39
    0
      web/app/components/datasets/documents/detail/settings/pipeline-settings/process-documents/index.tsx
  17. 1
    1
      web/app/components/plugins/constants.ts
  18. 4
    0
      web/app/components/plugins/marketplace/description/index.tsx
  19. 6
    0
      web/app/components/plugins/marketplace/plugin-type-switch.tsx
  20. 1
    0
      web/app/components/plugins/types.ts
  21. 4
    0
      web/app/components/tools/marketplace/index.tsx
  22. 3
    0
      web/i18n/en-US/dataset-pipeline.ts
  23. 3
    0
      web/i18n/zh-Hans/dataset-pipeline.ts
  24. 2
    1
      web/models/datasets.ts

+ 17
- 4
api/core/app/apps/base_app_runner.py Прегледај датотеку

import logging
import time import time
from collections.abc import Generator, Mapping, Sequence from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Optional, Union from typing import TYPE_CHECKING, Any, Optional, Union
if TYPE_CHECKING: if TYPE_CHECKING:
from core.file.models import File from core.file.models import File


_logger = logging.getLogger(__name__)



class AppRunner: class AppRunner:
def get_pre_calculate_rest_tokens( def get_pre_calculate_rest_tokens(
) )


def _handle_invoke_result_stream( def _handle_invoke_result_stream(
self, invoke_result: Generator, queue_manager: AppQueueManager, agent: bool
self, invoke_result: Generator[LLMResultChunk, None, None], queue_manager: AppQueueManager, agent: bool
) -> None: ) -> None:
""" """
Handle invoke result Handle invoke result
else: else:
queue_manager.publish(QueueAgentMessageEvent(chunk=result), PublishFrom.APPLICATION_MANAGER) queue_manager.publish(QueueAgentMessageEvent(chunk=result), PublishFrom.APPLICATION_MANAGER)


text += result.delta.message.content
message = result.delta.message
if isinstance(message.content, str):
text += message.content
elif isinstance(message.content, list):
for content in message.content:
if not isinstance(content, str):
# TODO(QuantumGhost): Add multimodal output support for easy ui.
_logger.warning("received multimodal output, type=%s", type(content))
text += content.data
else:
text += content # failback to str


if not model: if not model:
model = result.model model = result.model


if not prompt_messages: if not prompt_messages:
prompt_messages = result.prompt_messages
prompt_messages = list(result.prompt_messages)


if result.delta.usage: if result.delta.usage:
usage = result.delta.usage usage = result.delta.usage


if not usage:
if usage is None:
usage = LLMUsage.empty_usage() usage = LLMUsage.empty_usage()


llm_result = LLMResult( llm_result = LLMResult(

+ 18
- 0
api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py Прегледај датотеку

from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import ( from core.model_runtime.entities.message_entities import (
AssistantPromptMessage, AssistantPromptMessage,
TextPromptMessageContent,
) )
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.ops.entities.trace_entity import TraceTaskName from core.ops.entities.trace_entity import TraceTaskName
delta_text = chunk.delta.message.content delta_text = chunk.delta.message.content
if delta_text is None: if delta_text is None:
continue continue
if isinstance(chunk.delta.message.content, list):
delta_text = ""
for content in chunk.delta.message.content:
logger.debug(
"The content type %s in LLM chunk delta message content.: %r", type(content), content
)
if isinstance(content, TextPromptMessageContent):
delta_text += content.data
elif isinstance(content, str):
delta_text += content # failback to str
else:
logger.warning(
"Unsupported content type %s in LLM chunk delta message content.: %r",
type(content),
content,
)
continue


if not self._task_state.llm_result.prompt_messages: if not self._task_state.llm_result.prompt_messages:
self._task_state.llm_result.prompt_messages = chunk.prompt_messages self._task_state.llm_result.prompt_messages = chunk.prompt_messages

+ 2
- 0
api/core/workflow/nodes/llm/node.py Прегледај датотеку

# Set appropriate response format based on model capabilities # Set appropriate response format based on model capabilities
self._set_response_format(completion_params, model_schema.parameter_rules) self._set_response_format(completion_params, model_schema.parameter_rules)
model_config_with_cred.parameters = completion_params model_config_with_cred.parameters = completion_params
# NOTE(-LAN-): This line modifies `self.node_data.model`, which is used in `_invoke_llm()`.
node_data_model.completion_params = completion_params
return model, model_config_with_cred return model, model_config_with_cred


def _fetch_prompt_messages( def _fetch_prompt_messages(

+ 1
- 1
web/app/components/base/markdown-blocks/button.tsx Прегледај датотеку

size={size} size={size}
className={cn('!h-auto min-h-8 select-none whitespace-normal !px-3')} className={cn('!h-auto min-h-8 select-none whitespace-normal !px-3')}
onClick={() => { onClick={() => {
if (isValidUrl(link)) {
if (link && isValidUrl(link)) {
window.open(link, '_blank') window.open(link, '_blank')
return return
} }

+ 2
- 8
web/app/components/datasets/documents/create-from-pipeline/index.tsx Прегледај датотеку

const { t } = useTranslation() const { t } = useTranslation()
const plan = useProviderContextSelector(state => state.plan) const plan = useProviderContextSelector(state => state.plan)
const enableBilling = useProviderContextSelector(state => state.enableBilling) const enableBilling = useProviderContextSelector(state => state.enableBilling)
const datasetId = useDatasetDetailContextWithSelector(s => s.dataset?.id)
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id) const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const indexingType = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
const retrievalMethod = useDatasetDetailContextWithSelector(s => s.dataset?.retrieval_model_dict.search_method)
const [datasource, setDatasource] = useState<Datasource>() const [datasource, setDatasource] = useState<Datasource>()
const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined) const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined)
const [batchId, setBatchId] = useState('') const [batchId, setBatchId] = useState('')
{ {
currentStep === 3 && ( currentStep === 3 && (
<Processing <Processing
datasetId={datasetId!}
batchId={batchId} batchId={batchId}
documents={documents} documents={documents}
indexingType={indexingType!}
retrievalMethod={retrievalMethod!}
/> />
) )
} }
currentStep === 2 && ( currentStep === 2 && (
<div className='flex h-full w-[752px] shrink-0 pl-2 pt-2'> <div className='flex h-full w-[752px] shrink-0 pl-2 pt-2'>
<ChunkPreview <ChunkPreview
datasource={datasource!}
dataSourceType={datasource!.type}
files={fileList.map(file => file.file)} files={fileList.map(file => file.file)}
onlineDocuments={onlineDocuments} onlineDocuments={onlineDocuments}
websitePages={websitePages} websitePages={websitePages}
isIdle={isIdle} isIdle={isIdle}
isPending={isPending}
isPending={isPending && isPreview.current}
estimateData={estimateData} estimateData={estimateData}
onPreview={onClickPreview} onPreview={onClickPreview}
handlePreviewFileChange={handlePreviewFileChange} handlePreviewFileChange={handlePreviewFileChange}

+ 3
- 8
web/app/components/datasets/documents/create-from-pipeline/preview/chunk-preview.tsx Прегледај датотеку

import { useTranslation } from 'react-i18next' import { useTranslation } from 'react-i18next'
import { PreviewContainer } from '../../../preview/container' import { PreviewContainer } from '../../../preview/container'
import { PreviewHeader } from '../../../preview/header' import { PreviewHeader } from '../../../preview/header'
import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
import type { CrawlResultItem, CustomFile, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets' import type { CrawlResultItem, CustomFile, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets'
import { ChunkingMode } from '@/models/datasets' import { ChunkingMode } from '@/models/datasets'
import type { NotionPage } from '@/models/common' import type { NotionPage } from '@/models/common'
import { DatasourceType } from '@/models/pipeline' import { DatasourceType } from '@/models/pipeline'


type ChunkPreviewProps = { type ChunkPreviewProps = {
datasource: Datasource
dataSourceType: DatasourceType
files: CustomFile[] files: CustomFile[]
onlineDocuments: NotionPage[] onlineDocuments: NotionPage[]
websitePages: CrawlResultItem[] websitePages: CrawlResultItem[]
} }


const ChunkPreview = ({ const ChunkPreview = ({
datasource,
dataSourceType,
files, files,
onlineDocuments, onlineDocuments,
websitePages, websitePages,
const [previewOnlineDocument, setPreviewOnlineDocument] = useState<NotionPage>(onlineDocuments[0]) const [previewOnlineDocument, setPreviewOnlineDocument] = useState<NotionPage>(onlineDocuments[0])
const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem>(websitePages[0]) const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem>(websitePages[0])


const dataSourceType = datasource?.type

return ( return (
<PreviewContainer <PreviewContainer
header={<PreviewHeader header={<PreviewHeader
<p className='text-sm text-text-tertiary'> <p className='text-sm text-text-tertiary'>
{t('datasetCreation.stepTwo.previewChunkTip')} {t('datasetCreation.stepTwo.previewChunkTip')}
</p> </p>
<Button
onClick={onPreview}
>
<Button onClick={onPreview}>
{t('datasetPipeline.addDocuments.stepTwo.previewChunks')} {t('datasetPipeline.addDocuments.stepTwo.previewChunks')}
</Button> </Button>
</div> </div>

+ 5
- 9
web/app/components/datasets/documents/create-from-pipeline/processing/index.tsx Прегледај датотеку

import { RiBookOpenLine } from '@remixicon/react' import { RiBookOpenLine } from '@remixicon/react'
import { useGetDocLanguage } from '@/context/i18n' import { useGetDocLanguage } from '@/context/i18n'
import EmbeddingProcess from './embedding-process' import EmbeddingProcess from './embedding-process'
import type { IndexingType } from '../../../create/step-two'
import type { RETRIEVE_METHOD } from '@/types/app'
import type { InitialDocumentDetail } from '@/models/pipeline' import type { InitialDocumentDetail } from '@/models/pipeline'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'


type ProcessingProps = { type ProcessingProps = {
datasetId: string
indexingType: IndexingType
retrievalMethod: RETRIEVE_METHOD
batchId: string batchId: string
documents: InitialDocumentDetail[] documents: InitialDocumentDetail[]
} }


const Processing = ({ const Processing = ({
datasetId,
batchId, batchId,
documents, documents,
indexingType,
retrievalMethod,
}: ProcessingProps) => { }: ProcessingProps) => {
const { t } = useTranslation() const { t } = useTranslation()
const docLanguage = useGetDocLanguage() const docLanguage = useGetDocLanguage()
const datasetId = useDatasetDetailContextWithSelector(s => s.dataset?.id)
const indexingType = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
const retrievalMethod = useDatasetDetailContextWithSelector(s => s.dataset?.retrieval_model_dict.search_method)


return ( return (
<div className='flex h-full w-full justify-center overflow-hidden'> <div className='flex h-full w-full justify-center overflow-hidden'>
<div className='h-full w-3/5 overflow-y-auto pb-8 pt-10'> <div className='h-full w-3/5 overflow-y-auto pb-8 pt-10'>
<div className='max-w-[640px]'> <div className='max-w-[640px]'>
<EmbeddingProcess <EmbeddingProcess
datasetId={datasetId}
datasetId={datasetId!}
batchId={batchId} batchId={batchId}
documents={documents} documents={documents}
indexingType={indexingType} indexingType={indexingType}

+ 6
- 1
web/app/components/datasets/documents/detail/completed/segment-detail.tsx Прегледај датотеку

import { formatNumber } from '@/utils/format' import { formatNumber } from '@/utils/format'
import classNames from '@/utils/classnames' import classNames from '@/utils/classnames'
import Divider from '@/app/components/base/divider' import Divider from '@/app/components/base/divider'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { IndexingType } from '../../../create/step-two'


type ISegmentDetailProps = { type ISegmentDetailProps = {
segInfo?: Partial<SegmentDetailModel> & { id: string } segInfo?: Partial<SegmentDetailModel> & { id: string }
const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen) const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen)
const mode = useDocumentContext(s => s.mode) const mode = useDocumentContext(s => s.mode)
const parentMode = useDocumentContext(s => s.parentMode) const parentMode = useDocumentContext(s => s.parentMode)
const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)


eventEmitter?.useSubscription((v) => { eventEmitter?.useSubscription((v) => {
if (v === 'update-segment') if (v === 'update-segment')
return isParentChildMode ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk') return isParentChildMode ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk')
}, [isParentChildMode, t]) }, [isParentChildMode, t])


const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL

return ( return (
<div className={'flex h-full flex-col'}> <div className={'flex h-full flex-col'}>
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}> <div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
isEditMode={isEditMode} isEditMode={isEditMode}
/> />
</div> </div>
{mode === 'custom' && <Keywords
{isECOIndexing && <Keywords
className={fullScreen ? 'w-1/5' : ''} className={fullScreen ? 'w-1/5' : ''}
actionType={isEditMode ? 'edit' : 'view'} actionType={isEditMode ? 'edit' : 'view'}
segInfo={segInfo} segInfo={segInfo}

+ 6
- 3
web/app/components/datasets/documents/detail/new-segment.tsx Прегледај датотеку

import ChunkContent from './completed/common/chunk-content' import ChunkContent from './completed/common/chunk-content'
import AddAnother from './completed/common/add-another' import AddAnother from './completed/common/add-another'
import Dot from './completed/common/dot' import Dot from './completed/common/dot'
import { useDocumentContext } from './index'
import { useStore as useAppStore } from '@/app/components/app/store' import { useStore as useAppStore } from '@/app/components/app/store'
import { ToastContext } from '@/app/components/base/toast' import { ToastContext } from '@/app/components/base/toast'
import { ChunkingMode, type SegmentUpdater } from '@/models/datasets' import { ChunkingMode, type SegmentUpdater } from '@/models/datasets'
import { formatNumber } from '@/utils/format' import { formatNumber } from '@/utils/format'
import Divider from '@/app/components/base/divider' import Divider from '@/app/components/base/divider'
import { useAddSegment } from '@/service/knowledge/use-segment' import { useAddSegment } from '@/service/knowledge/use-segment'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { IndexingType } from '../../create/step-two'


type NewSegmentModalProps = { type NewSegmentModalProps = {
onCancel: () => void onCancel: () => void
const [addAnother, setAddAnother] = useState(true) const [addAnother, setAddAnother] = useState(true)
const fullScreen = useSegmentListContext(s => s.fullScreen) const fullScreen = useSegmentListContext(s => s.fullScreen)
const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen) const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen)
const mode = useDocumentContext(s => s.mode)
const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
const { appSidebarExpand } = useAppStore(useShallow(state => ({ const { appSidebarExpand } = useAppStore(useShallow(state => ({
appSidebarExpand: state.appSidebarExpand, appSidebarExpand: state.appSidebarExpand,
}))) })))
// eslint-disable-next-line react-hooks/exhaustive-deps // eslint-disable-next-line react-hooks/exhaustive-deps
}, [question.length, answer.length, isQAModel]) }, [question.length, answer.length, isQAModel])


const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL

return ( return (
<div className={'flex h-full flex-col'}> <div className={'flex h-full flex-col'}>
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}> <div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
isEditMode={true} isEditMode={true}
/> />
</div> </div>
{mode === 'custom' && <Keywords
{isECOIndexing && <Keywords
className={fullScreen ? 'w-1/5' : ''} className={fullScreen ? 'w-1/5' : ''}
actionType='add' actionType='add'
keywords={keywords} keywords={keywords}

+ 94
- 0
web/app/components/datasets/documents/detail/settings/document-settings.tsx Прегледај датотеку

import React, { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import { useBoolean } from 'ahooks'
import { useContext } from 'use-context-selector'
import { useRouter } from 'next/navigation'
import DatasetDetailContext from '@/context/dataset-detail'
import type { CrawlOptions, CustomFile, DataSourceType } from '@/models/datasets'
import Loading from '@/app/components/base/loading'
import StepTwo from '@/app/components/datasets/create/step-two'
import AccountSetting from '@/app/components/header/account-setting'
import AppUnavailable from '@/app/components/base/app-unavailable'
import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { NotionPage } from '@/models/common'
import { useDocumentDetail, useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'

// Props accepted by the DocumentSettings component.
type DocumentSettingsProps = {
// Dataset ID; used for the document-detail query, the StepTwo form and the post-save route.
datasetId: string
// Document ID; used for the document-detail query and the post-save route.
documentId: string
}

/**
 * Settings page for a single dataset document.
 *
 * Fetches the document detail and, once both the dataset (from context) and
 * the detail are available, renders `StepTwo` in setting mode with values
 * derived from the document's `data_source_info`. Shows `Loading` while the
 * detail is missing and `AppUnavailable` when the detail query reports an error.
 *
 * @param datasetId  ID of the dataset that owns the document.
 * @param documentId ID of the document whose settings are edited.
 */
const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
  const { t } = useTranslation()
  const router = useRouter()
  const [isShowSetAPIKey, { setTrue: showSetAPIKey, setFalse: hideSetAPIkey }] = useBoolean()
  const { indexingTechnique, dataset } = useContext(DatasetDetailContext)
  const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding)

  const invalidDocumentDetail = useInvalidDocumentDetailKey()
  // On save: call the invalidation helper first (presumably so the detail page
  // refetches fresh data; confirm against the hook), then open the document page.
  const saveHandler = () => {
    invalidDocumentDetail()
    router.push(`/datasets/${datasetId}/documents/${documentId}`)
  }

  const cancelHandler = () => router.back()

  const { data: documentDetail, error } = useDocumentDetail({
    datasetId,
    documentId,
    params: { metadata: 'without' },
  })

  // Notion-page shaped view of the document, passed to StepTwo as its only page.
  const currentPage = useMemo(() => {
    // `data_source_info` is read through `?.` everywhere in this component so a
    // document without source info renders instead of throwing.
    const sourceInfo = documentDetail?.data_source_info
    return {
      workspace_id: sourceInfo?.notion_workspace_id,
      page_id: sourceInfo?.notion_page_id,
      page_name: documentDetail?.name,
      page_icon: sourceInfo?.notion_page_icon,
      type: documentDetail?.data_source_type,
    }
  }, [documentDetail])

  if (error)
    return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />

  return (
    <div className='flex' style={{ height: 'calc(100vh - 56px)' }}>
      <div className='grow'>
        {!documentDetail && <Loading type='app' />}
        {dataset && documentDetail && (
          <StepTwo
            isAPIKeySet={!!embeddingsDefaultModel}
            onSetting={showSetAPIKey}
            datasetId={datasetId}
            dataSourceType={documentDetail.data_source_type as DataSourceType}
            notionPages={[currentPage as unknown as NotionPage]}
            websitePages={[
              {
                title: documentDetail.name,
                source_url: documentDetail.data_source_info?.url,
                markdown: '',
                description: '',
              },
            ]}
            websiteCrawlProvider={documentDetail.data_source_info?.provider}
            websiteCrawlJobId={documentDetail.data_source_info?.job_id}
            crawlOptions={documentDetail.data_source_info as unknown as CrawlOptions}
            indexingType={indexingTechnique}
            isSetting
            documentDetail={documentDetail}
            files={[documentDetail.data_source_info?.upload_file as CustomFile]}
            onSave={saveHandler}
            onCancel={cancelHandler}
          />
        )}
      </div>
      {isShowSetAPIKey && <AccountSetting activeTab='provider' onCancel={async () => {
        hideSetAPIkey()
      }} />}
    </div>
  )
}

export default DocumentSettings

+ 23
- 84
web/app/components/datasets/documents/detail/settings/index.tsx Прегледај датотеку

'use client' 'use client'
import React, { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import { useBoolean } from 'ahooks'
import { useContext } from 'use-context-selector'
import { useRouter } from 'next/navigation'
import DatasetDetailContext from '@/context/dataset-detail'
import type { CrawlOptions, CustomFile } from '@/models/datasets'
import React from 'react'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import DocumentSettings from './document-settings'
import PipelineSettings from './pipeline-settings'


import Loading from '@/app/components/base/loading'
import StepTwo from '@/app/components/datasets/create/step-two'
import AccountSetting from '@/app/components/header/account-setting'
import AppUnavailable from '@/app/components/base/app-unavailable'
import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { NotionPage } from '@/models/common'
import { useDocumentDetail, useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'

type DocumentSettingsProps = {
type SettingsProps = {
datasetId: string datasetId: string
documentId: string documentId: string
} }


const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
const { t } = useTranslation()
const router = useRouter()
const [isShowSetAPIKey, { setTrue: showSetAPIKey, setFalse: hideSetAPIkey }] = useBoolean()
const { indexingTechnique, dataset } = useContext(DatasetDetailContext)
const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding)

const invalidDocumentDetail = useInvalidDocumentDetailKey()
const saveHandler = () => {
invalidDocumentDetail()
router.push(`/datasets/${datasetId}/documents/${documentId}`)
const Settings = ({
datasetId,
documentId,
}: SettingsProps) => {
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)

if (!pipelineId) {
return (
<DocumentSettings
datasetId={datasetId}
documentId={documentId}
/>
)
} }


const cancelHandler = () => router.back()

const { data: documentDetail, error } = useDocumentDetail({
datasetId,
documentId,
params: { metadata: 'without' },
})

const currentPage = useMemo(() => {
return {
workspace_id: documentDetail?.data_source_info.notion_workspace_id,
page_id: documentDetail?.data_source_info.notion_page_id,
page_name: documentDetail?.name,
page_icon: documentDetail?.data_source_info.notion_page_icon,
type: documentDetail?.data_source_type,
}
}, [documentDetail])

if (error)
return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />

return ( return (
<div className='flex' style={{ height: 'calc(100vh - 56px)' }}>
<div className="grow">
{!documentDetail && <Loading type='app' />}
{dataset && documentDetail && (
<StepTwo
isAPIKeySet={!!embeddingsDefaultModel}
onSetting={showSetAPIKey}
datasetId={datasetId}
dataSourceType={documentDetail.data_source_type}
notionPages={[currentPage as unknown as NotionPage]}
websitePages={[
{
title: documentDetail.name,
source_url: documentDetail.data_source_info?.url,
markdown: '',
description: '',
},
]}
websiteCrawlProvider={documentDetail.data_source_info?.provider}
websiteCrawlJobId={documentDetail.data_source_info?.job_id}
crawlOptions={documentDetail.data_source_info as unknown as CrawlOptions}
indexingType={indexingTechnique}
isSetting
documentDetail={documentDetail}
files={[documentDetail.data_source_info.upload_file as CustomFile]}
onSave={saveHandler}
onCancel={cancelHandler}
/>
)}
</div>
{isShowSetAPIKey && <AccountSetting activeTab="provider" onCancel={async () => {
hideSetAPIkey()
}} />}
</div>
<PipelineSettings
datasetId={datasetId}
documentId={documentId}
/>
) )
} }


export default DocumentSettings
export default Settings

+ 120
- 0
web/app/components/datasets/documents/detail/settings/pipeline-settings/index.tsx Прегледај датотеку

import { useCallback, useRef, useState } from 'react'
import type { CrawlResultItem, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets'
import type { NotionPage } from '@/models/common'
import { useTranslation } from 'react-i18next'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { useDocumentDetail } from '@/service/knowledge/use-document'
import AppUnavailable from '@/app/components/base/app-unavailable'
import ChunkPreview from '../../../create-from-pipeline/preview/chunk-preview'
import Loading from '@/app/components/base/loading'
import type { DatasourceType } from '@/models/pipeline'
import ProcessDocuments from './process-documents'
import LeftHeader from './left-header'

// Props accepted by the PipelineSettings component.
type PipelineSettingsProps = {
// Dataset ID; used for the document-detail query.
datasetId: string
// Document ID; used for the document-detail query and passed on to ProcessDocuments.
documentId: string
}

/**
 * Pipeline variant of the document settings page.
 *
 * Left column: header plus the `ProcessDocuments` form. Right column: a
 * `ChunkPreview`. Both the "process" and "preview" actions submit the same
 * form; `isPreview` remembers which action triggered the submit so that
 * `handleSubmit` can dispatch to the matching handler.
 *
 * The preview/process handlers and the preview file list are still
 * placeholders (see the `todo` markers).
 *
 * @param datasetId  ID of the dataset that owns the document.
 * @param documentId ID of the document whose settings are edited.
 */
const PipelineSettings = ({
  datasetId,
  documentId,
}: PipelineSettingsProps) => {
  const { t } = useTranslation()
  const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
  const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined)

  // Set right before the form is submitted; read once in `handleSubmit`.
  const isPreview = useRef(false)
  const formRef = useRef<any>(null)

  const { data: documentDetail, error, isFetching: isFetchingDocumentDetail } = useDocumentDetail({
    datasetId,
    documentId,
    params: { metadata: 'without' },
  })

  const handlePreviewChunks = useCallback(async (data: Record<string, any>) => {
    // todo: Preview
  }, [])

  const handleProcess = useCallback(async (data: Record<string, any>) => {
    // todo: Process
  }, [])

  const onClickProcess = useCallback(() => {
    isPreview.current = false
    formRef.current?.submit()
  }, [])

  const onClickPreview = useCallback(() => {
    isPreview.current = true
    formRef.current?.submit()
  }, [])

  // Single submit entry point for the form; routes by the flag set above.
  const handleSubmit = useCallback((data: Record<string, any>) => {
    if (isPreview.current)
      handlePreviewChunks(data)
    else
      handleProcess(data)
  }, [handlePreviewChunks, handleProcess])

  // Switching the previewed item re-runs the preview, whatever the source kind.
  const handlePreviewFileChange = useCallback((file: DocumentItem) => {
    onClickPreview()
  }, [onClickPreview])

  const handlePreviewOnlineDocumentChange = useCallback((page: NotionPage) => {
    onClickPreview()
  }, [onClickPreview])

  const handlePreviewWebsiteChange = useCallback((website: CrawlResultItem) => {
    onClickPreview()
  }, [onClickPreview])

  if (isFetchingDocumentDetail) {
    return (
      <Loading type='app' />
    )
  }

  if (error)
    return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />

  // "Not fetching" and "no error" do not guarantee data (e.g. a paused or
  // disabled query), so keep showing the loader instead of dereferencing
  // an undefined detail below.
  if (!documentDetail) {
    return (
      <Loading type='app' />
    )
  }

  return (
    <div
      className='relative flex h-[calc(100vh-56px)] overflow-x-auto rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle'
    >
      <div className='flex h-full flex-1 flex-col px-14'>
        <LeftHeader title={t('datasetPipeline.documentSettings.title')} />
        <div className='grow overflow-y-auto'>
          <ProcessDocuments
            ref={formRef}
            documentId={documentId}
            onProcess={onClickProcess}
            onPreview={onClickPreview}
            onSubmit={handleSubmit}
          />
        </div>
      </div>
      {/* Preview */}
      <div className='flex h-full flex-1 shrink-0 pl-2 pt-2'>
        <ChunkPreview
          dataSourceType={documentDetail.data_source_type as DatasourceType}
          // @ts-expect-error mock data // todo: remove mock data
          files={[{
            id: '12345678',
            name: 'test-file',
            extension: 'txt',
          }]}
          onlineDocuments={[]}
          websitePages={[]}
          isIdle={true}
          isPending={true}
          estimateData={estimateData}
          onPreview={onClickPreview}
          handlePreviewFileChange={handlePreviewFileChange}
          handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange}
          handlePreviewWebsitePageChange={handlePreviewWebsiteChange}
        />
      </div>
    </div>
  )
}

export default PipelineSettings

+ 42
- 0
web/app/components/datasets/documents/detail/settings/pipeline-settings/left-header.tsx Прегледај датотеку

import React, { useCallback } from 'react'
import { RiArrowLeftLine } from '@remixicon/react'
import Button from '@/app/components/base/button'
import { useRouter } from 'next/navigation'
import Effect from '@/app/components/base/effect'
import { useTranslation } from 'react-i18next'

// Props accepted by the LeftHeader component.
type LeftHeaderProps = {
// Text rendered in the small uppercase line at the top of the header.
title: string
}

/**
 * Header of the left column: the caller-supplied title, the translated
 * "process documents" step label, and a round button that navigates back
 * in the router history.
 */
const LeftHeader = ({
  title,
}: LeftHeaderProps) => {
  const { t } = useTranslation()
  const { back: goBack } = useRouter()

  const handleBackClick = useCallback(() => {
    goBack()
  }, [goBack])

  const stepLabel = t('datasetPipeline.addDocuments.steps.processDocuments')

  return (
    <div className='relative flex flex-col gap-y-0.5 pb-2 pt-4'>
      <div className='system-2xs-semibold-uppercase bg-pipeline-add-documents-title-bg bg-clip-text text-transparent'>
        {title}
      </div>
      <div className='system-md-semibold text-text-primary'>
        {stepLabel}
      </div>
      {/* Positioned absolutely so it sits to the left of the title lines. */}
      <Button
        variant='secondary-accent'
        className='absolute -left-11 top-3.5 size-9 rounded-full p-0'
        onClick={handleBackClick}
      >
        <RiArrowLeftLine className='size-5 ' />
      </Button>
      <Effect className='left-8 top-[-34px] opacity-20' />
    </div>
  )
}

export default React.memo(LeftHeader)

+ 26
- 0
web/app/components/datasets/documents/detail/settings/pipeline-settings/process-documents/actions.tsx Прегледај датотеку

import React from 'react'
import Button from '@/app/components/base/button'
import { useTranslation } from 'react-i18next'

// Props accepted by the Actions component.
type ActionsProps = {
// Click handler for the primary "save and process" button.
onProcess: () => void
}

/**
 * Right-aligned action row containing a single primary button that
 * invokes `onProcess` when clicked.
 */
const Actions = ({
  onProcess,
}: ActionsProps) => {
  const { t } = useTranslation()
  const saveAndProcessLabel = t('datasetPipeline.operations.saveAndProcess')

  return (
    <div className='flex items-center justify-end'>
      <Button variant='primary' onClick={onProcess}>
        {saveAndProcessLabel}
      </Button>
    </div>
  )
}

export default React.memo(Actions)

+ 11
- 0
web/app/components/datasets/documents/detail/settings/pipeline-settings/process-documents/hooks.ts Прегледај датотеку

import type { BaseConfiguration } from '@/app/components/base/form/form-scenarios/base/types'

/**
 * Provides the form configuration and initial values used when processing a
 * document.
 *
 * Currently this returns an empty initial-data object and an empty
 * configuration list; `documentId` is accepted but not read yet.
 * NOTE(review): presumably a placeholder until the real configuration is
 * loaded per document — confirm.
 *
 * @param documentId - Identifier of the document being configured (unused for now).
 * @returns An object holding `initialData` (form default values) and
 *          `configurations` (field definitions for the form).
 */
export const useConfigurations = (documentId: string) => {
  const initialData: Record<string, any> = {}
  const configurations: BaseConfiguration[] = []

  return {
    initialData,
    configurations,
  }
}

+ 39
- 0
web/app/components/datasets/documents/detail/settings/pipeline-settings/process-documents/index.tsx Прегледај датотеку

import { generateZodSchema } from '@/app/components/base/form/form-scenarios/base/utils'
import { useConfigurations } from './hooks'
import Actions from './actions'
import Form from '../../../../create-from-pipeline/process-documents/form'

type ProcessDocumentsProps = {
  /** Identifier passed to `useConfigurations`. */
  documentId: string
  /** Ref forwarded as-is to the underlying `Form`. */
  ref: React.RefObject<any>
  /** Handler for the action button rendered below the form. */
  onProcess: () => void
  /** Forwarded to the form's `onPreview` prop. */
  onPreview: () => void
  /** Forwarded to the form's `onSubmit` prop. */
  onSubmit: (data: Record<string, any>) => void
}

/**
 * Renders the processing form for a document followed by its action row.
 *
 * The form's initial data and field configurations come from
 * `useConfigurations(documentId)`, and its validation schema is built from
 * those configurations with `generateZodSchema`.
 */
const ProcessDocuments = (props: ProcessDocumentsProps) => {
  const {
    documentId,
    onProcess,
    onPreview,
    onSubmit,
    ref,
  } = props

  const formSetup = useConfigurations(documentId)
  const formSchema = generateZodSchema(formSetup.configurations)

  return (
    <div className='flex flex-col gap-y-4 pt-4'>
      <Form
        ref={ref}
        initialData={formSetup.initialData}
        configurations={formSetup.configurations}
        schema={formSchema}
        onSubmit={onSubmit}
        onPreview={onPreview}
      />
      <Actions onProcess={onProcess} />
    </div>
  )
}

export default ProcessDocuments

+ 1
- 1
web/app/components/plugins/constants.ts Прегледај датотеку

export const categoryKeys = [ export const categoryKeys = [
'model', 'model',
'tool', 'tool',
'datasource',
'agent-strategy', 'agent-strategy',
'extension', 'extension',
'bundle', 'bundle',
'datasource',
] ]

+ 4
- 0
web/app/components/plugins/marketplace/description/index.tsx Прегледај датотеку

<span className='relative z-[2] lowercase'>{t('category.tools')}</span> <span className='relative z-[2] lowercase'>{t('category.tools')}</span>
</span> </span>
, ,
<span className="body-md-medium relative z-[1] ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']">
<span className='relative z-[2] lowercase'>{t('category.datasources')}</span>
</span>
,
<span className="body-md-medium relative z-[1] ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']"> <span className="body-md-medium relative z-[1] ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']">
<span className='relative z-[2] lowercase'>{t('category.agents')}</span> <span className='relative z-[2] lowercase'>{t('category.agents')}</span>
</span> </span>

+ 6
- 0
web/app/components/plugins/marketplace/plugin-type-switch.tsx Прегледај датотеку

tool: PluginType.tool, tool: PluginType.tool,
agent: PluginType.agent, agent: PluginType.agent,
extension: PluginType.extension, extension: PluginType.extension,
datasource: PluginType.datasource,
bundle: 'bundle', bundle: 'bundle',
} }
type PluginTypeSwitchProps = { type PluginTypeSwitchProps = {
text: t('plugin.category.tools'), text: t('plugin.category.tools'),
icon: <RiHammerLine className='mr-1.5 h-4 w-4' />, icon: <RiHammerLine className='mr-1.5 h-4 w-4' />,
}, },
{
value: PLUGIN_TYPE_SEARCH_MAP.datasource,
text: t('plugin.category.datasources'),
icon: <RiHammerLine className='mr-1.5 h-4 w-4' />,
},
{ {
value: PLUGIN_TYPE_SEARCH_MAP.agent, value: PLUGIN_TYPE_SEARCH_MAP.agent,
text: t('plugin.category.agents'), text: t('plugin.category.agents'),

+ 1
- 0
web/app/components/plugins/types.ts Прегледај датотеку

model = 'model', model = 'model',
extension = 'extension', extension = 'extension',
agent = 'agent-strategy', agent = 'agent-strategy',
datasource = 'datasource',
} }


export enum PluginSource { export enum PluginSource {

+ 4
- 0
web/app/components/tools/marketplace/index.tsx Прегледај датотеку

{t('plugin.category.tools')} {t('plugin.category.tools')}
</span> </span>
, ,
<span className="body-md-medium relative ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']">
{t('plugin.category.datasources')}
</span>
,
<span className="body-md-medium relative ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']"> <span className="body-md-medium relative ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']">
{t('plugin.category.agents')} {t('plugin.category.agents')}
</span> </span>

+ 3
- 0
web/i18n/en-US/dataset-pipeline.ts Прегледај датотеку

}, },
characters: 'characters', characters: 'characters',
}, },
documentSettings: {
title: 'Document Settings',
},
} }


export default translation export default translation

+ 3
- 0
web/i18n/zh-Hans/dataset-pipeline.ts Прегледај датотеку

}, },
characters: '字符', characters: '字符',
}, },
documentSettings: {
title: '文档设置',
},
} }


export default translation export default translation

+ 2
- 1
web/models/datasets.ts Прегледај датотеку

import type { MetadataItemWithValue } from '@/app/components/datasets/metadata/types' import type { MetadataItemWithValue } from '@/app/components/datasets/metadata/types'
import { ExternalKnowledgeBase, General, ParentChild, Qa } from '@/app/components/base/icons/src/public/knowledge/dataset-card' import { ExternalKnowledgeBase, General, ParentChild, Qa } from '@/app/components/base/icons/src/public/knowledge/dataset-card'
import { GeneralChunk, ParentChildChunk, QuestionAndAnswer } from '@/app/components/base/icons/src/vender/knowledge' import { GeneralChunk, ParentChildChunk, QuestionAndAnswer } from '@/app/components/base/icons/src/vender/knowledge'
import type { DatasourceType } from './pipeline'


export enum DataSourceType { export enum DataSourceType {
FILE = 'upload_file', FILE = 'upload_file',
batch: string batch: string
position: number position: number
dataset_id: string dataset_id: string
data_source_type: DataSourceType
data_source_type: DataSourceType | DatasourceType
data_source_info: DataSourceInfo data_source_info: DataSourceInfo
dataset_process_rule_id: string dataset_process_rule_id: string
name: string name: string

Loading…
Откажи
Сачувај