瀏覽代碼

Merge branch 'feat/rag-pipeline' into deploy/rag-dev

tags/2.0.0-beta.1
twwu 4 月之前
父節點
當前提交
d3dbfbe8b3
共有 24 個檔案被更改,包括 439 行新增120 行删除
  1. 17
    4
      api/core/app/apps/base_app_runner.py
  2. 18
    0
      api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py
  3. 2
    0
      api/core/workflow/nodes/llm/node.py
  4. 1
    1
      web/app/components/base/markdown-blocks/button.tsx
  5. 2
    8
      web/app/components/datasets/documents/create-from-pipeline/index.tsx
  6. 3
    8
      web/app/components/datasets/documents/create-from-pipeline/preview/chunk-preview.tsx
  7. 5
    9
      web/app/components/datasets/documents/create-from-pipeline/processing/index.tsx
  8. 6
    1
      web/app/components/datasets/documents/detail/completed/segment-detail.tsx
  9. 6
    3
      web/app/components/datasets/documents/detail/new-segment.tsx
  10. 94
    0
      web/app/components/datasets/documents/detail/settings/document-settings.tsx
  11. 23
    84
      web/app/components/datasets/documents/detail/settings/index.tsx
  12. 120
    0
      web/app/components/datasets/documents/detail/settings/pipeline-settings/index.tsx
  13. 42
    0
      web/app/components/datasets/documents/detail/settings/pipeline-settings/left-header.tsx
  14. 26
    0
      web/app/components/datasets/documents/detail/settings/pipeline-settings/process-documents/actions.tsx
  15. 11
    0
      web/app/components/datasets/documents/detail/settings/pipeline-settings/process-documents/hooks.ts
  16. 39
    0
      web/app/components/datasets/documents/detail/settings/pipeline-settings/process-documents/index.tsx
  17. 1
    1
      web/app/components/plugins/constants.ts
  18. 4
    0
      web/app/components/plugins/marketplace/description/index.tsx
  19. 6
    0
      web/app/components/plugins/marketplace/plugin-type-switch.tsx
  20. 1
    0
      web/app/components/plugins/types.ts
  21. 4
    0
      web/app/components/tools/marketplace/index.tsx
  22. 3
    0
      web/i18n/en-US/dataset-pipeline.ts
  23. 3
    0
      web/i18n/zh-Hans/dataset-pipeline.ts
  24. 2
    1
      web/models/datasets.ts

+ 17
- 4
api/core/app/apps/base_app_runner.py 查看文件

@@ -1,3 +1,4 @@
import logging
import time
from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Optional, Union
@@ -33,6 +34,8 @@ from models.model import App, AppMode, Message, MessageAnnotation
if TYPE_CHECKING:
from core.file.models import File

_logger = logging.getLogger(__name__)


class AppRunner:
def get_pre_calculate_rest_tokens(
@@ -298,7 +301,7 @@ class AppRunner:
)

def _handle_invoke_result_stream(
self, invoke_result: Generator, queue_manager: AppQueueManager, agent: bool
self, invoke_result: Generator[LLMResultChunk, None, None], queue_manager: AppQueueManager, agent: bool
) -> None:
"""
Handle invoke result
@@ -317,18 +320,28 @@ class AppRunner:
else:
queue_manager.publish(QueueAgentMessageEvent(chunk=result), PublishFrom.APPLICATION_MANAGER)

text += result.delta.message.content
message = result.delta.message
if isinstance(message.content, str):
text += message.content
elif isinstance(message.content, list):
for content in message.content:
if not isinstance(content, str):
# TODO(QuantumGhost): Add multimodal output support for easy ui.
_logger.warning("received multimodal output, type=%s", type(content))
text += content.data
else:
text += content # fallback to str

if not model:
model = result.model

if not prompt_messages:
prompt_messages = result.prompt_messages
prompt_messages = list(result.prompt_messages)

if result.delta.usage:
usage = result.delta.usage

if not usage:
if usage is None:
usage = LLMUsage.empty_usage()

llm_result = LLMResult(

+ 18
- 0
api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py 查看文件

@@ -48,6 +48,7 @@ from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
TextPromptMessageContent,
)
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.ops.entities.trace_entity import TraceTaskName
@@ -309,6 +310,23 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
delta_text = chunk.delta.message.content
if delta_text is None:
continue
if isinstance(chunk.delta.message.content, list):
delta_text = ""
for content in chunk.delta.message.content:
logger.debug(
"The content type %s in LLM chunk delta message content.: %r", type(content), content
)
if isinstance(content, TextPromptMessageContent):
delta_text += content.data
elif isinstance(content, str):
delta_text += content # fallback to str
else:
logger.warning(
"Unsupported content type %s in LLM chunk delta message content.: %r",
type(content),
content,
)
continue

if not self._task_state.llm_result.prompt_messages:
self._task_state.llm_result.prompt_messages = chunk.prompt_messages

+ 2
- 0
api/core/workflow/nodes/llm/node.py 查看文件

@@ -525,6 +525,8 @@ class LLMNode(BaseNode[LLMNodeData]):
# Set appropriate response format based on model capabilities
self._set_response_format(completion_params, model_schema.parameter_rules)
model_config_with_cred.parameters = completion_params
# NOTE(-LAN-): This line modifies `self.node_data.model`, which is used in `_invoke_llm()`.
node_data_model.completion_params = completion_params
return model, model_config_with_cred

def _fetch_prompt_messages(

+ 1
- 1
web/app/components/base/markdown-blocks/button.tsx 查看文件

@@ -14,7 +14,7 @@ const MarkdownButton = ({ node }: any) => {
size={size}
className={cn('!h-auto min-h-8 select-none whitespace-normal !px-3')}
onClick={() => {
if (isValidUrl(link)) {
if (link && isValidUrl(link)) {
window.open(link, '_blank')
return
}

+ 2
- 8
web/app/components/datasets/documents/create-from-pipeline/index.tsx 查看文件

@@ -32,10 +32,7 @@ const CreateFormPipeline = () => {
const { t } = useTranslation()
const plan = useProviderContextSelector(state => state.plan)
const enableBilling = useProviderContextSelector(state => state.enableBilling)
const datasetId = useDatasetDetailContextWithSelector(s => s.dataset?.id)
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const indexingType = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
const retrievalMethod = useDatasetDetailContextWithSelector(s => s.dataset?.retrieval_model_dict.search_method)
const [datasource, setDatasource] = useState<Datasource>()
const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined)
const [batchId, setBatchId] = useState('')
@@ -302,11 +299,8 @@ const CreateFormPipeline = () => {
{
currentStep === 3 && (
<Processing
datasetId={datasetId!}
batchId={batchId}
documents={documents}
indexingType={indexingType!}
retrievalMethod={retrievalMethod!}
/>
)
}
@@ -326,12 +320,12 @@ const CreateFormPipeline = () => {
currentStep === 2 && (
<div className='flex h-full w-[752px] shrink-0 pl-2 pt-2'>
<ChunkPreview
datasource={datasource!}
dataSourceType={datasource!.type}
files={fileList.map(file => file.file)}
onlineDocuments={onlineDocuments}
websitePages={websitePages}
isIdle={isIdle}
isPending={isPending}
isPending={isPending && isPreview.current}
estimateData={estimateData}
onPreview={onClickPreview}
handlePreviewFileChange={handlePreviewFileChange}

+ 3
- 8
web/app/components/datasets/documents/create-from-pipeline/preview/chunk-preview.tsx 查看文件

@@ -2,7 +2,6 @@ import React, { useState } from 'react'
import { useTranslation } from 'react-i18next'
import { PreviewContainer } from '../../../preview/container'
import { PreviewHeader } from '../../../preview/header'
import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
import type { CrawlResultItem, CustomFile, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets'
import { ChunkingMode } from '@/models/datasets'
import type { NotionPage } from '@/models/common'
@@ -18,7 +17,7 @@ import Button from '@/app/components/base/button'
import { DatasourceType } from '@/models/pipeline'

type ChunkPreviewProps = {
datasource: Datasource
dataSourceType: DatasourceType
files: CustomFile[]
onlineDocuments: NotionPage[]
websitePages: CrawlResultItem[]
@@ -32,7 +31,7 @@ type ChunkPreviewProps = {
}

const ChunkPreview = ({
datasource,
dataSourceType,
files,
onlineDocuments,
websitePages,
@@ -51,8 +50,6 @@ const ChunkPreview = ({
const [previewOnlineDocument, setPreviewOnlineDocument] = useState<NotionPage>(onlineDocuments[0])
const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem>(websitePages[0])

const dataSourceType = datasource?.type

return (
<PreviewContainer
header={<PreviewHeader
@@ -185,9 +182,7 @@ const ChunkPreview = ({
<p className='text-sm text-text-tertiary'>
{t('datasetCreation.stepTwo.previewChunkTip')}
</p>
<Button
onClick={onPreview}
>
<Button onClick={onPreview}>
{t('datasetPipeline.addDocuments.stepTwo.previewChunks')}
</Button>
</div>

+ 5
- 9
web/app/components/datasets/documents/create-from-pipeline/processing/index.tsx 查看文件

@@ -4,34 +4,30 @@ import { useTranslation } from 'react-i18next'
import { RiBookOpenLine } from '@remixicon/react'
import { useGetDocLanguage } from '@/context/i18n'
import EmbeddingProcess from './embedding-process'
import type { IndexingType } from '../../../create/step-two'
import type { RETRIEVE_METHOD } from '@/types/app'
import type { InitialDocumentDetail } from '@/models/pipeline'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'

type ProcessingProps = {
datasetId: string
indexingType: IndexingType
retrievalMethod: RETRIEVE_METHOD
batchId: string
documents: InitialDocumentDetail[]
}

const Processing = ({
datasetId,
batchId,
documents,
indexingType,
retrievalMethod,
}: ProcessingProps) => {
const { t } = useTranslation()
const docLanguage = useGetDocLanguage()
const datasetId = useDatasetDetailContextWithSelector(s => s.dataset?.id)
const indexingType = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
const retrievalMethod = useDatasetDetailContextWithSelector(s => s.dataset?.retrieval_model_dict.search_method)

return (
<div className='flex h-full w-full justify-center overflow-hidden'>
<div className='h-full w-3/5 overflow-y-auto pb-8 pt-10'>
<div className='max-w-[640px]'>
<EmbeddingProcess
datasetId={datasetId}
datasetId={datasetId!}
batchId={batchId}
documents={documents}
indexingType={indexingType}

+ 6
- 1
web/app/components/datasets/documents/detail/completed/segment-detail.tsx 查看文件

@@ -18,6 +18,8 @@ import { useEventEmitterContextContext } from '@/context/event-emitter'
import { formatNumber } from '@/utils/format'
import classNames from '@/utils/classnames'
import Divider from '@/app/components/base/divider'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { IndexingType } from '../../../create/step-two'

type ISegmentDetailProps = {
segInfo?: Partial<SegmentDetailModel> & { id: string }
@@ -48,6 +50,7 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen)
const mode = useDocumentContext(s => s.mode)
const parentMode = useDocumentContext(s => s.parentMode)
const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)

eventEmitter?.useSubscription((v) => {
if (v === 'update-segment')
@@ -103,6 +106,8 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
return isParentChildMode ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk')
}, [isParentChildMode, t])

const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL

return (
<div className={'flex h-full flex-col'}>
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
@@ -149,7 +154,7 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
isEditMode={isEditMode}
/>
</div>
{mode === 'custom' && <Keywords
{isECOIndexing && <Keywords
className={fullScreen ? 'w-1/5' : ''}
actionType={isEditMode ? 'edit' : 'view'}
segInfo={segInfo}

+ 6
- 3
web/app/components/datasets/documents/detail/new-segment.tsx 查看文件

@@ -12,7 +12,6 @@ import Keywords from './completed/common/keywords'
import ChunkContent from './completed/common/chunk-content'
import AddAnother from './completed/common/add-another'
import Dot from './completed/common/dot'
import { useDocumentContext } from './index'
import { useStore as useAppStore } from '@/app/components/app/store'
import { ToastContext } from '@/app/components/base/toast'
import { ChunkingMode, type SegmentUpdater } from '@/models/datasets'
@@ -20,6 +19,8 @@ import classNames from '@/utils/classnames'
import { formatNumber } from '@/utils/format'
import Divider from '@/app/components/base/divider'
import { useAddSegment } from '@/service/knowledge/use-segment'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { IndexingType } from '../../create/step-two'

type NewSegmentModalProps = {
onCancel: () => void
@@ -44,7 +45,7 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
const [addAnother, setAddAnother] = useState(true)
const fullScreen = useSegmentListContext(s => s.fullScreen)
const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen)
const mode = useDocumentContext(s => s.mode)
const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
const { appSidebarExpand } = useAppStore(useShallow(state => ({
appSidebarExpand: state.appSidebarExpand,
})))
@@ -137,6 +138,8 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [question.length, answer.length, isQAModel])

const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL

return (
<div className={'flex h-full flex-col'}>
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
@@ -182,7 +185,7 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
isEditMode={true}
/>
</div>
{mode === 'custom' && <Keywords
{isECOIndexing && <Keywords
className={fullScreen ? 'w-1/5' : ''}
actionType='add'
keywords={keywords}

+ 94
- 0
web/app/components/datasets/documents/detail/settings/document-settings.tsx 查看文件

@@ -0,0 +1,94 @@
import React, { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import { useBoolean } from 'ahooks'
import { useContext } from 'use-context-selector'
import { useRouter } from 'next/navigation'
import DatasetDetailContext from '@/context/dataset-detail'
import type { CrawlOptions, CustomFile, DataSourceType } from '@/models/datasets'
import Loading from '@/app/components/base/loading'
import StepTwo from '@/app/components/datasets/create/step-two'
import AccountSetting from '@/app/components/header/account-setting'
import AppUnavailable from '@/app/components/base/app-unavailable'
import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { NotionPage } from '@/models/common'
import { useDocumentDetail, useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'

type DocumentSettingsProps = {
  // ID of the dataset that owns the document.
  datasetId: string
  // ID of the document whose settings are edited.
  documentId: string
}

/**
 * Settings page for a single document.
 *
 * Loads the document detail via `useDocumentDetail`, shows a loading indicator
 * until it is available, renders `AppUnavailable` when the request fails, and
 * otherwise renders `StepTwo` in `isSetting` mode pre-filled from the document.
 */
const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
  const { t } = useTranslation()
  const router = useRouter()
  const [isShowSetAPIKey, { setTrue: showSetAPIKey, setFalse: hideSetAPIKey }] = useBoolean()
  const { indexingTechnique, dataset } = useContext(DatasetDetailContext)
  const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding)

  const invalidDocumentDetail = useInvalidDocumentDetailKey()
  // After saving: invalidate the document detail query, then go to the document page.
  const saveHandler = () => {
    invalidDocumentDetail()
    router.push(`/datasets/${datasetId}/documents/${documentId}`)
  }

  const cancelHandler = () => router.back()

  const { data: documentDetail, error } = useDocumentDetail({
    datasetId,
    documentId,
    params: { metadata: 'without' },
  })

  // Notion-page-shaped object built from the document detail. Every access to
  // `data_source_info` is optional-chained, matching the accesses in the JSX
  // below, so a document without that field does not throw during render.
  const currentPage = useMemo(() => {
    const sourceInfo = documentDetail?.data_source_info
    return {
      workspace_id: sourceInfo?.notion_workspace_id,
      page_id: sourceInfo?.notion_page_id,
      page_name: documentDetail?.name,
      page_icon: sourceInfo?.notion_page_icon,
      type: documentDetail?.data_source_type,
    }
  }, [documentDetail])

  if (error)
    return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />

  return (
    <div className='flex' style={{ height: 'calc(100vh - 56px)' }}>
      <div className='grow'>
        {!documentDetail && <Loading type='app' />}
        {dataset && documentDetail && (
          <StepTwo
            isAPIKeySet={!!embeddingsDefaultModel}
            onSetting={showSetAPIKey}
            datasetId={datasetId}
            dataSourceType={documentDetail.data_source_type as DataSourceType}
            notionPages={[currentPage as unknown as NotionPage]}
            websitePages={[
              {
                title: documentDetail.name,
                source_url: documentDetail.data_source_info?.url,
                markdown: '',
                description: '',
              },
            ]}
            websiteCrawlProvider={documentDetail.data_source_info?.provider}
            websiteCrawlJobId={documentDetail.data_source_info?.job_id}
            crawlOptions={documentDetail.data_source_info as unknown as CrawlOptions}
            indexingType={indexingTechnique}
            isSetting
            documentDetail={documentDetail}
            files={[documentDetail.data_source_info?.upload_file as CustomFile]}
            onSave={saveHandler}
            onCancel={cancelHandler}
          />
        )}
      </div>
      {isShowSetAPIKey && <AccountSetting activeTab='provider' onCancel={async () => {
        hideSetAPIKey()
      }} />}
    </div>
  )
}

export default DocumentSettings

+ 23
- 84
web/app/components/datasets/documents/detail/settings/index.tsx 查看文件

@@ -1,96 +1,35 @@
'use client'
import React, { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import { useBoolean } from 'ahooks'
import { useContext } from 'use-context-selector'
import { useRouter } from 'next/navigation'
import DatasetDetailContext from '@/context/dataset-detail'
import type { CrawlOptions, CustomFile } from '@/models/datasets'
import React from 'react'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import DocumentSettings from './document-settings'
import PipelineSettings from './pipeline-settings'

import Loading from '@/app/components/base/loading'
import StepTwo from '@/app/components/datasets/create/step-two'
import AccountSetting from '@/app/components/header/account-setting'
import AppUnavailable from '@/app/components/base/app-unavailable'
import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { NotionPage } from '@/models/common'
import { useDocumentDetail, useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'

type DocumentSettingsProps = {
type SettingsProps = {
datasetId: string
documentId: string
}

const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
const { t } = useTranslation()
const router = useRouter()
const [isShowSetAPIKey, { setTrue: showSetAPIKey, setFalse: hideSetAPIkey }] = useBoolean()
const { indexingTechnique, dataset } = useContext(DatasetDetailContext)
const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding)

const invalidDocumentDetail = useInvalidDocumentDetailKey()
const saveHandler = () => {
invalidDocumentDetail()
router.push(`/datasets/${datasetId}/documents/${documentId}`)
const Settings = ({
datasetId,
documentId,
}: SettingsProps) => {
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)

if (!pipelineId) {
return (
<DocumentSettings
datasetId={datasetId}
documentId={documentId}
/>
)
}

const cancelHandler = () => router.back()

const { data: documentDetail, error } = useDocumentDetail({
datasetId,
documentId,
params: { metadata: 'without' },
})

const currentPage = useMemo(() => {
return {
workspace_id: documentDetail?.data_source_info.notion_workspace_id,
page_id: documentDetail?.data_source_info.notion_page_id,
page_name: documentDetail?.name,
page_icon: documentDetail?.data_source_info.notion_page_icon,
type: documentDetail?.data_source_type,
}
}, [documentDetail])

if (error)
return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />

return (
<div className='flex' style={{ height: 'calc(100vh - 56px)' }}>
<div className="grow">
{!documentDetail && <Loading type='app' />}
{dataset && documentDetail && (
<StepTwo
isAPIKeySet={!!embeddingsDefaultModel}
onSetting={showSetAPIKey}
datasetId={datasetId}
dataSourceType={documentDetail.data_source_type}
notionPages={[currentPage as unknown as NotionPage]}
websitePages={[
{
title: documentDetail.name,
source_url: documentDetail.data_source_info?.url,
markdown: '',
description: '',
},
]}
websiteCrawlProvider={documentDetail.data_source_info?.provider}
websiteCrawlJobId={documentDetail.data_source_info?.job_id}
crawlOptions={documentDetail.data_source_info as unknown as CrawlOptions}
indexingType={indexingTechnique}
isSetting
documentDetail={documentDetail}
files={[documentDetail.data_source_info.upload_file as CustomFile]}
onSave={saveHandler}
onCancel={cancelHandler}
/>
)}
</div>
{isShowSetAPIKey && <AccountSetting activeTab="provider" onCancel={async () => {
hideSetAPIkey()
}} />}
</div>
<PipelineSettings
datasetId={datasetId}
documentId={documentId}
/>
)
}

export default DocumentSettings
export default Settings

+ 120
- 0
web/app/components/datasets/documents/detail/settings/pipeline-settings/index.tsx 查看文件

@@ -0,0 +1,120 @@
import { useCallback, useRef, useState } from 'react'
import type { CrawlResultItem, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets'
import type { NotionPage } from '@/models/common'
import { useTranslation } from 'react-i18next'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { useDocumentDetail } from '@/service/knowledge/use-document'
import AppUnavailable from '@/app/components/base/app-unavailable'
import ChunkPreview from '../../../create-from-pipeline/preview/chunk-preview'
import Loading from '@/app/components/base/loading'
import type { DatasourceType } from '@/models/pipeline'
import ProcessDocuments from './process-documents'
import LeftHeader from './left-header'

type PipelineSettingsProps = {
  // ID of the dataset that owns the document.
  datasetId: string
  // ID of the document whose pipeline settings are edited.
  documentId: string
}

/**
 * Settings page for a document that belongs to a pipeline-backed dataset.
 *
 * Renders a two-column layout: the process-documents form on the left and a
 * chunk preview on the right. Preview and process both go through the same
 * form submit; `isPreview` records which of the two triggered it.
 *
 * NOTE: the preview/process handlers and the preview file list are still
 * placeholders (see the `todo` markers below).
 */
const PipelineSettings = ({
  datasetId,
  documentId,
}: PipelineSettingsProps) => {
  const { t } = useTranslation()
  const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
  const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined)

  // Set right before submitting the form so `handleSubmit` knows which action was requested.
  const isPreview = useRef(false)
  const formRef = useRef<any>(null)

  const { data: documentDetail, error, isFetching: isFetchingDocumentDetail } = useDocumentDetail({
    datasetId,
    documentId,
    params: { metadata: 'without' },
  })

  const handlePreviewChunks = useCallback(async (data: Record<string, any>) => {
    // todo: Preview
  }, [])

  const handleProcess = useCallback(async (data: Record<string, any>) => {
    // todo: Process
  }, [])

  const onClickProcess = useCallback(() => {
    isPreview.current = false
    formRef.current?.submit()
  }, [])

  const onClickPreview = useCallback(() => {
    isPreview.current = true
    formRef.current?.submit()
  }, [])

  // Route the submitted form data to the preview or process handler.
  const handleSubmit = useCallback((data: Record<string, any>) => {
    if (isPreview.current)
      handlePreviewChunks(data)
    else
      handleProcess(data)
  }, [handlePreviewChunks, handleProcess])

  // Selecting a different preview target simply re-runs the preview.
  const handlePreviewFileChange = useCallback((file: DocumentItem) => {
    onClickPreview()
  }, [onClickPreview])

  const handlePreviewOnlineDocumentChange = useCallback((page: NotionPage) => {
    onClickPreview()
  }, [onClickPreview])

  const handlePreviewWebsiteChange = useCallback((website: CrawlResultItem) => {
    onClickPreview()
  }, [onClickPreview])

  if (isFetchingDocumentDetail) {
    return (
      <Loading type='app' />
    )
  }

  if (error)
    return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />

  // The query can settle without data and without an error; keep showing the
  // loader instead of dereferencing an undefined document below.
  if (!documentDetail) {
    return (
      <Loading type='app' />
    )
  }

  return (
    <div
      className='relative flex h-[calc(100vh-56px)] overflow-x-auto rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle'
    >
      <div className='flex h-full flex-1 flex-col px-14'>
        <LeftHeader title={t('datasetPipeline.documentSettings.title')} />
        <div className='grow overflow-y-auto'>
          <ProcessDocuments
            ref={formRef}
            documentId={documentId}
            onProcess={onClickProcess}
            onPreview={onClickPreview}
            onSubmit={handleSubmit}
          />
        </div>
      </div>
      {/* Preview */}
      <div className='flex h-full flex-1 shrink-0 pl-2 pt-2'>
        <ChunkPreview
          dataSourceType={documentDetail.data_source_type as DatasourceType}
          // @ts-expect-error mock data // todo: remove mock data
          files={[{
            id: '12345678',
            name: 'test-file',
            extension: 'txt',
          }]}
          onlineDocuments={[]}
          websitePages={[]}
          isIdle={true}
          isPending={true}
          estimateData={estimateData}
          onPreview={onClickPreview}
          handlePreviewFileChange={handlePreviewFileChange}
          handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange}
          handlePreviewWebsitePageChange={handlePreviewWebsiteChange}
        />
      </div>
    </div>
  )
}

export default PipelineSettings

+ 42
- 0
web/app/components/datasets/documents/detail/settings/pipeline-settings/left-header.tsx 查看文件

@@ -0,0 +1,42 @@
import React, { useCallback } from 'react'
import { RiArrowLeftLine } from '@remixicon/react'
import Button from '@/app/components/base/button'
import { useRouter } from 'next/navigation'
import Effect from '@/app/components/base/effect'
import { useTranslation } from 'react-i18next'

type LeftHeaderProps = {
  // Text shown in the small uppercase line at the top of the header.
  title: string
}

/**
 * Header of the left column: renders `title`, the translated
 * "process documents" step label, and a round button that calls the
 * router's `back()`.
 */
const LeftHeader = (props: LeftHeaderProps) => {
  const { title } = props
  const { t } = useTranslation()
  const { back } = useRouter()

  // Memoized click handler; calls `back` without forwarding the click event.
  const handleBackClick = useCallback(() => {
    back()
  }, [back])

  const stepLabel = t('datasetPipeline.addDocuments.steps.processDocuments')

  return (
    <div className='relative flex flex-col gap-y-0.5 pb-2 pt-4'>
      <div className='system-2xs-semibold-uppercase bg-pipeline-add-documents-title-bg bg-clip-text text-transparent'>
        {title}
      </div>
      <div className='system-md-semibold text-text-primary'>
        {stepLabel}
      </div>
      <Button
        variant='secondary-accent'
        className='absolute -left-11 top-3.5 size-9 rounded-full p-0'
        onClick={handleBackClick}
      >
        <RiArrowLeftLine className='size-5 ' />
      </Button>
      <Effect className='left-8 top-[-34px] opacity-20' />
    </div>
  )
}

export default React.memo(LeftHeader)

+ 26
- 0
web/app/components/datasets/documents/detail/settings/pipeline-settings/process-documents/actions.tsx 查看文件

@@ -0,0 +1,26 @@
import React from 'react'
import Button from '@/app/components/base/button'
import { useTranslation } from 'react-i18next'

type ActionsProps = {
  // Called when the primary button is clicked.
  onProcess: () => void
}

/**
 * Right-aligned action row with a single primary button labelled
 * "save and process" (translated) that invokes `onProcess`.
 */
const Actions = (props: ActionsProps) => {
  const { onProcess } = props
  const { t } = useTranslation()
  const buttonLabel = t('datasetPipeline.operations.saveAndProcess')

  return (
    <div className='flex items-center justify-end'>
      <Button variant='primary' onClick={onProcess}>
        {buttonLabel}
      </Button>
    </div>
  )
}

export default React.memo(Actions)

+ 11
- 0
web/app/components/datasets/documents/detail/settings/pipeline-settings/process-documents/hooks.ts 查看文件

@@ -0,0 +1,11 @@
import type { BaseConfiguration } from '@/app/components/base/form/form-scenarios/base/types'

/**
 * Supply the initial values and field configurations for the pipeline
 * "process documents" settings form.
 *
 * This is currently a stub: it always returns an empty `initialData` object
 * and an empty `configurations` list and does not read `documentId` yet.
 *
 * @param documentId - ID of the document being configured (not used yet).
 * @returns An object with `initialData` and `configurations`, both empty.
 */
export const useConfigurations = (documentId: string) => {
  const initialData: Record<string, any> = {}
  const configurations: BaseConfiguration[] = []

  return {
    initialData,
    configurations,
  }
}

+ 39
- 0
web/app/components/datasets/documents/detail/settings/pipeline-settings/process-documents/index.tsx 查看文件

@@ -0,0 +1,39 @@
import { generateZodSchema } from '@/app/components/base/form/form-scenarios/base/utils'
import { useConfigurations } from './hooks'
import Actions from './actions'
import Form from '../../../../create-from-pipeline/process-documents/form'

type ProcessDocumentsProps = {
  // ID passed to `useConfigurations` to build the form setup.
  documentId: string
  // Ref forwarded to the underlying `Form`.
  ref: React.RefObject<any>
  // Click handler for the action button below the form.
  onProcess: () => void
  // Forwarded to the form's `onPreview`.
  onPreview: () => void
  // Forwarded to the form's `onSubmit`; receives the submitted data.
  onSubmit: (data: Record<string, any>) => void
}

/**
 * Form section of the pipeline settings page: renders the shared `Form`
 * using the configurations from `useConfigurations`, followed by the
 * `Actions` row.
 */
const ProcessDocuments = (props: ProcessDocumentsProps) => {
  const { documentId, ref, onProcess, onPreview, onSubmit } = props
  const formSetup = useConfigurations(documentId)
  // Validation schema derived from the same field configurations the form renders.
  const validationSchema = generateZodSchema(formSetup.configurations)

  return (
    <div className='flex flex-col gap-y-4 pt-4'>
      <Form
        ref={ref}
        initialData={formSetup.initialData}
        configurations={formSetup.configurations}
        schema={validationSchema}
        onSubmit={onSubmit}
        onPreview={onPreview}
      />
      <Actions onProcess={onProcess} />
    </div>
  )
}

export default ProcessDocuments

+ 1
- 1
web/app/components/plugins/constants.ts 查看文件

@@ -21,8 +21,8 @@ export const tagKeys = [
export const categoryKeys = [
'model',
'tool',
'datasource',
'agent-strategy',
'extension',
'bundle',
'datasource',
]

+ 4
- 0
web/app/components/plugins/marketplace/description/index.tsx 查看文件

@@ -44,6 +44,10 @@ const Description = async ({
<span className='relative z-[2] lowercase'>{t('category.tools')}</span>
</span>
,
<span className="body-md-medium relative z-[1] ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']">
<span className='relative z-[2] lowercase'>{t('category.datasources')}</span>
</span>
,
<span className="body-md-medium relative z-[1] ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']">
<span className='relative z-[2] lowercase'>{t('category.agents')}</span>
</span>

+ 6
- 0
web/app/components/plugins/marketplace/plugin-type-switch.tsx 查看文件

@@ -21,6 +21,7 @@ export const PLUGIN_TYPE_SEARCH_MAP = {
tool: PluginType.tool,
agent: PluginType.agent,
extension: PluginType.extension,
datasource: PluginType.datasource,
bundle: 'bundle',
}
type PluginTypeSwitchProps = {
@@ -56,6 +57,11 @@ const PluginTypeSwitch = ({
text: t('plugin.category.tools'),
icon: <RiHammerLine className='mr-1.5 h-4 w-4' />,
},
{
value: PLUGIN_TYPE_SEARCH_MAP.datasource,
text: t('plugin.category.datasources'),
icon: <RiHammerLine className='mr-1.5 h-4 w-4' />,
},
{
value: PLUGIN_TYPE_SEARCH_MAP.agent,
text: t('plugin.category.agents'),

+ 1
- 0
web/app/components/plugins/types.ts 查看文件

@@ -7,6 +7,7 @@ export enum PluginType {
model = 'model',
extension = 'extension',
agent = 'agent-strategy',
datasource = 'datasource',
}

export enum PluginSource {

+ 4
- 0
web/app/components/tools/marketplace/index.tsx 查看文件

@@ -71,6 +71,10 @@ const Marketplace = ({
{t('plugin.category.tools')}
</span>
,
<span className="body-md-medium relative ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']">
{t('plugin.category.datasources')}
</span>
,
<span className="body-md-medium relative ml-1 text-text-secondary after:absolute after:bottom-[1.5px] after:left-0 after:h-2 after:w-full after:bg-text-text-selected after:content-['']">
{t('plugin.category.agents')}
</span>

+ 3
- 0
web/i18n/en-US/dataset-pipeline.ts 查看文件

@@ -103,6 +103,9 @@ const translation = {
},
characters: 'characters',
},
documentSettings: {
title: 'Document Settings',
},
}

export default translation

+ 3
- 0
web/i18n/zh-Hans/dataset-pipeline.ts 查看文件

@@ -103,6 +103,9 @@ const translation = {
},
characters: '字符',
},
documentSettings: {
title: '文档设置',
},
}

export default translation

+ 2
- 1
web/models/datasets.ts 查看文件

@@ -6,6 +6,7 @@ import type { MetadataFilteringVariableType } from '@/app/components/workflow/no
import type { MetadataItemWithValue } from '@/app/components/datasets/metadata/types'
import { ExternalKnowledgeBase, General, ParentChild, Qa } from '@/app/components/base/icons/src/public/knowledge/dataset-card'
import { GeneralChunk, ParentChildChunk, QuestionAndAnswer } from '@/app/components/base/icons/src/vender/knowledge'
import type { DatasourceType } from './pipeline'

export enum DataSourceType {
FILE = 'upload_file',
@@ -318,7 +319,7 @@ export type InitialDocumentDetail = {
batch: string
position: number
dataset_id: string
data_source_type: DataSourceType
data_source_type: DataSourceType | DatasourceType
data_source_info: DataSourceInfo
dataset_process_rule_id: string
name: string

Loading…
取消
儲存