| const result: CrawlResultItem[] = [ | const result: CrawlResultItem[] = [ | ||||
| { | { | ||||
| title: 'Start the frontend Docker container separately', | title: 'Start the frontend Docker container separately', | ||||
| markdown: 'Markdown 1', | |||||
| content: 'Markdown 1', | |||||
| description: 'Description 1', | description: 'Description 1', | ||||
| source_url: 'https://example.com/1', | source_url: 'https://example.com/1', | ||||
| }, | }, | ||||
| { | { | ||||
| title: 'Advanced Tool Integration', | title: 'Advanced Tool Integration', | ||||
| markdown: 'Markdown 2', | |||||
| content: 'Markdown 2', | |||||
| description: 'Description 2', | description: 'Description 2', | ||||
| source_url: 'https://example.com/2', | source_url: 'https://example.com/2', | ||||
| }, | }, | ||||
| { | { | ||||
| title: 'Local Source Code Start | English | Dify', | title: 'Local Source Code Start | English | Dify', | ||||
| markdown: 'Markdown 3', | |||||
| content: 'Markdown 3', | |||||
| description: 'Description 3', | description: 'Description 3', | ||||
| source_url: 'https://example.com/3', | source_url: 'https://example.com/3', | ||||
| }, | }, |
| }) as any | }) as any | ||||
| if (res.data) { | if (res.data) { | ||||
| const { title, content, description, url } = res.data | |||||
| const data = { | const data = { | ||||
| current: 1, | current: 1, | ||||
| total: 1, | total: 1, | ||||
| data: [{ | data: [{ | ||||
| title: res.data.title, | |||||
| markdown: res.data.content, | |||||
| description: res.data.description, | |||||
| source_url: res.data.url, | |||||
| title, | |||||
| content, | |||||
| description, | |||||
| source_url: url, | |||||
| }], | }], | ||||
| time_consuming: (Date.now() - startTime) / 1000, | time_consuming: (Date.now() - startTime) / 1000, | ||||
| } | } |
| <div className='system-xs-medium truncate text-text-tertiary' title={payload.source_url}>{payload.source_url}</div> | <div className='system-xs-medium truncate text-text-tertiary' title={payload.source_url}>{payload.source_url}</div> | ||||
| </div> | </div> | ||||
| <div className={cn(s.previewContent, 'body-md-regular')}> | <div className={cn(s.previewContent, 'body-md-regular')}> | ||||
| <div className={cn(s.fileContent)}>{payload.markdown}</div> | |||||
| <div className={cn(s.fileContent)}>{payload.content}</div> | |||||
| </div> | </div> | ||||
| </div> | </div> | ||||
| ) | ) |
| onDataSourceNodeCompleted: (data: DataSourceNodeCompletedResponse) => { | onDataSourceNodeCompleted: (data: DataSourceNodeCompletedResponse) => { | ||||
| const { data: crawlData, time_consuming } = data | const { data: crawlData, time_consuming } = data | ||||
| const crawlResultData = { | const crawlResultData = { | ||||
| data: crawlData.map((item: any) => { | |||||
| const { content, ...rest } = item | |||||
| return { | |||||
| markdown: content || '', | |||||
| ...rest, | |||||
| } as CrawlResultItem | |||||
| }), | |||||
| data: crawlData as CrawlResultItem[], | |||||
| time_consuming: time_consuming ?? 0, | time_consuming: time_consuming ?? 0, | ||||
| } | } | ||||
| setCrawlResult(crawlResultData) | setCrawlResult(crawlResultData) |
| const dataSourceStore = useDataSourceStore() | const dataSourceStore = useDataSourceStore() | ||||
| const selectedOnlineDriveFileList = useMemo(() => { | const selectedOnlineDriveFileList = useMemo(() => { | ||||
| return selectedFileIds.map(key => onlineDriveFileList.find(item => item.id === key)!) | |||||
| return selectedFileIds.map(id => onlineDriveFileList.find(item => item.id === id)!) | |||||
| }, [onlineDriveFileList, selectedFileIds]) | }, [onlineDriveFileList, selectedFileIds]) | ||||
| const clearOnlineDriveData = useCallback(() => { | const clearOnlineDriveData = useCallback(() => { |
| <span className='uppercase' title={currentWebsite.source_url}>{currentWebsite.source_url}</span> | <span className='uppercase' title={currentWebsite.source_url}>{currentWebsite.source_url}</span> | ||||
| <span>·</span> | <span>·</span> | ||||
| <span>·</span> | <span>·</span> | ||||
| <span>{`${formatNumberAbbreviated(currentWebsite.markdown.length)} ${t('datasetPipeline.addDocuments.characters')}`}</span> | |||||
| <span>{`${formatNumberAbbreviated(currentWebsite.content.length)} ${t('datasetPipeline.addDocuments.characters')}`}</span> | |||||
| </div> | </div> | ||||
| </div> | </div> | ||||
| <button | <button | ||||
| </button> | </button> | ||||
| </div> | </div> | ||||
| <div className='body-md-regular grow overflow-hidden px-6 py-5 text-text-secondary'> | <div className='body-md-regular grow overflow-hidden px-6 py-5 text-text-secondary'> | ||||
| {currentWebsite.markdown} | |||||
| {currentWebsite.content} | |||||
| </div> | </div> | ||||
| </div> | </div> | ||||
| ) | ) |
| { | { | ||||
| title: documentDetail.name, | title: documentDetail.name, | ||||
| source_url: documentDetail.data_source_info?.url, | source_url: documentDetail.data_source_info?.url, | ||||
| markdown: '', | |||||
| content: '', | |||||
| description: '', | description: '', | ||||
| }, | }, | ||||
| ]} | ]} |
| import LeftHeader from './left-header' | import LeftHeader from './left-header' | ||||
| import { usePipelineExecutionLog, useRunPublishedPipeline } from '@/service/use-pipeline' | import { usePipelineExecutionLog, useRunPublishedPipeline } from '@/service/use-pipeline' | ||||
| import type { OnlineDriveFile, PublishedPipelineRunPreviewResponse } from '@/models/pipeline' | import type { OnlineDriveFile, PublishedPipelineRunPreviewResponse } from '@/models/pipeline' | ||||
| import { DatasourceType, OnlineDriveFileType } from '@/models/pipeline' | |||||
| import { DatasourceType } from '@/models/pipeline' | |||||
| import { noop } from 'lodash-es' | import { noop } from 'lodash-es' | ||||
| import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' | import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' | ||||
| import { useRouter } from 'next/navigation' | import { useRouter } from 'next/navigation' | ||||
| import { useInvalidDocumentDetail, useInvalidDocumentList } from '@/service/knowledge/use-document' | import { useInvalidDocumentDetail, useInvalidDocumentList } from '@/service/knowledge/use-document' | ||||
| import { isFile } from '../../../create-from-pipeline/data-source/online-drive/utils' | |||||
| type PipelineSettingsProps = { | type PipelineSettingsProps = { | ||||
| datasetId: string | datasetId: string | ||||
| if (lastRunData?.datasource_type === DatasourceType.websiteCrawl) { | if (lastRunData?.datasource_type === DatasourceType.websiteCrawl) { | ||||
| const { content, description, source_url, title } = lastRunData.datasource_info | const { content, description, source_url, title } = lastRunData.datasource_info | ||||
| websitePages.push({ | websitePages.push({ | ||||
| markdown: content, | |||||
| content, | |||||
| description, | description, | ||||
| source_url, | source_url, | ||||
| title, | title, | ||||
| const onlineDriveFiles = useMemo(() => { | const onlineDriveFiles = useMemo(() => { | ||||
| const onlineDriveFiles: OnlineDriveFile[] = [] | const onlineDriveFiles: OnlineDriveFile[] = [] | ||||
| if (lastRunData?.datasource_type === DatasourceType.onlineDrive) { | if (lastRunData?.datasource_type === DatasourceType.onlineDrive) { | ||||
| const { key } = lastRunData.datasource_info | |||||
| const isFileType = isFile(key) | |||||
| const filePathList = key.split('/') | |||||
| const { id, type, name, size } = lastRunData.datasource_info | |||||
| onlineDriveFiles.push({ | onlineDriveFiles.push({ | ||||
| key, | |||||
| displayName: `${isFileType ? filePathList.pop() : filePathList[filePathList.length - 2]}${isFileType ? '' : '/'}`, | |||||
| type: isFileType ? OnlineDriveFileType.file : OnlineDriveFileType.folder, | |||||
| id, | |||||
| name, | |||||
| type, | |||||
| size, | |||||
| }) | }) | ||||
| } | } | ||||
| return onlineDriveFiles | return onlineDriveFiles |
| )} | )} | ||||
| </div> | </div> | ||||
| ) | ) | ||||
| })} | |||||
| })} | |||||
| </> | </> | ||||
| </OutputVars> | </OutputVars> | ||||
| </div> | </div> |
| export type CrawlResultItem = { | export type CrawlResultItem = { | ||||
| title: string | title: string | ||||
| markdown: string | |||||
| content: string | |||||
| description: string | description: string | ||||
| source_url: string | source_url: string | ||||
| } | } | ||||
| provider?: DataSourceProvider | provider?: DataSourceProvider | ||||
| job_id: string | job_id: string | ||||
| url: string | url: string | ||||
| credential_id?: string | |||||
| } | } | ||||
| export type InitialDocumentDetail = { | export type InitialDocumentDetail = { |