| import { BlockEnum, type Node } from '@/app/components/workflow/types' | import { BlockEnum, type Node } from '@/app/components/workflow/types' | ||||
| import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types' | import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types' | ||||
| import type { DatasourceType } from '@/models/pipeline' | import type { DatasourceType } from '@/models/pipeline' | ||||
| import type { CrawlResultItem, DocumentItem, FileItem } from '@/models/datasets' | |||||
| import type { CrawlResult, CrawlResultItem, DocumentItem, FileItem } from '@/models/datasets' | |||||
| import { CrawlStep } from '@/models/datasets' | |||||
| import produce from 'immer' | import produce from 'immer' | ||||
| import type { NotionPage } from '@/models/common' | import type { NotionPage } from '@/models/common' | ||||
| export const useWebsiteCrawl = () => { | export const useWebsiteCrawl = () => { | ||||
| const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([]) | const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([]) | ||||
| const [currentWebsite, setCurrentWebsite] = useState<CrawlResultItem | undefined>() | const [currentWebsite, setCurrentWebsite] = useState<CrawlResultItem | undefined>() | ||||
| const [crawlResult, setCrawlResult] = useState<CrawlResult | undefined>() | |||||
| const [step, setStep] = useState<CrawlStep>(CrawlStep.init) | |||||
| const [previewIndex, setPreviewIndex] = useState<number>(-1) | |||||
| const previewWebsitePage = useRef<CrawlResultItem>(websitePages[0]) | const previewWebsitePage = useRef<CrawlResultItem>(websitePages[0]) | ||||
| const updateCurrentWebsite = useCallback((website: CrawlResultItem) => { | |||||
| const updateCurrentWebsite = useCallback((website: CrawlResultItem, index: number) => { | |||||
| setCurrentWebsite(website) | setCurrentWebsite(website) | ||||
| setPreviewIndex(index) | |||||
| }, []) | }, []) | ||||
| const hideWebsitePreview = useCallback(() => { | const hideWebsitePreview = useCallback(() => { | ||||
| setCurrentWebsite(undefined) | setCurrentWebsite(undefined) | ||||
| setPreviewIndex(-1) | |||||
| }, []) | }, []) | ||||
| const updataCheckedCrawlResultChange = useCallback((checkedCrawlResult: CrawlResultItem[]) => { | const updataCheckedCrawlResultChange = useCallback((checkedCrawlResult: CrawlResultItem[]) => { | ||||
| return { | return { | ||||
| websitePages, | websitePages, | ||||
| crawlResult, | |||||
| setCrawlResult, | |||||
| step, | |||||
| setStep, | |||||
| previewWebsitePage, | previewWebsitePage, | ||||
| updataCheckedCrawlResultChange, | updataCheckedCrawlResultChange, | ||||
| currentWebsite, | currentWebsite, | ||||
| updateCurrentWebsite, | updateCurrentWebsite, | ||||
| previewIndex, | |||||
| hideWebsitePreview, | hideWebsitePreview, | ||||
| } | } | ||||
| } | } |
| } = useOnlineDocuments() | } = useOnlineDocuments() | ||||
| const { | const { | ||||
| websitePages, | websitePages, | ||||
| crawlResult, | |||||
| setCrawlResult, | |||||
| step, | |||||
| setStep, | |||||
| previewWebsitePage, | previewWebsitePage, | ||||
| updataCheckedCrawlResultChange, | updataCheckedCrawlResultChange, | ||||
| currentWebsite, | currentWebsite, | ||||
| updateCurrentWebsite, | updateCurrentWebsite, | ||||
| previewIndex, | |||||
| hideWebsitePreview, | hideWebsitePreview, | ||||
| } = useWebsiteCrawl() | } = useWebsiteCrawl() | ||||
| <div | <div | ||||
| className='relative flex h-[calc(100vh-56px)] w-full min-w-[1024px] overflow-x-auto rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle' | className='relative flex h-[calc(100vh-56px)] w-full min-w-[1024px] overflow-x-auto rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle' | ||||
| > | > | ||||
| <div className='flex h-full flex-1 flex-col px-14'> | |||||
| <LeftHeader | |||||
| steps={steps} | |||||
| title={t('datasetPipeline.addDocuments.title')} | |||||
| currentStep={currentStep} | |||||
| /> | |||||
| <div className='grow overflow-y-auto'> | |||||
| { | |||||
| currentStep === 1 && ( | |||||
| <div className='flex flex-col gap-y-5 pt-4'> | |||||
| <DataSourceOptions | |||||
| datasourceNodeId={datasource?.nodeId || ''} | |||||
| onSelect={setDatasource} | |||||
| pipelineNodes={(pipelineInfo?.graph.nodes || []) as Node<DataSourceNodeType>[]} | |||||
| /> | |||||
| {datasource?.type === DatasourceType.localFile && ( | |||||
| <LocalFile | |||||
| files={fileList} | |||||
| allowedExtensions={datasource?.fileExtensions || []} | |||||
| updateFile={updateFile} | |||||
| updateFileList={updateFileList} | |||||
| onPreview={updateCurrentFile} | |||||
| notSupportBatchUpload={notSupportBatchUpload} | |||||
| /> | |||||
| )} | |||||
| {datasource?.type === DatasourceType.onlineDocument && ( | |||||
| <OnlineDocuments | |||||
| nodeId={datasource?.nodeId || ''} | |||||
| headerInfo={{ | |||||
| title: datasource.description, | |||||
| docTitle: datasource.docTitle || '', | |||||
| docLink: datasource.docLink || '', | |||||
| }} | |||||
| onlineDocuments={onlineDocuments} | |||||
| updateOnlineDocuments={updateOnlineDocuments} | |||||
| canPreview | |||||
| onPreview={updateCurrentPage} | |||||
| /> | |||||
| )} | |||||
| {datasource?.type === DatasourceType.websiteCrawl && ( | |||||
| <WebsiteCrawl | |||||
| nodeId={datasource?.nodeId || ''} | |||||
| headerInfo={{ | |||||
| title: datasource.description, | |||||
| docTitle: datasource.docTitle || '', | |||||
| docLink: datasource.docLink || '', | |||||
| }} | |||||
| checkedCrawlResult={websitePages} | |||||
| onCheckedCrawlResultChange={updataCheckedCrawlResultChange} | |||||
| onPreview={updateCurrentWebsite} | |||||
| <div className='h-full min-w-0 flex-1'> | |||||
| <div className='flex h-full flex-col px-14'> | |||||
| <LeftHeader | |||||
| steps={steps} | |||||
| title={t('datasetPipeline.addDocuments.title')} | |||||
| currentStep={currentStep} | |||||
| /> | |||||
| <div className='grow overflow-y-auto'> | |||||
| { | |||||
| currentStep === 1 && ( | |||||
| <div className='flex flex-col gap-y-5 pt-4'> | |||||
| <DataSourceOptions | |||||
| datasourceNodeId={datasource?.nodeId || ''} | |||||
| onSelect={setDatasource} | |||||
| pipelineNodes={(pipelineInfo?.graph.nodes || []) as Node<DataSourceNodeType>[]} | |||||
| /> | /> | ||||
| )} | |||||
| {isShowVectorSpaceFull && ( | |||||
| <VectorSpaceFull /> | |||||
| )} | |||||
| <Actions disabled={nextBtnDisabled} handleNextStep={handleNextStep} /> | |||||
| </div> | |||||
| ) | |||||
| } | |||||
| { | |||||
| currentStep === 2 && ( | |||||
| <ProcessDocuments | |||||
| ref={formRef} | |||||
| dataSourceNodeId={datasource?.nodeId || ''} | |||||
| onProcess={onClickProcess} | |||||
| onPreview={onClickPreview} | |||||
| onSubmit={handleSubmit} | |||||
| onBack={handleBackStep} | |||||
| /> | |||||
| ) | |||||
| } | |||||
| { | |||||
| currentStep === 3 && ( | |||||
| <Processing | |||||
| batchId={batchId} | |||||
| documents={documents} | |||||
| /> | |||||
| ) | |||||
| } | |||||
| {datasource?.type === DatasourceType.localFile && ( | |||||
| <LocalFile | |||||
| files={fileList} | |||||
| allowedExtensions={datasource?.fileExtensions || []} | |||||
| updateFile={updateFile} | |||||
| updateFileList={updateFileList} | |||||
| onPreview={updateCurrentFile} | |||||
| notSupportBatchUpload={notSupportBatchUpload} | |||||
| /> | |||||
| )} | |||||
| {datasource?.type === DatasourceType.onlineDocument && ( | |||||
| <OnlineDocuments | |||||
| nodeId={datasource?.nodeId || ''} | |||||
| headerInfo={{ | |||||
| title: datasource.description, | |||||
| docTitle: datasource.docTitle || '', | |||||
| docLink: datasource.docLink || '', | |||||
| }} | |||||
| onlineDocuments={onlineDocuments} | |||||
| updateOnlineDocuments={updateOnlineDocuments} | |||||
| canPreview | |||||
| onPreview={updateCurrentPage} | |||||
| /> | |||||
| )} | |||||
| {datasource?.type === DatasourceType.websiteCrawl && ( | |||||
| <WebsiteCrawl | |||||
| nodeId={datasource?.nodeId || ''} | |||||
| headerInfo={{ | |||||
| title: datasource.description, | |||||
| docTitle: datasource.docTitle || '', | |||||
| docLink: datasource.docLink || '', | |||||
| }} | |||||
| crawlResult={crawlResult} | |||||
| setCrawlResult={setCrawlResult} | |||||
| step={step} | |||||
| setStep={setStep} | |||||
| checkedCrawlResult={websitePages} | |||||
| onCheckedCrawlResultChange={updataCheckedCrawlResultChange} | |||||
| onPreview={updateCurrentWebsite} | |||||
| previewIndex={previewIndex} | |||||
| /> | |||||
| )} | |||||
| {isShowVectorSpaceFull && ( | |||||
| <VectorSpaceFull /> | |||||
| )} | |||||
| <Actions disabled={nextBtnDisabled} handleNextStep={handleNextStep} /> | |||||
| </div> | |||||
| ) | |||||
| } | |||||
| { | |||||
| currentStep === 2 && ( | |||||
| <ProcessDocuments | |||||
| ref={formRef} | |||||
| dataSourceNodeId={datasource?.nodeId || ''} | |||||
| onProcess={onClickProcess} | |||||
| onPreview={onClickPreview} | |||||
| onSubmit={handleSubmit} | |||||
| onBack={handleBackStep} | |||||
| /> | |||||
| ) | |||||
| } | |||||
| { | |||||
| currentStep === 3 && ( | |||||
| <Processing | |||||
| batchId={batchId} | |||||
| documents={documents} | |||||
| /> | |||||
| ) | |||||
| } | |||||
| </div> | |||||
| </div> | </div> | ||||
| </div> | </div> | ||||
| {/* Preview */} | {/* Preview */} | ||||
| { | { | ||||
| currentStep === 1 && ( | currentStep === 1 && ( | ||||
| <div className='flex h-full flex-1 pl-2 pt-2'> | |||||
| {currentFile && <FilePreview file={currentFile} hidePreview={hideFilePreview} />} | |||||
| {currentDocuments && <OnlineDocumentPreview currentPage={currentDocuments} hidePreview={hideOnlineDocumentPreview} />} | |||||
| {currentWebsite && <WebsitePreview payload={currentWebsite} hidePreview={hideWebsitePreview} />} | |||||
| <div className='h-full min-w-0 flex-1'> | |||||
| <div className='flex h-full flex-col pl-2 pt-2'> | |||||
| {currentFile && <FilePreview file={currentFile} hidePreview={hideFilePreview} />} | |||||
| {currentDocuments && <OnlineDocumentPreview currentPage={currentDocuments} hidePreview={hideOnlineDocumentPreview} />} | |||||
| {currentWebsite && <WebsitePreview payload={currentWebsite} hidePreview={hideWebsitePreview} />} | |||||
| </div> | |||||
| </div> | </div> | ||||
| ) | ) | ||||
| } | } | ||||
| { | { | ||||
| currentStep === 2 && ( | currentStep === 2 && ( | ||||
| <div className='flex h-full flex-1 pl-2 pt-2'> | |||||
| <ChunkPreview | |||||
| dataSourceType={datasource!.type} | |||||
| files={fileList.map(file => file.file)} | |||||
| onlineDocuments={onlineDocuments} | |||||
| websitePages={websitePages} | |||||
| isIdle={isIdle} | |||||
| isPending={isPending && isPreview.current} | |||||
| estimateData={estimateData} | |||||
| onPreview={onClickPreview} | |||||
| handlePreviewFileChange={handlePreviewFileChange} | |||||
| handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange} | |||||
| handlePreviewWebsitePageChange={handlePreviewWebsiteChange} | |||||
| /> | |||||
| <div className='h-full min-w-0 flex-1'> | |||||
| <div className='flex h-full flex-col pl-2 pt-2'> | |||||
| <ChunkPreview | |||||
| dataSourceType={datasource!.type} | |||||
| files={fileList.map(file => file.file)} | |||||
| onlineDocuments={onlineDocuments} | |||||
| websitePages={websitePages} | |||||
| isIdle={isIdle} | |||||
| isPending={isPending && isPreview.current} | |||||
| estimateData={estimateData} | |||||
| onPreview={onClickPreview} | |||||
| handlePreviewFileChange={handlePreviewFileChange} | |||||
| handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange} | |||||
| handlePreviewWebsitePageChange={handlePreviewWebsiteChange} | |||||
| /> | |||||
| </div> | |||||
| </div> | </div> | ||||
| ) | ) | ||||
| } | } |
| onCheckChange(!isChecked) | onCheckChange(!isChecked) | ||||
| }, [isChecked, onCheckChange]) | }, [isChecked, onCheckChange]) | ||||
| return ( | return ( | ||||
| <div className={cn('flex cursor-pointer gap-x-2 rounded-lg p-2', isPreview ? 'bg-state-base-active' : 'group hover:bg-state-base-hover')}> | |||||
| <div className={cn( | |||||
| 'relative flex cursor-pointer gap-x-2 rounded-lg p-2', | |||||
| isPreview ? 'bg-state-base-active' : 'group hover:bg-state-base-hover', | |||||
| )}> | |||||
| <Checkbox | <Checkbox | ||||
| className='shrink-0' | className='shrink-0' | ||||
| checked={isChecked} | checked={isChecked} | ||||
| {payload.source_url} | {payload.source_url} | ||||
| </div> | </div> | ||||
| </div> | </div> | ||||
| {showPreview && <Button | |||||
| size='small' | |||||
| onClick={onPreview} | |||||
| className='system-xs-medium-uppercase right-0 top-0 hidden px-1.5 group-hover:absolute group-hover:block' | |||||
| > | |||||
| {t('datasetCreation.stepOne.website.preview')} | |||||
| </Button>} | |||||
| {showPreview && ( | |||||
| <Button | |||||
| size='small' | |||||
| onClick={onPreview} | |||||
| className='system-xs-medium-uppercase right-2 top-2 hidden px-1.5 group-hover:absolute group-hover:block' | |||||
| > | |||||
| {t('datasetCreation.stepOne.website.preview')} | |||||
| </Button> | |||||
| )} | |||||
| </div> | </div> | ||||
| ) | ) | ||||
| } | } |
| 'use client' | 'use client' | ||||
| import React, { useCallback, useState } from 'react' | |||||
| import React, { useCallback } from 'react' | |||||
| import { useTranslation } from 'react-i18next' | import { useTranslation } from 'react-i18next' | ||||
| import cn from '@/utils/classnames' | import cn from '@/utils/classnames' | ||||
| import type { CrawlResultItem } from '@/models/datasets' | import type { CrawlResultItem } from '@/models/datasets' | ||||
| type CrawledResultProps = { | type CrawledResultProps = { | ||||
| className?: string | className?: string | ||||
| previewIndex?: number | |||||
| list: CrawlResultItem[] | list: CrawlResultItem[] | ||||
| checkedList: CrawlResultItem[] | checkedList: CrawlResultItem[] | ||||
| onSelectedChange: (selected: CrawlResultItem[]) => void | onSelectedChange: (selected: CrawlResultItem[]) => void | ||||
| onPreview?: (payload: CrawlResultItem) => void | |||||
| onPreview?: (payload: CrawlResultItem, index: number) => void | |||||
| usedTime: number | usedTime: number | ||||
| } | } | ||||
| const CrawledResult = ({ | const CrawledResult = ({ | ||||
| className = '', | className = '', | ||||
| previewIndex, | |||||
| list, | list, | ||||
| checkedList, | checkedList, | ||||
| onSelectedChange, | onSelectedChange, | ||||
| onPreview, | onPreview, | ||||
| }: CrawledResultProps) => { | }: CrawledResultProps) => { | ||||
| const { t } = useTranslation() | const { t } = useTranslation() | ||||
| const [previewIndex, setPreviewIndex] = useState<number>(-1) | |||||
| const isCheckAll = checkedList.length === list.length | const isCheckAll = checkedList.length === list.length | ||||
| const handlePreview = useCallback((index: number) => { | const handlePreview = useCallback((index: number) => { | ||||
| if (!onPreview) return | if (!onPreview) return | ||||
| setPreviewIndex(index) | |||||
| onPreview(list[index]) | |||||
| onPreview(list[index], index) | |||||
| }, [list, onPreview]) | }, [list, onPreview]) | ||||
| return ( | return ( |
| 'use client' | 'use client' | ||||
| import React, { useCallback, useEffect, useRef, useState } from 'react' | import React, { useCallback, useEffect, useRef, useState } from 'react' | ||||
| import { useTranslation } from 'react-i18next' | import { useTranslation } from 'react-i18next' | ||||
| import type { CrawlResultItem } from '@/models/datasets' | |||||
| import type { CrawlResult, CrawlResultItem } from '@/models/datasets' | |||||
| import { CrawlStep } from '@/models/datasets' | |||||
| import Header from '@/app/components/datasets/create/website/base/header' | import Header from '@/app/components/datasets/create/website/base/header' | ||||
| import Options from './options' | import Options from './options' | ||||
| import Crawling from './crawling' | import Crawling from './crawling' | ||||
| const I18N_PREFIX = 'datasetCreation.stepOne.website' | const I18N_PREFIX = 'datasetCreation.stepOne.website' | ||||
| type CrawlerProps = { | |||||
| export type CrawlerProps = { | |||||
| nodeId: string | nodeId: string | ||||
| crawlResult: CrawlResult | undefined | |||||
| setCrawlResult: (payload: CrawlResult) => void | |||||
| step: CrawlStep | |||||
| setStep: (step: CrawlStep) => void | |||||
| checkedCrawlResult: CrawlResultItem[] | checkedCrawlResult: CrawlResultItem[] | ||||
| onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void | onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void | ||||
| headerInfo: { | headerInfo: { | ||||
| docTitle: string | docTitle: string | ||||
| docLink: string | docLink: string | ||||
| } | } | ||||
| onPreview?: (payload: CrawlResultItem) => void | |||||
| previewIndex?: number | |||||
| onPreview?: (payload: CrawlResultItem, index: number) => void | |||||
| isInPipeline?: boolean | isInPipeline?: boolean | ||||
| } | } | ||||
| enum Step { | |||||
| init = 'init', | |||||
| running = 'running', | |||||
| finished = 'finished', | |||||
| } | |||||
| const Crawler = ({ | const Crawler = ({ | ||||
| nodeId, | nodeId, | ||||
| crawlResult, | |||||
| setCrawlResult, | |||||
| step, | |||||
| setStep, | |||||
| checkedCrawlResult, | checkedCrawlResult, | ||||
| headerInfo, | headerInfo, | ||||
| onCheckedCrawlResultChange, | onCheckedCrawlResultChange, | ||||
| previewIndex, | |||||
| onPreview, | onPreview, | ||||
| isInPipeline = false, | isInPipeline = false, | ||||
| }: CrawlerProps) => { | }: CrawlerProps) => { | ||||
| const { t } = useTranslation() | const { t } = useTranslation() | ||||
| const [step, setStep] = useState<Step>(Step.init) | |||||
| const [controlFoldOptions, setControlFoldOptions] = useState<number>(0) | const [controlFoldOptions, setControlFoldOptions] = useState<number>(0) | ||||
| const [totalNum, setTotalNum] = useState(0) | const [totalNum, setTotalNum] = useState(0) | ||||
| const [crawledNum, setCrawledNum] = useState(0) | const [crawledNum, setCrawledNum] = useState(0) | ||||
| }, !!pipelineId && !!nodeId) | }, !!pipelineId && !!nodeId) | ||||
| useEffect(() => { | useEffect(() => { | ||||
| if (step !== Step.init) | |||||
| if (step !== CrawlStep.init) | |||||
| setControlFoldOptions(Date.now()) | setControlFoldOptions(Date.now()) | ||||
| }, [step]) | }, [step]) | ||||
| const isInit = step === Step.init | |||||
| const isCrawlFinished = step === Step.finished | |||||
| const isRunning = step === Step.running | |||||
| const [crawlResult, setCrawlResult] = useState<{ | |||||
| data: CrawlResultItem[] | |||||
| time_consuming: number | string | |||||
| } | undefined>(undefined) | |||||
| const isInit = step === CrawlStep.init | |||||
| const isCrawlFinished = step === CrawlStep.finished | |||||
| const isRunning = step === CrawlStep.running | |||||
| const [crawlErrorMessage, setCrawlErrorMessage] = useState('') | const [crawlErrorMessage, setCrawlErrorMessage] = useState('') | ||||
| const showError = isCrawlFinished && crawlErrorMessage | const showError = isCrawlFinished && crawlErrorMessage | ||||
| : `/rag/pipelines/${pipelineId}/workflows/draft/datasource/nodes/${nodeId}/run` | : `/rag/pipelines/${pipelineId}/workflows/draft/datasource/nodes/${nodeId}/run` | ||||
| const handleRun = useCallback(async (value: Record<string, any>) => { | const handleRun = useCallback(async (value: Record<string, any>) => { | ||||
| setStep(Step.running) | |||||
| setStep(CrawlStep.running) | |||||
| ssePost( | ssePost( | ||||
| datasourceNodeRunURL, | datasourceNodeRunURL, | ||||
| { | { | ||||
| }, | }, | ||||
| onDataSourceNodeCompleted: (data: DataSourceNodeCompletedResponse) => { | onDataSourceNodeCompleted: (data: DataSourceNodeCompletedResponse) => { | ||||
| const { data: crawlData, time_consuming } = data | const { data: crawlData, time_consuming } = data | ||||
| setCrawlResult({ | |||||
| data: crawlData as CrawlResultItem[], | |||||
| const crawlResultData = { | |||||
| data: crawlData.map((item: any) => { | |||||
| const { content, ...rest } = item | |||||
| return { | |||||
| markdown: content || '', | |||||
| ...rest, | |||||
| } as CrawlResultItem | |||||
| }), | |||||
| time_consuming: time_consuming ?? 0, | time_consuming: time_consuming ?? 0, | ||||
| }) | |||||
| } | |||||
| setCrawlResult(crawlResultData) | |||||
| onCheckedCrawlResultChange(crawlData || []) // default select the crawl result | onCheckedCrawlResultChange(crawlData || []) // default select the crawl result | ||||
| setCrawlErrorMessage('') | setCrawlErrorMessage('') | ||||
| setStep(Step.finished) | |||||
| setStep(CrawlStep.finished) | |||||
| }, | }, | ||||
| onError: (message: string) => { | onError: (message: string) => { | ||||
| setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`)) | setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`)) | ||||
| setStep(Step.finished) | |||||
| setStep(CrawlStep.finished) | |||||
| }, | }, | ||||
| }, | }, | ||||
| ) | ) | ||||
| }, [datasourceNodeRunURL, onCheckedCrawlResultChange, t]) | |||||
| }, [datasourceNodeRunURL, onCheckedCrawlResultChange, setCrawlResult, setStep, t]) | |||||
| const handleSubmit = useCallback((value: Record<string, any>) => { | const handleSubmit = useCallback((value: Record<string, any>) => { | ||||
| handleRun(value) | handleRun(value) | ||||
| checkedList={checkedCrawlResult} | checkedList={checkedCrawlResult} | ||||
| onSelectedChange={onCheckedCrawlResultChange} | onSelectedChange={onCheckedCrawlResultChange} | ||||
| usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0} | usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0} | ||||
| previewIndex={previewIndex} | |||||
| onPreview={onPreview} | onPreview={onPreview} | ||||
| /> | /> | ||||
| )} | )} |
| 'use client' | 'use client' | ||||
| import React from 'react' | import React from 'react' | ||||
| import type { CrawlResultItem } from '@/models/datasets' | |||||
| import type { CrawlerProps } from './base/crawler' | |||||
| import Crawler from './base/crawler' | import Crawler from './base/crawler' | ||||
| type WebsiteCrawlProps = { | |||||
| nodeId: string | |||||
| checkedCrawlResult: CrawlResultItem[] | |||||
| onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void | |||||
| headerInfo: { | |||||
| title: string | |||||
| docTitle: string | |||||
| docLink: string | |||||
| } | |||||
| onPreview?: (payload: CrawlResultItem) => void | |||||
| isInPipeline?: boolean | |||||
| } | |||||
| type WebsiteCrawlProps = CrawlerProps | |||||
| const WebsiteCrawl = ({ | const WebsiteCrawl = ({ | ||||
| nodeId, | nodeId, | ||||
| crawlResult, | |||||
| setCrawlResult, | |||||
| step, | |||||
| setStep, | |||||
| checkedCrawlResult, | checkedCrawlResult, | ||||
| headerInfo, | headerInfo, | ||||
| onCheckedCrawlResultChange, | onCheckedCrawlResultChange, | ||||
| previewIndex, | |||||
| onPreview, | onPreview, | ||||
| isInPipeline, | isInPipeline, | ||||
| }: WebsiteCrawlProps) => { | }: WebsiteCrawlProps) => { | ||||
| return ( | return ( | ||||
| <Crawler | <Crawler | ||||
| nodeId={nodeId} | nodeId={nodeId} | ||||
| crawlResult={crawlResult} | |||||
| setCrawlResult={setCrawlResult} | |||||
| step={step} | |||||
| setStep={setStep} | |||||
| checkedCrawlResult={checkedCrawlResult} | checkedCrawlResult={checkedCrawlResult} | ||||
| headerInfo={headerInfo} | headerInfo={headerInfo} | ||||
| onCheckedCrawlResultChange={onCheckedCrawlResultChange} | onCheckedCrawlResultChange={onCheckedCrawlResultChange} | ||||
| previewIndex={previewIndex} | |||||
| onPreview={onPreview} | onPreview={onPreview} | ||||
| isInPipeline={isInPipeline} | isInPipeline={isInPipeline} | ||||
| /> | /> |
| import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types' | import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types' | ||||
| import { useCallback, useMemo, useState } from 'react' | import { useCallback, useMemo, useState } from 'react' | ||||
| import type { DatasourceType } from '@/models/pipeline' | import type { DatasourceType } from '@/models/pipeline' | ||||
| import type { CrawlResultItem, FileItem } from '@/models/datasets' | |||||
| import type { CrawlResult } from '@/models/datasets' | |||||
| import { type CrawlResultItem, CrawlStep, type FileItem } from '@/models/datasets' | |||||
| import produce from 'immer' | import produce from 'immer' | ||||
| import type { NotionPage } from '@/models/common' | import type { NotionPage } from '@/models/common' | ||||
| export const useWebsiteCrawl = () => { | export const useWebsiteCrawl = () => { | ||||
| const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([]) | const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([]) | ||||
| const [crawlResult, setCrawlResult] = useState<CrawlResult | undefined>() | |||||
| const [step, setStep] = useState<CrawlStep>(CrawlStep.init) | |||||
| return { | return { | ||||
| crawlResult, | |||||
| setCrawlResult, | |||||
| websitePages, | websitePages, | ||||
| setWebsitePages, | setWebsitePages, | ||||
| step, | |||||
| setStep, | |||||
| } | } | ||||
| } | } |
| updateOnlineDocuments, | updateOnlineDocuments, | ||||
| } = useOnlineDocuments() | } = useOnlineDocuments() | ||||
| const { | const { | ||||
| crawlResult, | |||||
| setCrawlResult, | |||||
| websitePages, | websitePages, | ||||
| setWebsitePages, | setWebsitePages, | ||||
| step, | |||||
| setStep, | |||||
| } = useWebsiteCrawl() | } = useWebsiteCrawl() | ||||
| const { handleRun } = useWorkflowRun() | const { handleRun } = useWorkflowRun() | ||||
| docTitle: datasource.docTitle || '', | docTitle: datasource.docTitle || '', | ||||
| docLink: datasource.docLink || '', | docLink: datasource.docLink || '', | ||||
| }} | }} | ||||
| crawlResult={crawlResult} | |||||
| setCrawlResult={setCrawlResult} | |||||
| step={step} | |||||
| setStep={setStep} | |||||
| onCheckedCrawlResultChange={setWebsitePages} | onCheckedCrawlResultChange={setWebsitePages} | ||||
| isInPipeline | isInPipeline | ||||
| /> | /> |
| source_url: string | source_url: string | ||||
| } | } | ||||
| export type CrawlResult = { | |||||
| data: CrawlResultItem[] | |||||
| time_consuming: number | string | |||||
| } | |||||
| export enum CrawlStep { | |||||
| init = 'init', | |||||
| running = 'running', | |||||
| finished = 'finished', | |||||
| } | |||||
| export type FileItem = { | export type FileItem = { | ||||
| fileID: string | fileID: string | ||||
| file: CustomFile | file: CustomFile |