You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

index.tsx 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569
  1. 'use client'
  2. import { useCallback, useMemo, useRef, useState } from 'react'
  3. import DataSourceOptions from './data-source-options'
  4. import type { CrawlResultItem, DocumentItem, CustomFile as File, FileIndexingEstimateResponse } from '@/models/datasets'
  5. import LocalFile from '@/app/components/datasets/documents/create-from-pipeline/data-source/local-file'
  6. import { useProviderContextSelector } from '@/context/provider-context'
  7. import type { NotionPage } from '@/models/common'
  8. import OnlineDocuments from '@/app/components/datasets/documents/create-from-pipeline/data-source/online-documents'
  9. import VectorSpaceFull from '@/app/components/billing/vector-space-full'
  10. import WebsiteCrawl from '@/app/components/datasets/documents/create-from-pipeline/data-source/website-crawl'
  11. import OnlineDrive from '@/app/components/datasets/documents/create-from-pipeline/data-source/online-drive'
  12. import Actions from './actions'
  13. import { useTranslation } from 'react-i18next'
  14. import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
  15. import LeftHeader from './left-header'
  16. import { usePublishedPipelineInfo, useRunPublishedPipeline } from '@/service/use-pipeline'
  17. import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
  18. import Loading from '@/app/components/base/loading'
  19. import type { Node } from '@/app/components/workflow/types'
  20. import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
  21. import FilePreview from './preview/file-preview'
  22. import OnlineDocumentPreview from './preview/online-document-preview'
  23. import WebsitePreview from './preview/web-preview'
  24. import ProcessDocuments from './process-documents'
  25. import ChunkPreview from './preview/chunk-preview'
  26. import Processing from './processing'
  27. import type {
  28. InitialDocumentDetail,
  29. OnlineDriveFile,
  30. PublishedPipelineRunPreviewResponse,
  31. PublishedPipelineRunResponse,
  32. } from '@/models/pipeline'
  33. import { DatasourceType } from '@/models/pipeline'
  34. import { TransferMethod } from '@/types/app'
  35. import { useAddDocumentsSteps, useLocalFile, useOnlineDocument, useOnlineDrive, useWebsiteCrawl } from './hooks'
  36. import DataSourceProvider from './data-source/store/provider'
  37. import { useDataSourceStore } from './data-source/store'
  38. import { useFileUploadConfig } from '@/service/use-common'
  39. const CreateFormPipeline = () => {
  40. const { t } = useTranslation()
  41. const plan = useProviderContextSelector(state => state.plan)
  42. const enableBilling = useProviderContextSelector(state => state.enableBilling)
  43. const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
  44. const [datasource, setDatasource] = useState<Datasource>()
  45. const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined)
  46. const [batchId, setBatchId] = useState('')
  47. const [documents, setDocuments] = useState<InitialDocumentDetail[]>([])
  48. const dataSourceStore = useDataSourceStore()
  49. const isPreview = useRef(false)
  50. const formRef = useRef<any>(null)
  51. const { data: pipelineInfo, isFetching: isFetchingPipelineInfo } = usePublishedPipelineInfo(pipelineId || '')
  52. const { data: fileUploadConfigResponse } = useFileUploadConfig()
  53. const {
  54. steps,
  55. currentStep,
  56. handleNextStep,
  57. handleBackStep,
  58. } = useAddDocumentsSteps()
  59. const {
  60. localFileList,
  61. allFileLoaded,
  62. currentLocalFile,
  63. hidePreviewLocalFile,
  64. } = useLocalFile()
  65. const {
  66. currentWorkspace,
  67. onlineDocuments,
  68. currentDocument,
  69. PagesMapAndSelectedPagesId,
  70. hidePreviewOnlineDocument,
  71. clearOnlineDocumentData,
  72. } = useOnlineDocument()
  73. const {
  74. websitePages,
  75. currentWebsite,
  76. hideWebsitePreview,
  77. clearWebsiteCrawlData,
  78. } = useWebsiteCrawl()
  79. const {
  80. onlineDriveFileList,
  81. selectedFileIds,
  82. selectedOnlineDriveFileList,
  83. clearOnlineDriveData,
  84. } = useOnlineDrive()
  85. const datasourceType = useMemo(() => datasource?.nodeData.provider_type, [datasource])
  86. const isVectorSpaceFull = plan.usage.vectorSpace >= plan.total.vectorSpace
  87. const isShowVectorSpaceFull = useMemo(() => {
  88. if (!datasource)
  89. return false
  90. if (datasourceType === DatasourceType.localFile)
  91. return allFileLoaded && isVectorSpaceFull && enableBilling
  92. if (datasourceType === DatasourceType.onlineDocument)
  93. return onlineDocuments.length > 0 && isVectorSpaceFull && enableBilling
  94. if (datasourceType === DatasourceType.websiteCrawl)
  95. return websitePages.length > 0 && isVectorSpaceFull && enableBilling
  96. if (datasourceType === DatasourceType.onlineDrive)
  97. return onlineDriveFileList.length > 0 && isVectorSpaceFull && enableBilling
  98. return false
  99. }, [allFileLoaded, datasource, datasourceType, enableBilling, isVectorSpaceFull, onlineDocuments.length, onlineDriveFileList.length, websitePages.length])
  100. const notSupportBatchUpload = enableBilling && plan.type === 'sandbox'
  101. const nextBtnDisabled = useMemo(() => {
  102. if (!datasource) return true
  103. if (datasourceType === DatasourceType.localFile)
  104. return isShowVectorSpaceFull || !localFileList.length || !allFileLoaded
  105. if (datasourceType === DatasourceType.onlineDocument)
  106. return isShowVectorSpaceFull || !onlineDocuments.length
  107. if (datasourceType === DatasourceType.websiteCrawl)
  108. return isShowVectorSpaceFull || !websitePages.length
  109. if (datasourceType === DatasourceType.onlineDrive)
  110. return isShowVectorSpaceFull || !selectedFileIds.length
  111. return false
  112. }, [datasource, datasourceType, isShowVectorSpaceFull, localFileList.length, allFileLoaded, onlineDocuments.length, websitePages.length, selectedFileIds.length])
  113. const fileUploadConfig = useMemo(() => fileUploadConfigResponse ?? {
  114. file_size_limit: 15,
  115. batch_count_limit: 5,
  116. }, [fileUploadConfigResponse])
  117. const showSelect = useMemo(() => {
  118. if (datasourceType === DatasourceType.onlineDocument) {
  119. const pagesCount = currentWorkspace?.pages.length ?? 0
  120. return pagesCount > 0
  121. }
  122. if (datasourceType === DatasourceType.onlineDrive) {
  123. const isBucketList = onlineDriveFileList.some(file => file.type === 'bucket')
  124. return !isBucketList && onlineDriveFileList.filter((item) => {
  125. return item.type !== 'bucket'
  126. }).length > 0
  127. }
  128. }, [currentWorkspace?.pages.length, datasourceType, onlineDriveFileList])
  129. const totalOptions = useMemo(() => {
  130. if (datasourceType === DatasourceType.onlineDocument)
  131. return currentWorkspace?.pages.length
  132. if (datasourceType === DatasourceType.onlineDrive) {
  133. return onlineDriveFileList.filter((item) => {
  134. return item.type !== 'bucket'
  135. }).length
  136. }
  137. }, [currentWorkspace?.pages.length, datasourceType, onlineDriveFileList])
  138. const selectedOptions = useMemo(() => {
  139. if (datasourceType === DatasourceType.onlineDocument)
  140. return onlineDocuments.length
  141. if (datasourceType === DatasourceType.onlineDrive)
  142. return selectedFileIds.length
  143. }, [datasourceType, onlineDocuments.length, selectedFileIds.length])
  144. const tip = useMemo(() => {
  145. if (datasourceType === DatasourceType.onlineDocument)
  146. return t('datasetPipeline.addDocuments.selectOnlineDocumentTip', { count: 50 })
  147. if (datasourceType === DatasourceType.onlineDrive) {
  148. return t('datasetPipeline.addDocuments.selectOnlineDriveTip', {
  149. count: fileUploadConfig.batch_count_limit,
  150. fileSize: fileUploadConfig.file_size_limit,
  151. })
  152. }
  153. return ''
  154. }, [datasourceType, fileUploadConfig.batch_count_limit, fileUploadConfig.file_size_limit, t])
  155. const { mutateAsync: runPublishedPipeline, isIdle, isPending } = useRunPublishedPipeline()
  156. const handlePreviewChunks = useCallback(async (data: Record<string, any>) => {
  157. if (!datasource)
  158. return
  159. const {
  160. previewLocalFileRef,
  161. previewOnlineDocumentRef,
  162. previewWebsitePageRef,
  163. previewOnlineDriveFileRef,
  164. currentCredentialId,
  165. } = dataSourceStore.getState()
  166. const datasourceInfoList: Record<string, any>[] = []
  167. if (datasourceType === DatasourceType.localFile) {
  168. const { id, name, type, size, extension, mime_type } = previewLocalFileRef.current as File
  169. const documentInfo = {
  170. related_id: id,
  171. name,
  172. type,
  173. size,
  174. extension,
  175. mime_type,
  176. url: '',
  177. transfer_method: TransferMethod.local_file,
  178. credential_id: currentCredentialId,
  179. }
  180. datasourceInfoList.push(documentInfo)
  181. }
  182. if (datasourceType === DatasourceType.onlineDocument) {
  183. const { workspace_id, ...rest } = previewOnlineDocumentRef.current!
  184. const documentInfo = {
  185. workspace_id,
  186. page: rest,
  187. credential_id: currentCredentialId,
  188. }
  189. datasourceInfoList.push(documentInfo)
  190. }
  191. if (datasourceType === DatasourceType.websiteCrawl) {
  192. datasourceInfoList.push({
  193. ...previewWebsitePageRef.current!,
  194. credential_id: currentCredentialId,
  195. })
  196. }
  197. if (datasourceType === DatasourceType.onlineDrive) {
  198. const { bucket } = dataSourceStore.getState()
  199. const { id, type, name } = previewOnlineDriveFileRef.current!
  200. datasourceInfoList.push({
  201. bucket,
  202. id,
  203. name,
  204. type,
  205. credential_id: currentCredentialId,
  206. })
  207. }
  208. await runPublishedPipeline({
  209. pipeline_id: pipelineId!,
  210. inputs: data,
  211. start_node_id: datasource.nodeId,
  212. datasource_type: datasourceType as DatasourceType,
  213. datasource_info_list: datasourceInfoList,
  214. is_preview: true,
  215. }, {
  216. onSuccess: (res) => {
  217. setEstimateData((res as PublishedPipelineRunPreviewResponse).data.outputs)
  218. },
  219. })
  220. }, [datasource, datasourceType, runPublishedPipeline, pipelineId, dataSourceStore])
  221. const handleProcess = useCallback(async (data: Record<string, any>) => {
  222. if (!datasource)
  223. return
  224. const { currentCredentialId } = dataSourceStore.getState()
  225. const datasourceInfoList: Record<string, any>[] = []
  226. if (datasourceType === DatasourceType.localFile) {
  227. const {
  228. localFileList,
  229. } = dataSourceStore.getState()
  230. localFileList.forEach((file) => {
  231. const { id, name, type, size, extension, mime_type } = file.file
  232. const documentInfo = {
  233. related_id: id,
  234. name,
  235. type,
  236. size,
  237. extension,
  238. mime_type,
  239. url: '',
  240. transfer_method: TransferMethod.local_file,
  241. credential_id: currentCredentialId,
  242. }
  243. datasourceInfoList.push(documentInfo)
  244. })
  245. }
  246. if (datasourceType === DatasourceType.onlineDocument) {
  247. const {
  248. onlineDocuments,
  249. } = dataSourceStore.getState()
  250. onlineDocuments.forEach((page) => {
  251. const { workspace_id, ...rest } = page
  252. const documentInfo = {
  253. workspace_id,
  254. page: rest,
  255. credential_id: currentCredentialId,
  256. }
  257. datasourceInfoList.push(documentInfo)
  258. })
  259. }
  260. if (datasourceType === DatasourceType.websiteCrawl) {
  261. const {
  262. websitePages,
  263. } = dataSourceStore.getState()
  264. websitePages.forEach((websitePage) => {
  265. datasourceInfoList.push({
  266. ...websitePage,
  267. credential_id: currentCredentialId,
  268. })
  269. })
  270. }
  271. if (datasourceType === DatasourceType.onlineDrive) {
  272. const {
  273. bucket,
  274. selectedFileIds,
  275. onlineDriveFileList,
  276. } = dataSourceStore.getState()
  277. selectedFileIds.forEach((id) => {
  278. const file = onlineDriveFileList.find(file => file.id === id)
  279. datasourceInfoList.push({
  280. bucket,
  281. id: file?.id,
  282. name: file?.name,
  283. type: file?.type,
  284. credential_id: currentCredentialId,
  285. })
  286. })
  287. }
  288. await runPublishedPipeline({
  289. pipeline_id: pipelineId!,
  290. inputs: data,
  291. start_node_id: datasource.nodeId,
  292. datasource_type: datasourceType as DatasourceType,
  293. datasource_info_list: datasourceInfoList,
  294. is_preview: false,
  295. }, {
  296. onSuccess: (res) => {
  297. setBatchId((res as PublishedPipelineRunResponse).batch || '')
  298. setDocuments((res as PublishedPipelineRunResponse).documents || [])
  299. handleNextStep()
  300. },
  301. })
  302. }, [dataSourceStore, datasource, datasourceType, handleNextStep, pipelineId, runPublishedPipeline])
  303. const onClickProcess = useCallback(() => {
  304. isPreview.current = false
  305. formRef.current?.submit()
  306. }, [])
  307. const onClickPreview = useCallback(() => {
  308. isPreview.current = true
  309. formRef.current?.submit()
  310. }, [])
  311. const handleSubmit = useCallback((data: Record<string, any>) => {
  312. isPreview.current ? handlePreviewChunks(data) : handleProcess(data)
  313. }, [handlePreviewChunks, handleProcess])
  314. const handlePreviewFileChange = useCallback((file: DocumentItem) => {
  315. const { previewLocalFileRef } = dataSourceStore.getState()
  316. previewLocalFileRef.current = file
  317. onClickPreview()
  318. }, [dataSourceStore, onClickPreview])
  319. const handlePreviewOnlineDocumentChange = useCallback((page: NotionPage) => {
  320. const { previewOnlineDocumentRef } = dataSourceStore.getState()
  321. previewOnlineDocumentRef.current = page
  322. onClickPreview()
  323. }, [dataSourceStore, onClickPreview])
  324. const handlePreviewWebsiteChange = useCallback((website: CrawlResultItem) => {
  325. const { previewWebsitePageRef } = dataSourceStore.getState()
  326. previewWebsitePageRef.current = website
  327. onClickPreview()
  328. }, [dataSourceStore, onClickPreview])
  329. const handlePreviewOnlineDriveFileChange = useCallback((file: OnlineDriveFile) => {
  330. const { previewOnlineDriveFileRef } = dataSourceStore.getState()
  331. previewOnlineDriveFileRef.current = file
  332. onClickPreview()
  333. }, [dataSourceStore, onClickPreview])
  334. const handleSelectAll = useCallback(() => {
  335. const {
  336. onlineDocuments,
  337. onlineDriveFileList,
  338. selectedFileIds,
  339. setOnlineDocuments,
  340. setSelectedFileIds,
  341. setSelectedPagesId,
  342. } = dataSourceStore.getState()
  343. if (datasourceType === DatasourceType.onlineDocument) {
  344. const allIds = currentWorkspace?.pages.map(page => page.page_id) || []
  345. if (onlineDocuments.length < allIds.length) {
  346. const selectedPages = Array.from(allIds).map(pageId => PagesMapAndSelectedPagesId[pageId])
  347. setOnlineDocuments(selectedPages)
  348. setSelectedPagesId(new Set(allIds))
  349. }
  350. else {
  351. setOnlineDocuments([])
  352. setSelectedPagesId(new Set())
  353. }
  354. }
  355. if (datasourceType === DatasourceType.onlineDrive) {
  356. const allKeys = onlineDriveFileList.filter((item) => {
  357. return item.type !== 'bucket'
  358. }).map(file => file.id)
  359. if (selectedFileIds.length < allKeys.length)
  360. setSelectedFileIds(allKeys)
  361. else
  362. setSelectedFileIds([])
  363. }
  364. }, [PagesMapAndSelectedPagesId, currentWorkspace?.pages, dataSourceStore, datasourceType])
  365. const clearDataSourceData = useCallback((dataSource: Datasource) => {
  366. if (dataSource.nodeData.provider_type === DatasourceType.onlineDocument)
  367. clearOnlineDocumentData()
  368. else if (dataSource.nodeData.provider_type === DatasourceType.websiteCrawl)
  369. clearWebsiteCrawlData()
  370. else if (dataSource.nodeData.provider_type === DatasourceType.onlineDrive)
  371. clearOnlineDriveData()
  372. }, [])
  373. const handleSwitchDataSource = useCallback((dataSource: Datasource) => {
  374. const {
  375. setCurrentCredentialId,
  376. currentNodeIdRef,
  377. } = dataSourceStore.getState()
  378. clearDataSourceData(dataSource)
  379. setCurrentCredentialId('')
  380. currentNodeIdRef.current = dataSource.nodeId
  381. setDatasource(dataSource)
  382. }, [dataSourceStore])
  383. const handleCredentialChange = useCallback((credentialId: string) => {
  384. const { setCurrentCredentialId } = dataSourceStore.getState()
  385. clearDataSourceData(datasource!)
  386. setCurrentCredentialId(credentialId)
  387. }, [dataSourceStore, datasource])
  388. if (isFetchingPipelineInfo) {
  389. return (
  390. <Loading type='app' />
  391. )
  392. }
  393. return (
  394. <div
  395. className='relative flex h-[calc(100vh-56px)] w-full min-w-[1024px] overflow-x-auto rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle'
  396. >
  397. <div className='h-full min-w-0 flex-1'>
  398. <div className='flex h-full flex-col px-14'>
  399. <LeftHeader
  400. steps={steps}
  401. title={t('datasetPipeline.addDocuments.title')}
  402. currentStep={currentStep}
  403. />
  404. <div className='grow overflow-y-auto'>
  405. {
  406. currentStep === 1 && (
  407. <div className='flex flex-col gap-y-5 pt-4'>
  408. <DataSourceOptions
  409. datasourceNodeId={datasource?.nodeId || ''}
  410. onSelect={handleSwitchDataSource}
  411. pipelineNodes={(pipelineInfo?.graph.nodes || []) as Node<DataSourceNodeType>[]}
  412. />
  413. {datasourceType === DatasourceType.localFile && (
  414. <LocalFile
  415. allowedExtensions={datasource!.nodeData.fileExtensions || []}
  416. notSupportBatchUpload={notSupportBatchUpload}
  417. />
  418. )}
  419. {datasourceType === DatasourceType.onlineDocument && (
  420. <OnlineDocuments
  421. nodeId={datasource!.nodeId}
  422. nodeData={datasource!.nodeData}
  423. onCredentialChange={handleCredentialChange}
  424. />
  425. )}
  426. {datasourceType === DatasourceType.websiteCrawl && (
  427. <WebsiteCrawl
  428. nodeId={datasource!.nodeId}
  429. nodeData={datasource!.nodeData}
  430. onCredentialChange={handleCredentialChange}
  431. />
  432. )}
  433. {datasourceType === DatasourceType.onlineDrive && (
  434. <OnlineDrive
  435. nodeId={datasource!.nodeId}
  436. nodeData={datasource!.nodeData}
  437. onCredentialChange={handleCredentialChange}
  438. />
  439. )}
  440. {isShowVectorSpaceFull && (
  441. <VectorSpaceFull />
  442. )}
  443. <Actions
  444. showSelect={showSelect}
  445. totalOptions={totalOptions}
  446. selectedOptions={selectedOptions}
  447. onSelectAll={handleSelectAll}
  448. disabled={nextBtnDisabled}
  449. handleNextStep={handleNextStep}
  450. tip={tip}
  451. />
  452. </div>
  453. )
  454. }
  455. {
  456. currentStep === 2 && (
  457. <ProcessDocuments
  458. ref={formRef}
  459. dataSourceNodeId={datasource!.nodeId}
  460. isRunning={isPending}
  461. onProcess={onClickProcess}
  462. onPreview={onClickPreview}
  463. onSubmit={handleSubmit}
  464. onBack={handleBackStep}
  465. />
  466. )
  467. }
  468. {
  469. currentStep === 3 && (
  470. <Processing
  471. batchId={batchId}
  472. documents={documents}
  473. />
  474. )
  475. }
  476. </div>
  477. </div>
  478. </div>
  479. {/* Preview */}
  480. {
  481. currentStep === 1 && (
  482. <div className='h-full min-w-0 flex-1'>
  483. <div className='flex h-full flex-col pl-2 pt-2'>
  484. {currentLocalFile && (
  485. <FilePreview
  486. file={currentLocalFile}
  487. hidePreview={hidePreviewLocalFile}
  488. />
  489. )}
  490. {currentDocument && (
  491. <OnlineDocumentPreview
  492. datasourceNodeId={datasource!.nodeId}
  493. currentPage={currentDocument}
  494. hidePreview={hidePreviewOnlineDocument}
  495. />
  496. )}
  497. {currentWebsite && (
  498. <WebsitePreview
  499. currentWebsite={currentWebsite}
  500. hidePreview={hideWebsitePreview}
  501. />
  502. )}
  503. </div>
  504. </div>
  505. )
  506. }
  507. {
  508. currentStep === 2 && (
  509. <div className='h-full min-w-0 flex-1'>
  510. <div className='flex h-full flex-col pl-2 pt-2'>
  511. <ChunkPreview
  512. dataSourceType={datasourceType as DatasourceType}
  513. localFiles={localFileList.map(file => file.file)}
  514. onlineDocuments={onlineDocuments}
  515. websitePages={websitePages}
  516. onlineDriveFiles={selectedOnlineDriveFileList}
  517. isIdle={isIdle}
  518. isPending={isPending && isPreview.current}
  519. estimateData={estimateData}
  520. onPreview={onClickPreview}
  521. handlePreviewFileChange={handlePreviewFileChange}
  522. handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange}
  523. handlePreviewWebsitePageChange={handlePreviewWebsiteChange}
  524. handlePreviewOnlineDriveFileChange={handlePreviewOnlineDriveFileChange}
  525. />
  526. </div>
  527. </div>
  528. )
  529. }
  530. </div>
  531. )
  532. }
  533. const CreateFormPipelineWrapper = () => {
  534. return (
  535. <DataSourceProvider>
  536. <CreateFormPipeline />
  537. </DataSourceProvider>
  538. )
  539. }
  540. export default CreateFormPipelineWrapper