You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

index.tsx 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. 'use client'
  2. import { useCallback, useMemo, useRef, useState } from 'react'
  3. import DataSourceOptions from './data-source-options'
  4. import type { CrawlResultItem, DocumentItem, CustomFile as File, FileIndexingEstimateResponse } from '@/models/datasets'
  5. import LocalFile from '@/app/components/datasets/documents/create-from-pipeline/data-source/local-file'
  6. import { useProviderContextSelector } from '@/context/provider-context'
  7. import type { NotionPage } from '@/models/common'
  8. import OnlineDocuments from '@/app/components/datasets/documents/create-from-pipeline/data-source/online-documents'
  9. import VectorSpaceFull from '@/app/components/billing/vector-space-full'
  10. import WebsiteCrawl from '@/app/components/datasets/documents/create-from-pipeline/data-source/website-crawl'
  11. import OnlineDrive from '@/app/components/datasets/documents/create-from-pipeline/data-source/online-drive'
  12. import Actions from './actions'
  13. import { useTranslation } from 'react-i18next'
  14. import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
  15. import LeftHeader from './left-header'
  16. import { usePublishedPipelineInfo, useRunPublishedPipeline } from '@/service/use-pipeline'
  17. import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
  18. import Loading from '@/app/components/base/loading'
  19. import type { Node } from '@/app/components/workflow/types'
  20. import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
  21. import FilePreview from './preview/file-preview'
  22. import OnlineDocumentPreview from './preview/online-document-preview'
  23. import WebsitePreview from './preview/web-preview'
  24. import ProcessDocuments from './process-documents'
  25. import ChunkPreview from './preview/chunk-preview'
  26. import Processing from './processing'
  27. import type { InitialDocumentDetail, OnlineDriveFile, PublishedPipelineRunPreviewResponse, PublishedPipelineRunResponse } from '@/models/pipeline'
  28. import { DatasourceType } from '@/models/pipeline'
  29. import { TransferMethod } from '@/types/app'
  30. import { useAddDocumentsSteps, useLocalFile, useOnlineDocuments, useOnlineDrive, useWebsiteCrawl } from './hooks'
  31. import DataSourceProvider from './data-source/store/provider'
  32. import { useDataSourceStore } from './data-source/store'
  33. import { useFileUploadConfig } from '@/service/use-common'
  /**
   * Builds the `datasource_info_list` entry for a single locally uploaded file.
   * Shared by the preview run (one file) and the real run (every file).
   */
  const toLocalFileInfo = (file: Pick<File, 'id' | 'name' | 'type' | 'size' | 'extension' | 'mime_type'>) => {
    const { id, name, type, size, extension, mime_type } = file
    return {
      related_id: id,
      name,
      type,
      size,
      extension,
      mime_type,
      url: '',
      transfer_method: TransferMethod.local_file,
    }
  }

  /**
   * Builds the `datasource_info_list` entry for a single online document page:
   * `workspace_id` is lifted to the top level and the remaining fields are nested under `page`.
   */
  const toOnlineDocumentInfo = <T extends { workspace_id?: unknown }>(page: T) => {
    const { workspace_id, ...rest } = page
    return {
      workspace_id,
      page: rest,
    }
  }

  /**
   * "Add documents" page for a pipeline-backed dataset.
   *
   * Renders three steps driven by `useAddDocumentsSteps`:
   *  1. choose a data source and pick files / pages / crawl results,
   *  2. fill in the processing form and optionally preview chunks,
   *  3. show processing progress for the created batch.
   *
   * Both the chunk preview and the real run go through `useRunPublishedPipeline`;
   * they differ only in `is_preview` and in which items are sent as `datasource_info_list`.
   */
  const CreateFormPipeline = () => {
    const { t } = useTranslation()
    const plan = useProviderContextSelector(state => state.plan)
    const enableBilling = useProviderContextSelector(state => state.enableBilling)
    const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
    const [datasource, setDatasource] = useState<Datasource>()
    const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined)
    const [batchId, setBatchId] = useState('')
    const [documents, setDocuments] = useState<InitialDocumentDetail[]>([])
    const dataSourceStore = useDataSourceStore()
    // Remembers whether the pending form submission is a preview or a real run.
    const isPreview = useRef(false)
    const formRef = useRef<any>(null)

    const { data: pipelineInfo, isFetching: isFetchingPipelineInfo } = usePublishedPipelineInfo(pipelineId || '')
    const { data: fileUploadConfigResponse } = useFileUploadConfig()

    const {
      steps,
      currentStep,
      handleNextStep,
      handleBackStep,
    } = useAddDocumentsSteps()
    const {
      fileList,
      allFileLoaded,
      currentLocalFile,
      hidePreviewLocalFile,
    } = useLocalFile()
    const {
      currentWorkspace,
      onlineDocuments,
      currentDocument,
      PagesMapAndSelectedPagesId,
      hidePreviewOnlineDocument,
    } = useOnlineDocuments()
    const {
      websitePages,
      currentWebsite,
      hideWebsitePreview,
    } = useWebsiteCrawl()
    const {
      fileList: onlineDriveFileList,
      selectedFileKeys,
      selectedOnlineDriveFileList,
    } = useOnlineDrive()

    const datasourceType = datasource?.nodeData.provider_type
    const isVectorSpaceFull = plan.usage.vectorSpace >= plan.total.vectorSpace

    // The "vector space full" banner is only shown once the user has actually picked something.
    const isShowVectorSpaceFull = useMemo(() => {
      if (!datasource)
        return false
      if (datasourceType === DatasourceType.localFile)
        return allFileLoaded && isVectorSpaceFull && enableBilling
      if (datasourceType === DatasourceType.onlineDocument)
        return onlineDocuments.length > 0 && isVectorSpaceFull && enableBilling
      if (datasourceType === DatasourceType.websiteCrawl)
        return websitePages.length > 0 && isVectorSpaceFull && enableBilling
      if (datasourceType === DatasourceType.onlineDrive)
        return onlineDriveFileList.length > 0 && isVectorSpaceFull && enableBilling
      return false
    }, [allFileLoaded, datasource, datasourceType, enableBilling, isVectorSpaceFull, onlineDocuments.length, onlineDriveFileList.length, websitePages.length])

    const notSupportBatchUpload = enableBilling && plan.type === 'sandbox'

    // "Next" stays disabled until a data source is chosen and at least one item is selected.
    const nextBtnDisabled = useMemo(() => {
      if (!datasource)
        return true
      if (datasourceType === DatasourceType.localFile)
        return isShowVectorSpaceFull || !fileList.length || !allFileLoaded
      if (datasourceType === DatasourceType.onlineDocument)
        return isShowVectorSpaceFull || !onlineDocuments.length
      if (datasourceType === DatasourceType.websiteCrawl)
        return isShowVectorSpaceFull || !websitePages.length
      if (datasourceType === DatasourceType.onlineDrive)
        return isShowVectorSpaceFull || !selectedFileKeys.length
      return false
    }, [datasource, datasourceType, isShowVectorSpaceFull, fileList.length, allFileLoaded, onlineDocuments.length, websitePages.length, selectedFileKeys.length])

    // Fallback limits used until the upload config response is available.
    const fileUploadConfig = useMemo(() => fileUploadConfigResponse ?? {
      file_size_limit: 15,
      batch_count_limit: 5,
    }, [fileUploadConfigResponse])

    // Whether the "select all" control is shown; left undefined for data sources without it.
    const showSelect = useMemo(() => {
      if (datasourceType === DatasourceType.onlineDocument) {
        const pagesCount = currentWorkspace?.pages.length ?? 0
        return pagesCount > 0
      }
      if (datasourceType === DatasourceType.onlineDrive) {
        // Hidden while the list contains any bucket entry.
        const isBucketList = onlineDriveFileList.some(file => file.type === 'bucket')
        return !isBucketList && onlineDriveFileList.filter(item => item.type !== 'bucket').length > 0
      }
    }, [currentWorkspace?.pages.length, datasourceType, onlineDriveFileList])

    // Number of selectable items for the "select all" control.
    const totalOptions = useMemo(() => {
      if (datasourceType === DatasourceType.onlineDocument)
        return currentWorkspace?.pages.length
      if (datasourceType === DatasourceType.onlineDrive)
        return onlineDriveFileList.filter(item => item.type !== 'bucket').length
    }, [currentWorkspace?.pages.length, datasourceType, onlineDriveFileList])

    // Number of currently selected items for the "select all" control.
    const selectedOptions = useMemo(() => {
      if (datasourceType === DatasourceType.onlineDocument)
        return onlineDocuments.length
      if (datasourceType === DatasourceType.onlineDrive)
        return selectedFileKeys.length
    }, [datasourceType, onlineDocuments.length, selectedFileKeys.length])

    const tip = useMemo(() => {
      if (datasourceType === DatasourceType.onlineDocument)
        return t('datasetPipeline.addDocuments.selectOnlineDocumentTip', { count: 50 })
      if (datasourceType === DatasourceType.onlineDrive) {
        return t('datasetPipeline.addDocuments.selectOnlineDriveTip', {
          count: fileUploadConfig.batch_count_limit,
          fileSize: fileUploadConfig.file_size_limit,
        })
      }
      return ''
    }, [datasourceType, fileUploadConfig.batch_count_limit, fileUploadConfig.file_size_limit, t])

    const { mutateAsync: runPublishedPipeline, isIdle, isPending } = useRunPublishedPipeline()

    /**
     * Runs the published pipeline in preview mode for the single item currently
     * referenced by the store's preview ref, then stores the returned outputs as `estimateData`.
     * Does nothing when no data source, pipeline id or preview target is available.
     */
    const handlePreviewChunks = useCallback(async (data: Record<string, any>) => {
      if (!datasource || !pipelineId)
        return
      const {
        previewLocalFileRef,
        previewOnlineDocumentRef,
        previewWebsitePageRef,
        previewOnlineDriveFileRef,
        bucket,
      } = dataSourceStore.getState()
      const datasourceInfoList: Record<string, any>[] = []
      if (datasourceType === DatasourceType.localFile) {
        const previewFile = previewLocalFileRef.current
        // Guard instead of destructuring an unset ref, which would throw.
        if (!previewFile)
          return
        datasourceInfoList.push(toLocalFileInfo(previewFile as File))
      }
      if (datasourceType === DatasourceType.onlineDocument) {
        const previewPage = previewOnlineDocumentRef.current
        if (!previewPage)
          return
        datasourceInfoList.push(toOnlineDocumentInfo(previewPage))
      }
      if (datasourceType === DatasourceType.websiteCrawl) {
        const previewPage = previewWebsitePageRef.current
        if (!previewPage)
          return
        datasourceInfoList.push(previewPage)
      }
      if (datasourceType === DatasourceType.onlineDrive) {
        const previewFile = previewOnlineDriveFileRef.current
        if (!previewFile)
          return
        datasourceInfoList.push({
          bucket,
          key: previewFile.key,
        })
      }
      // NOTE(review): `mutateAsync` rejects on failure and nothing here catches it — confirm
      // that errors are surfaced elsewhere (e.g. by the service layer).
      await runPublishedPipeline({
        pipeline_id: pipelineId,
        inputs: data,
        start_node_id: datasource.nodeId,
        datasource_type: datasourceType as DatasourceType,
        datasource_info_list: datasourceInfoList,
        is_preview: true,
      }, {
        onSuccess: (res) => {
          setEstimateData((res as PublishedPipelineRunPreviewResponse).data.outputs)
        },
      })
    }, [datasource, datasourceType, runPublishedPipeline, pipelineId, dataSourceStore])

    /**
     * Runs the published pipeline for every selected item. On success the returned
     * batch id and documents are stored and the wizard advances to the next step.
     * Does nothing when no data source or pipeline id is available.
     */
    const handleProcess = useCallback(async (data: Record<string, any>) => {
      if (!datasource || !pipelineId)
        return
      const datasourceInfoList: Record<string, any>[] = []
      if (datasourceType === DatasourceType.localFile) {
        for (const item of fileList)
          datasourceInfoList.push(toLocalFileInfo(item.file))
      }
      if (datasourceType === DatasourceType.onlineDocument) {
        for (const page of onlineDocuments)
          datasourceInfoList.push(toOnlineDocumentInfo(page))
      }
      if (datasourceType === DatasourceType.websiteCrawl) {
        for (const websitePage of websitePages)
          datasourceInfoList.push(websitePage)
      }
      if (datasourceType === DatasourceType.onlineDrive) {
        const { bucket } = dataSourceStore.getState()
        for (const key of selectedFileKeys) {
          datasourceInfoList.push({
            bucket,
            key,
          })
        }
      }
      // NOTE(review): same unhandled-rejection caveat as the preview run above the call site.
      await runPublishedPipeline({
        pipeline_id: pipelineId,
        inputs: data,
        start_node_id: datasource.nodeId,
        datasource_type: datasourceType as DatasourceType,
        datasource_info_list: datasourceInfoList,
        is_preview: false,
      }, {
        onSuccess: (res) => {
          setBatchId((res as PublishedPipelineRunResponse).batch || '')
          setDocuments((res as PublishedPipelineRunResponse).documents || [])
          handleNextStep()
        },
      })
    }, [dataSourceStore, datasource, datasourceType, fileList, handleNextStep, onlineDocuments, pipelineId, runPublishedPipeline, selectedFileKeys, websitePages])

    // Both buttons submit the same form; the ref flag tells `handleSubmit` which run to start.
    const onClickProcess = useCallback(() => {
      isPreview.current = false
      formRef.current?.submit()
    }, [])

    const onClickPreview = useCallback(() => {
      isPreview.current = true
      formRef.current?.submit()
    }, [])

    const handleSubmit = useCallback((data: Record<string, any>) => {
      if (isPreview.current)
        handlePreviewChunks(data)
      else
        handleProcess(data)
    }, [handlePreviewChunks, handleProcess])

    // Each handler below records the new preview target in the store, then re-runs the preview.
    const handlePreviewFileChange = useCallback((file: DocumentItem) => {
      const { previewLocalFileRef } = dataSourceStore.getState()
      previewLocalFileRef.current = file
      onClickPreview()
    }, [dataSourceStore, onClickPreview])

    const handlePreviewOnlineDocumentChange = useCallback((page: NotionPage) => {
      const { previewOnlineDocumentRef } = dataSourceStore.getState()
      previewOnlineDocumentRef.current = page
      onClickPreview()
    }, [dataSourceStore, onClickPreview])

    const handlePreviewWebsiteChange = useCallback((website: CrawlResultItem) => {
      const { previewWebsitePageRef } = dataSourceStore.getState()
      previewWebsitePageRef.current = website
      onClickPreview()
    }, [dataSourceStore, onClickPreview])

    const handlePreviewOnlineDriveFileChange = useCallback((file: OnlineDriveFile) => {
      const { previewOnlineDriveFileRef } = dataSourceStore.getState()
      previewOnlineDriveFileRef.current = file
      onClickPreview()
    }, [dataSourceStore, onClickPreview])

    /**
     * Toggles "select all": selects every selectable item when fewer than all are
     * selected, otherwise clears the selection. Reads the latest selection from the store.
     */
    const handleSelectAll = useCallback(() => {
      const {
        onlineDocuments: selectedDocuments,
        fileList: driveFiles,
        selectedFileKeys: selectedKeys,
        setOnlineDocuments,
        setSelectedFileKeys,
        setSelectedPagesId,
      } = dataSourceStore.getState()
      if (datasourceType === DatasourceType.onlineDocument) {
        const allIds = currentWorkspace?.pages.map(page => page.page_id) || []
        if (selectedDocuments.length < allIds.length) {
          const selectedPages = allIds.map(pageId => PagesMapAndSelectedPagesId[pageId])
          setOnlineDocuments(selectedPages)
          setSelectedPagesId(new Set(allIds))
        }
        else {
          setOnlineDocuments([])
          setSelectedPagesId(new Set())
        }
      }
      if (datasourceType === DatasourceType.onlineDrive) {
        // Buckets are containers, not selectable files.
        const allKeys = driveFiles
          .filter(item => item.type !== 'bucket')
          .map(file => file.key)
        if (selectedKeys.length < allKeys.length)
          setSelectedFileKeys(allKeys)
        else
          setSelectedFileKeys([])
      }
    }, [PagesMapAndSelectedPagesId, currentWorkspace?.pages, dataSourceStore, datasourceType])

    if (isFetchingPipelineInfo) {
      return (
        <Loading type='app' />
      )
    }

    return (
      <div
        className='relative flex h-[calc(100vh-56px)] w-full min-w-[1024px] overflow-x-auto rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle'
      >
        <div className='h-full min-w-0 flex-1'>
          <div className='flex h-full flex-col px-14'>
            <LeftHeader
              steps={steps}
              title={t('datasetPipeline.addDocuments.title')}
              currentStep={currentStep}
            />
            <div className='grow overflow-y-auto'>
              {
                currentStep === 1 && (
                  <div className='flex flex-col gap-y-5 pt-4'>
                    <DataSourceOptions
                      datasourceNodeId={datasource?.nodeId || ''}
                      onSelect={setDatasource}
                      pipelineNodes={(pipelineInfo?.graph.nodes || []) as Node<DataSourceNodeType>[]}
                    />
                    {datasourceType === DatasourceType.localFile && (
                      <LocalFile
                        allowedExtensions={datasource!.nodeData.fileExtensions || []}
                        notSupportBatchUpload={notSupportBatchUpload}
                      />
                    )}
                    {datasourceType === DatasourceType.onlineDocument && (
                      <OnlineDocuments
                        nodeId={datasource!.nodeId}
                        nodeData={datasource!.nodeData}
                      />
                    )}
                    {datasourceType === DatasourceType.websiteCrawl && (
                      <WebsiteCrawl
                        nodeId={datasource!.nodeId}
                        nodeData={datasource!.nodeData}
                      />
                    )}
                    {datasourceType === DatasourceType.onlineDrive && (
                      <OnlineDrive
                        nodeId={datasource!.nodeId}
                        nodeData={datasource!.nodeData}
                      />
                    )}
                    {isShowVectorSpaceFull && (
                      <VectorSpaceFull />
                    )}
                    <Actions
                      showSelect={showSelect}
                      totalOptions={totalOptions}
                      selectedOptions={selectedOptions}
                      onSelectAll={handleSelectAll}
                      disabled={nextBtnDisabled}
                      handleNextStep={handleNextStep}
                      tip={tip}
                    />
                  </div>
                )
              }
              {
                currentStep === 2 && (
                  <ProcessDocuments
                    ref={formRef}
                    dataSourceNodeId={datasource!.nodeId}
                    isRunning={isPending}
                    onProcess={onClickProcess}
                    onPreview={onClickPreview}
                    onSubmit={handleSubmit}
                    onBack={handleBackStep}
                  />
                )
              }
              {
                currentStep === 3 && (
                  <Processing
                    batchId={batchId}
                    documents={documents}
                  />
                )
              }
            </div>
          </div>
        </div>
        {/* Preview */}
        {
          currentStep === 1 && (
            <div className='h-full min-w-0 flex-1'>
              <div className='flex h-full flex-col pl-2 pt-2'>
                {currentLocalFile && (
                  <FilePreview
                    file={currentLocalFile}
                    hidePreview={hidePreviewLocalFile}
                  />
                )}
                {currentDocument && (
                  <OnlineDocumentPreview
                    datasourceNodeId={datasource!.nodeId}
                    currentPage={currentDocument}
                    hidePreview={hidePreviewOnlineDocument}
                  />
                )}
                {currentWebsite && (
                  <WebsitePreview
                    currentWebsite={currentWebsite}
                    hidePreview={hideWebsitePreview}
                  />
                )}
              </div>
            </div>
          )
        }
        {
          currentStep === 2 && (
            <div className='h-full min-w-0 flex-1'>
              <div className='flex h-full flex-col pl-2 pt-2'>
                <ChunkPreview
                  dataSourceType={datasourceType as DatasourceType}
                  localFiles={fileList.map(file => file.file)}
                  onlineDocuments={onlineDocuments}
                  websitePages={websitePages}
                  onlineDriveFiles={selectedOnlineDriveFileList}
                  isIdle={isIdle}
                  isPending={isPending && isPreview.current}
                  estimateData={estimateData}
                  onPreview={onClickPreview}
                  handlePreviewFileChange={handlePreviewFileChange}
                  handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange}
                  handlePreviewWebsitePageChange={handlePreviewWebsiteChange}
                  handlePreviewOnlineDriveFileChange={handlePreviewOnlineDriveFileChange}
                />
              </div>
            </div>
          )
        }
      </div>
    )
  }
  /**
   * Default export of this module: renders `CreateFormPipeline` inside `DataSourceProvider`.
   * `CreateFormPipeline` calls `useDataSourceStore()`, which presumably requires this
   * provider as an ancestor — confirm against the store implementation.
   */
  const CreateFormPipelineWrapper = () => (
    <DataSourceProvider>
      <CreateFormPipeline />
    </DataSourceProvider>
  )

  export default CreateFormPipelineWrapper