You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

index.tsx 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. 'use client'
  2. import { useCallback, useMemo, useRef, useState } from 'react'
  3. import DataSourceOptions from './data-source-options'
  4. import type { CrawlResultItem, DocumentItem, CustomFile as File, FileIndexingEstimateResponse } from '@/models/datasets'
  5. import LocalFile from '@/app/components/rag-pipeline/components/panel/test-run/data-source/local-file'
  6. import { useProviderContextSelector } from '@/context/provider-context'
  7. import type { NotionPage } from '@/models/common'
  8. import OnlineDocuments from '@/app/components/rag-pipeline/components/panel/test-run/data-source/online-documents'
  9. import VectorSpaceFull from '@/app/components/billing/vector-space-full'
  10. import WebsiteCrawl from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl'
  11. import Actions from './data-source/actions'
  12. import { useTranslation } from 'react-i18next'
  13. import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
  14. import LeftHeader from './left-header'
  15. import { usePublishedPipelineInfo, useRunPublishedPipeline } from '@/service/use-pipeline'
  16. import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
  17. import Loading from '@/app/components/base/loading'
  18. import type { Node } from '@/app/components/workflow/types'
  19. import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
  20. import FilePreview from './preview/file-preview'
  21. import NotionPagePreview from './preview/notion-page-preview'
  22. import WebsitePreview from './preview/web-preview'
  23. import ProcessDocuments from './process-documents'
  24. import ChunkPreview from './preview/chunk-preview'
  25. import Processing from './processing'
  26. import type { InitialDocumentDetail, PublishedPipelineRunPreviewResponse, PublishedPipelineRunResponse } from '@/models/pipeline'
  27. import { DatasourceType } from '@/models/pipeline'
  28. import { TransferMethod } from '@/types/app'
  29. import { useAddDocumentsSteps, useLocalFile, useOnlineDocuments, useWebsiteCrawl } from './hooks'
  /**
   * Build the `datasource_info_list` entry describing one uploaded local file.
   * Only the fields listed here are forwarded; `url` is always sent empty and
   * the transfer method is always `TransferMethod.local_file`.
   */
  const toLocalFileInfo = (
    file: Pick<File, 'id' | 'name' | 'type' | 'size' | 'extension' | 'mime_type'>,
  ): Record<string, any> => {
    const { id, name, type, size, extension, mime_type } = file
    return {
      related_id: id,
      name,
      type,
      size,
      extension,
      mime_type,
      url: '',
      transfer_method: TransferMethod.local_file,
    }
  }

  /**
   * Build the `datasource_info_list` entry for one online document:
   * `workspace_id` is lifted to the top level and every remaining field is
   * nested under `page`.
   */
  const toOnlineDocumentInfo = (doc: Record<string, any>): Record<string, any> => {
    const { workspace_id, ...page } = doc
    return { workspace_id, page }
  }

  /**
   * Multi-step "add documents" flow for a dataset backed by a published pipeline.
   *
   * Step 1 lets the user pick a data source (local file, online document or
   * website crawl) and select items from it, step 2 collects the processing
   * form and can run a chunk preview, and step 3 shows the processing state of
   * the documents returned by the pipeline run.
   */
  const CreateFormPipeline = () => {
    const { t } = useTranslation()
    const plan = useProviderContextSelector(state => state.plan)
    const enableBilling = useProviderContextSelector(state => state.enableBilling)
    const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)

    const [datasource, setDatasource] = useState<Datasource>()
    const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined)
    const [batchId, setBatchId] = useState('')
    const [documents, setDocuments] = useState<InitialDocumentDetail[]>([])

    // Records which action triggered the current form submission (preview vs. process).
    const isPreview = useRef(false)
    const formRef = useRef<any>(null)

    const { data: pipelineInfo, isFetching: isFetchingPipelineInfo } = usePublishedPipelineInfo(pipelineId || '')

    const {
      steps,
      currentStep,
      handleNextStep,
      handleBackStep,
    } = useAddDocumentsSteps()
    const {
      fileList,
      previewFile,
      allFileLoaded,
      updateFile,
      updateFileList,
      currentFile,
      updateCurrentFile,
      hideFilePreview,
    } = useLocalFile()
    const {
      onlineDocuments,
      previewOnlineDocument,
      updateOnlineDocuments,
      currentDocuments,
      updateCurrentPage,
      hideOnlineDocumentPreview,
    } = useOnlineDocuments()
    const {
      websitePages,
      previewWebsitePage,
      updataCheckedCrawlResultChange,
      currentWebsite,
      updateCurrentWebsite,
      hideWebsitePreview,
    } = useWebsiteCrawl()

    const isVectorSpaceFull = plan.usage.vectorSpace >= plan.total.vectorSpace
    const isShowVectorSpaceFull = allFileLoaded && isVectorSpaceFull && enableBilling
    const notSupportBatchUpload = enableBilling && plan.type === 'sandbox'

    // The "next" button stays disabled until a data source is chosen and it has
    // at least one usable item (for local files: every file must have an id).
    const nextBtnDisabled = useMemo(() => {
      if (!datasource)
        return true
      if (datasource.type === DatasourceType.localFile)
        return isShowVectorSpaceFull || !fileList.length || fileList.some(file => !file.file.id)
      if (datasource.type === DatasourceType.onlineDocument)
        return isShowVectorSpaceFull || !onlineDocuments.length
      if (datasource.type === DatasourceType.websiteCrawl)
        return isShowVectorSpaceFull || !websitePages.length
      return false
    }, [datasource, isShowVectorSpaceFull, fileList, onlineDocuments.length, websitePages.length])

    const { mutateAsync: runPublishedPipeline, isIdle, isPending } = useRunPublishedPipeline()

    /**
     * Run the published pipeline in preview mode for a single item and store
     * the returned outputs as the chunk estimate.
     *
     * The item is the one recorded in the matching preview ref; when no item
     * has been recorded yet, the first selected item is used instead. If there
     * is nothing to preview, or the pipeline id is unknown, nothing is sent.
     */
    const handlePreviewChunks = useCallback(async (data: Record<string, any>) => {
      if (!datasource || !pipelineId)
        return
      const datasourceInfoList: Record<string, any>[] = []
      if (datasource.type === DatasourceType.localFile) {
        const target = (previewFile.current ?? fileList[0]?.file) as File | undefined
        if (!target)
          return
        datasourceInfoList.push(toLocalFileInfo(target))
      }
      if (datasource.type === DatasourceType.onlineDocument) {
        const target = previewOnlineDocument.current ?? onlineDocuments[0]
        if (!target)
          return
        datasourceInfoList.push(toOnlineDocumentInfo(target))
      }
      if (datasource.type === DatasourceType.websiteCrawl) {
        const target = previewWebsitePage.current ?? websitePages[0]
        if (!target)
          return
        datasourceInfoList.push(target)
      }
      await runPublishedPipeline({
        pipeline_id: pipelineId,
        inputs: data,
        start_node_id: datasource.nodeId,
        datasource_type: datasource.type,
        datasource_info_list: datasourceInfoList,
        is_preview: true,
      }, {
        onSuccess: (res) => {
          setEstimateData((res as PublishedPipelineRunPreviewResponse).data.outputs)
        },
      })
    }, [
      datasource,
      fileList,
      onlineDocuments,
      pipelineId,
      previewFile,
      previewOnlineDocument,
      previewWebsitePage,
      runPublishedPipeline,
      websitePages,
    ])

    /**
     * Run the published pipeline for every selected item. On success the
     * returned batch id and documents are stored and the flow advances to the
     * next step.
     */
    const handleProcess = useCallback(async (data: Record<string, any>) => {
      if (!datasource || !pipelineId)
        return
      let datasourceInfoList: Record<string, any>[] = []
      if (datasource.type === DatasourceType.localFile)
        datasourceInfoList = fileList.map(item => toLocalFileInfo(item.file))
      if (datasource.type === DatasourceType.onlineDocument)
        datasourceInfoList = onlineDocuments.map(page => toOnlineDocumentInfo(page))
      if (datasource.type === DatasourceType.websiteCrawl)
        datasourceInfoList = [...websitePages]
      await runPublishedPipeline({
        pipeline_id: pipelineId,
        inputs: data,
        start_node_id: datasource.nodeId,
        datasource_type: datasource.type,
        datasource_info_list: datasourceInfoList,
        is_preview: false,
      }, {
        onSuccess: (res) => {
          setBatchId((res as PublishedPipelineRunResponse).batch || '')
          setDocuments((res as PublishedPipelineRunResponse).documents || [])
          handleNextStep()
        },
      })
    }, [datasource, fileList, handleNextStep, onlineDocuments, pipelineId, runPublishedPipeline, websitePages])

    // Both buttons submit the same form; the ref flag tells handleSubmit which
    // pipeline run the submission is for.
    const onClickProcess = useCallback(() => {
      isPreview.current = false
      formRef.current?.submit()
    }, [])

    const onClickPreview = useCallback(() => {
      isPreview.current = true
      formRef.current?.submit()
    }, [])

    // NOTE(review): the promise returned by either handler is not awaited or
    // caught here, so a rejected `mutateAsync` would be an unhandled rejection —
    // confirm request errors are handled inside the mutation hook.
    const handleSubmit = useCallback((data: Record<string, any>) => {
      if (isPreview.current)
        handlePreviewChunks(data)
      else
        handleProcess(data)
    }, [handlePreviewChunks, handleProcess])

    // Switching the previewed item records it in its ref and immediately
    // re-submits the form in preview mode.
    const handlePreviewFileChange = useCallback((file: DocumentItem) => {
      previewFile.current = file
      onClickPreview()
    }, [onClickPreview, previewFile])

    const handlePreviewOnlineDocumentChange = useCallback((page: NotionPage) => {
      previewOnlineDocument.current = page
      onClickPreview()
    }, [onClickPreview, previewOnlineDocument])

    const handlePreviewWebsiteChange = useCallback((website: CrawlResultItem) => {
      previewWebsitePage.current = website
      onClickPreview()
    }, [onClickPreview, previewWebsitePage])

    if (isFetchingPipelineInfo) {
      return (
        <Loading type='app' />
      )
    }

    return (
      <div
        className='relative flex h-[calc(100vh-56px)] overflow-x-auto rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle'
      >
        <div className='flex h-full min-w-[760px] flex-1 flex-col px-14'>
          <LeftHeader
            steps={steps}
            title={t('datasetPipeline.addDocuments.title')}
            currentStep={currentStep}
          />
          <div className='grow overflow-y-auto'>
            {
              currentStep === 1 && (
                <div className='flex flex-col gap-y-5 pt-4'>
                  <DataSourceOptions
                    datasourceNodeId={datasource?.nodeId || ''}
                    onSelect={setDatasource}
                    pipelineNodes={(pipelineInfo?.graph.nodes || []) as Node<DataSourceNodeType>[]}
                  />
                  {datasource?.type === DatasourceType.localFile && (
                    <LocalFile
                      files={fileList}
                      allowedExtensions={datasource?.fileExtensions || []}
                      updateFile={updateFile}
                      updateFileList={updateFileList}
                      onPreview={updateCurrentFile}
                      notSupportBatchUpload={notSupportBatchUpload}
                    />
                  )}
                  {datasource?.type === DatasourceType.onlineDocument && (
                    <OnlineDocuments
                      nodeId={datasource?.nodeId || ''}
                      headerInfo={{
                        title: datasource.description,
                        docTitle: datasource.docTitle || '',
                        docLink: datasource.docLink || '',
                      }}
                      onlineDocuments={onlineDocuments}
                      updateOnlineDocuments={updateOnlineDocuments}
                      canPreview
                      onPreview={updateCurrentPage}
                    />
                  )}
                  {datasource?.type === DatasourceType.websiteCrawl && (
                    <WebsiteCrawl
                      nodeId={datasource?.nodeId || ''}
                      headerInfo={{
                        title: datasource.description,
                        docTitle: datasource.docTitle || '',
                        docLink: datasource.docLink || '',
                      }}
                      checkedCrawlResult={websitePages}
                      onCheckedCrawlResultChange={updataCheckedCrawlResultChange}
                      onPreview={updateCurrentWebsite}
                    />
                  )}
                  {isShowVectorSpaceFull && (
                    <VectorSpaceFull />
                  )}
                  <Actions disabled={nextBtnDisabled} handleNextStep={handleNextStep} />
                </div>
              )
            }
            {
              currentStep === 2 && (
                <ProcessDocuments
                  ref={formRef}
                  dataSourceNodeId={datasource?.nodeId || ''}
                  onProcess={onClickProcess}
                  onPreview={onClickPreview}
                  onSubmit={handleSubmit}
                  onBack={handleBackStep}
                />
              )
            }
            {
              currentStep === 3 && (
                <Processing
                  batchId={batchId}
                  documents={documents}
                />
              )
            }
          </div>
        </div>
        {/* Preview */}
        {
          currentStep === 1 && (
            <div className='flex h-full w-[752px] shrink-0 pl-2 pt-2'>
              {currentFile && <FilePreview file={currentFile} hidePreview={hideFilePreview} />}
              {currentDocuments && <NotionPagePreview currentPage={currentDocuments} hidePreview={hideOnlineDocumentPreview} />}
              {currentWebsite && <WebsitePreview payload={currentWebsite} hidePreview={hideWebsitePreview} />}
            </div>
          )
        }
        {
          // Guarded on `datasource` so the chunk preview never reads `.type` off undefined.
          currentStep === 2 && datasource && (
            <div className='flex h-full w-[752px] shrink-0 pl-2 pt-2'>
              <ChunkPreview
                dataSourceType={datasource.type}
                files={fileList.map(file => file.file)}
                onlineDocuments={onlineDocuments}
                websitePages={websitePages}
                isIdle={isIdle}
                isPending={isPending && isPreview.current}
                estimateData={estimateData}
                onPreview={onClickPreview}
                handlePreviewFileChange={handlePreviewFileChange}
                handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange}
                handlePreviewWebsitePageChange={handlePreviewWebsiteChange}
              />
            </div>
          )
        }
      </div>
    )
  }

  export default CreateFormPipeline