Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

index.tsx 7.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. 'use client'
  2. import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'
  3. import { useTranslation } from 'react-i18next'
  4. import type { CrawlResultItem } from '@/models/datasets'
  5. import { CrawlStep } from '@/models/datasets'
  6. import Header from '@/app/components/datasets/create/website/base/header'
  7. import Options from './base/options'
  8. import Crawling from './base/crawling'
  9. import ErrorMessage from './base/error-message'
  10. import CrawledResult from './base/crawled-result'
  11. import {
  12. useDraftPipelinePreProcessingParams,
  13. usePublishedPipelinePreProcessingParams,
  14. } from '@/service/use-pipeline'
  15. import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
  16. import { DatasourceType } from '@/models/pipeline'
  17. import { ssePost } from '@/service/base'
  18. import type {
  19. DataSourceNodeCompletedResponse,
  20. DataSourceNodeErrorResponse,
  21. DataSourceNodeProcessingResponse,
  22. } from '@/types/pipeline'
  23. import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
  24. import { useDataSourceStore, useDataSourceStoreWithSelector } from '../store'
  25. import { useShallow } from 'zustand/react/shallow'
// i18n namespace shared by every message this component emits.
const I18N_PREFIX = 'datasetCreation.stepOne.website'

export type WebsiteCrawlProps = {
  // Workflow node id of the website-crawl data-source node.
  nodeId: string
  // Node config (title etc.) of the data-source node being run.
  nodeData: DataSourceNodeType
  // True when rendered inside the pipeline test-run panel — switches to the
  // draft-workflow run endpoint and restricts selection to a single page.
  isInPipeline?: boolean
}
  32. const WebsiteCrawl = ({
  33. nodeId,
  34. nodeData,
  35. isInPipeline = false,
  36. }: WebsiteCrawlProps) => {
  37. const { t } = useTranslation()
  38. const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
  39. const [totalNum, setTotalNum] = useState(0)
  40. const [crawledNum, setCrawledNum] = useState(0)
  41. const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
  42. const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
  43. const {
  44. crawlResult,
  45. step,
  46. checkedCrawlResult,
  47. previewIndex,
  48. } = useDataSourceStoreWithSelector(useShallow(state => ({
  49. crawlResult: state.crawlResult,
  50. step: state.step,
  51. checkedCrawlResult: state.websitePages,
  52. previewIndex: state.previewIndex,
  53. })))
  54. const dataSourceStore = useDataSourceStore()
  55. const usePreProcessingParams = useRef(!isInPipeline ? usePublishedPipelinePreProcessingParams : useDraftPipelinePreProcessingParams)
  56. const { data: paramsConfig, isFetching: isFetchingParams } = usePreProcessingParams.current({
  57. pipeline_id: pipelineId!,
  58. node_id: nodeId,
  59. }, !!pipelineId && !!nodeId)
  60. useEffect(() => {
  61. if (step !== CrawlStep.init)
  62. setControlFoldOptions(Date.now())
  63. }, [step])
  64. useEffect(() => {
  65. const {
  66. setStep,
  67. setCrawlResult,
  68. setWebsitePages,
  69. setPreviewIndex,
  70. setCurrentWebsite,
  71. currentNodeIdRef,
  72. } = dataSourceStore.getState()
  73. if (nodeId !== currentNodeIdRef.current) {
  74. setStep(CrawlStep.init)
  75. setCrawlResult(undefined)
  76. setCurrentWebsite(undefined)
  77. setWebsitePages([])
  78. setPreviewIndex(-1)
  79. setCrawledNum(0)
  80. setTotalNum(0)
  81. setCrawlErrorMessage('')
  82. currentNodeIdRef.current = nodeId
  83. }
  84. // eslint-disable-next-line react-hooks/exhaustive-deps
  85. }, [nodeId])
  86. const isInit = step === CrawlStep.init
  87. const isCrawlFinished = step === CrawlStep.finished
  88. const isRunning = step === CrawlStep.running
  89. const showError = isCrawlFinished && crawlErrorMessage
  90. const datasourceNodeRunURL = !isInPipeline
  91. ? `/rag/pipelines/${pipelineId}/workflows/published/datasource/nodes/${nodeId}/run`
  92. : `/rag/pipelines/${pipelineId}/workflows/draft/datasource/nodes/${nodeId}/run`
  93. const handleCheckedCrawlResultChange = useCallback((checkedCrawlResult: CrawlResultItem[]) => {
  94. const { setWebsitePages } = dataSourceStore.getState()
  95. setWebsitePages(checkedCrawlResult)
  96. }, [dataSourceStore])
  97. const handlePreview = useCallback((website: CrawlResultItem, index: number) => {
  98. const { setCurrentWebsite, setPreviewIndex } = dataSourceStore.getState()
  99. setCurrentWebsite(website)
  100. setPreviewIndex(index)
  101. }, [dataSourceStore])
  102. const handleRun = useCallback(async (value: Record<string, any>) => {
  103. const { setStep, setCrawlResult } = dataSourceStore.getState()
  104. setStep(CrawlStep.running)
  105. ssePost(
  106. datasourceNodeRunURL,
  107. {
  108. body: {
  109. inputs: value,
  110. datasource_type: DatasourceType.websiteCrawl,
  111. response_mode: 'streaming',
  112. },
  113. },
  114. {
  115. onDataSourceNodeProcessing: (data: DataSourceNodeProcessingResponse) => {
  116. setTotalNum(data.total ?? 0)
  117. setCrawledNum(data.completed ?? 0)
  118. },
  119. onDataSourceNodeCompleted: (data: DataSourceNodeCompletedResponse) => {
  120. const { data: crawlData, time_consuming } = data
  121. const crawlResultData = {
  122. data: crawlData.map((item: any) => {
  123. const { content, ...rest } = item
  124. return {
  125. markdown: content || '',
  126. ...rest,
  127. } as CrawlResultItem
  128. }),
  129. time_consuming: time_consuming ?? 0,
  130. }
  131. setCrawlResult(crawlResultData)
  132. handleCheckedCrawlResultChange(isInPipeline ? [crawlData[0]] : crawlData) // default select the crawl result
  133. setCrawlErrorMessage('')
  134. setStep(CrawlStep.finished)
  135. },
  136. onDataSourceNodeError: (error: DataSourceNodeErrorResponse) => {
  137. setCrawlErrorMessage(error.error || t(`${I18N_PREFIX}.unknownError`))
  138. setStep(CrawlStep.finished)
  139. },
  140. },
  141. )
  142. }, [dataSourceStore, datasourceNodeRunURL, handleCheckedCrawlResultChange, isInPipeline, t])
  143. const handleSubmit = useCallback((value: Record<string, any>) => {
  144. handleRun(value)
  145. }, [handleRun])
  146. const headerInfo = useMemo(() => {
  147. return {
  148. title: nodeData.title,
  149. docTitle: 'How to use?',
  150. docLink: 'https://docs.dify.ai',
  151. }
  152. }, [nodeData])
  153. return (
  154. <div className='flex flex-col'>
  155. <Header
  156. isInPipeline
  157. {...headerInfo}
  158. />
  159. <div className='mt-2 rounded-xl border border-components-panel-border bg-background-default-subtle'>
  160. <Options
  161. variables={paramsConfig?.variables || []}
  162. isRunning={isRunning}
  163. runDisabled={isFetchingParams}
  164. controlFoldOptions={controlFoldOptions}
  165. onSubmit={handleSubmit}
  166. />
  167. </div>
  168. {!isInit && (
  169. <div className='relative flex flex-col'>
  170. {isRunning && (
  171. <Crawling
  172. crawledNum={crawledNum}
  173. totalNum={totalNum}
  174. />
  175. )}
  176. {showError && (
  177. <ErrorMessage
  178. className='mt-2'
  179. title={t(`${I18N_PREFIX}.exceptionErrorTitle`)}
  180. errorMsg={crawlErrorMessage}
  181. />
  182. )}
  183. {isCrawlFinished && !showError && (
  184. <CrawledResult
  185. className='mt-2'
  186. list={crawlResult?.data || []}
  187. checkedList={checkedCrawlResult}
  188. onSelectedChange={handleCheckedCrawlResultChange}
  189. usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}
  190. previewIndex={previewIndex}
  191. onPreview={handlePreview}
  192. showPreview={!isInPipeline}
  193. isMultipleChoice={!isInPipeline} // only support single choice in test run
  194. />
  195. )}
  196. </div>
  197. )}
  198. </div>
  199. )
  200. }
  201. export default React.memo(WebsiteCrawl)