Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. 'use client'
  2. import type { FC } from 'react'
  3. import React, { useCallback, useEffect, useState } from 'react'
  4. import { useTranslation } from 'react-i18next'
  5. import UrlInput from '../base/url-input'
  6. import OptionsWrap from '../base/options-wrap'
  7. import CrawledResult from '../base/crawled-result'
  8. import Crawling from '../base/crawling'
  9. import ErrorMessage from '../base/error-message'
  10. import Options from './options'
  11. import { useModalContextSelector } from '@/context/modal-context'
  12. import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
  13. import Toast from '@/app/components/base/toast'
  14. import { checkFirecrawlTaskStatus, createFirecrawlTask } from '@/service/datasets'
  15. import { sleep } from '@/utils'
  16. import Header from '../base/header'
  /** Translation-key prefix for the website-crawl strings used by this component. */
  const I18N_PREFIX = 'datasetCreation.stepOne.website'
  /** Translation-key prefix for the validation error messages (`fieldRequired`, `urlError`). */
  const ERROR_I18N_PREFIX = 'common.errorMsg'
  /** Props accepted by the FireCrawl data-source panel. */
  type Props = {
    /** Current crawl options; rendered by the options form and sent when a crawl task is created. */
    crawlOptions: CrawlOptions
    /** Forwarded to the options form as its change handler. */
    onCrawlOptionsChange: (payload: CrawlOptions) => void
    /** Receives the job id returned when a crawl task has been created. */
    onJobIdChange: (jobId: string) => void
    /** Pages currently checked in the result list. */
    checkedCrawlResult: CrawlResultItem[]
    /** Called with the new checked list: on user selection, and with the crawled pages while and after a crawl runs. */
    onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
    /** Forwarded to the result list as its preview handler. */
    onPreview: (payload: CrawlResultItem) => void
  }
  /** Lifecycle of a crawl as tracked by the component. */
  enum Step {
    /** Initial state, before any crawl has been started. */
    init = 'init',
    /** Set when a run starts, until it settles. */
    running = 'running',
    /** Set once a run has settled, whether it succeeded or failed. */
    finished = 'finished',
  }
  /** Delay between two consecutive status polls, in milliseconds. */
  const POLL_INTERVAL_MS = 2500

  /** Outcome of polling a crawl job until it is no longer in progress. */
  type CrawlPollResult = {
    isError: boolean
    errorMessage?: string
    // The status payload is not typed in this file, so it is passed through as-is.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    data: any
  }

  /**
   * Copies each crawled item and exposes its `markdown` field as `content`.
   * A missing list is treated as empty, and the input is never mutated.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const attachContent = (items?: any[] | null): CrawlResultItem[] =>
    (items ?? []).map(item => ({
      ...item,
      content: item.markdown,
    }))

  /**
   * Tries to read `message` from a thrown value that exposes a `json()` method.
   * Returns `undefined` for anything else (or when reading the body fails) so the
   * caller can fall back to a generic message instead of throwing again.
   */
  const readErrorMessage = async (error: unknown): Promise<string | undefined> => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const readJson = (error as any)?.json
    if (typeof readJson !== 'function')
      return undefined
    try {
      const body = await readJson.call(error)
      return body?.message
    }
    catch {
      return undefined
    }
  }

  /**
   * Website crawl panel: validates the URL and options, creates a crawl task,
   * polls its status while showing progress, and finally renders either the
   * crawled pages or an error message.
   */
  const FireCrawl: FC<Props> = ({
    onPreview,
    checkedCrawlResult,
    onCheckedCrawlResultChange,
    onJobIdChange,
    crawlOptions,
    onCrawlOptionsChange,
  }) => {
    const { t } = useTranslation()
    const [step, setStep] = useState<Step>(Step.init)
    const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)

    // Bump the control value (a fresh timestamp) whenever the step leaves `init`.
    useEffect(() => {
      if (step !== Step.init)
        setControlFoldOptions(Date.now())
    }, [step])

    const setShowAccountSettingModal = useModalContextSelector(s => s.setShowAccountSettingModal)
    const handleSetting = useCallback(() => {
      setShowAccountSettingModal({
        payload: 'data-source',
      })
    }, [setShowAccountSettingModal])

    /** Validates the URL and the `limit` option; returns the first problem found. */
    const checkValid = useCallback((url: string) => {
      let errorMsg = ''
      if (!url) {
        errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
          field: 'url',
        })
      }

      if (!errorMsg && !((url.startsWith('http://') || url.startsWith('https://'))))
        errorMsg = t(`${ERROR_I18N_PREFIX}.urlError`)

      if (!errorMsg && (crawlOptions.limit === null || crawlOptions.limit === undefined || crawlOptions.limit === '')) {
        errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
          field: t(`${I18N_PREFIX}.limit`),
        })
      }

      return {
        isValid: !errorMsg,
        errorMsg,
      }
    }, [crawlOptions, t])

    const isInit = step === Step.init
    const isCrawlFinished = step === Step.finished
    const isRunning = step === Step.running
    const [crawlResult, setCrawlResult] = useState<{
      current: number
      total: number
      data: CrawlResultItem[]
      time_consuming: number | string
    } | undefined>(undefined)
    const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
    const showError = isCrawlFinished && !!crawlErrorMessage

    /**
     * Polls the job status until it is `completed` or failed.
     * While the job is still in progress, the partial result is published as
     * progress and pre-selected; the loop then waits before polling again.
     */
    const waitForCrawlFinished = useCallback(async (jobId: string): Promise<CrawlPollResult> => {
      const limit = Number.parseFloat(crawlOptions.limit as string)
      const failure = (errorMessage?: string): CrawlPollResult => ({
        isError: true,
        errorMessage,
        data: {
          data: [],
        },
      })

      try {
        // Iterative polling: avoids nesting one pending promise per poll.
        for (;;) {
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          const res = await checkFirecrawlTaskStatus(jobId) as any
          if (res.status === 'completed') {
            return {
              isError: false,
              data: {
                ...res,
                // never report more pages than the configured limit
                total: Math.min(res.total, limit),
              },
            }
          }
          if (res.status === 'error' || !res.status) {
            // can't get the error message from the firecrawl api
            return failure(res.message)
          }

          const partialItems = attachContent(res.data)
          // update the progress
          setCrawlResult({
            ...res,
            data: partialItems,
            total: Math.min(res.total, limit),
          })
          onCheckedCrawlResultChange(partialItems) // default select the crawl result
          await sleep(POLL_INTERVAL_MS)
        }
      }
      catch (e: unknown) {
        const errorMessage = await readErrorMessage(e)
        if (!errorMessage)
          console.error(e)
        return failure(errorMessage)
      }
    }, [crawlOptions.limit, onCheckedCrawlResultChange])

    /** Validates the input, starts the crawl task and waits for its outcome. */
    const handleRun = useCallback(async (url: string) => {
      const { isValid, errorMsg } = checkValid(url)
      if (!isValid) {
        Toast.notify({
          message: errorMsg,
          type: 'error',
        })
        return
      }

      // Drop leftovers of a previous run so the progress view starts from zero.
      setCrawlResult(undefined)
      setCrawlErrorMessage('')
      setStep(Step.running)
      try {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const passToServerCrawlOptions: any = {
          ...crawlOptions,
        }
        // an empty max_depth is omitted from the request
        if (crawlOptions.max_depth === '')
          delete passToServerCrawlOptions.max_depth

        const res = await createFirecrawlTask({
          url,
          options: passToServerCrawlOptions,
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        }) as any
        const jobId = res.job_id
        onJobIdChange(jobId)

        const { isError, data, errorMessage } = await waitForCrawlFinished(jobId)
        if (isError) {
          setCrawlErrorMessage(errorMessage || t(`${I18N_PREFIX}.unknownError`))
        }
        else {
          const finalItems = attachContent(data.data)
          setCrawlResult({
            ...data,
            data: finalItems,
          })
          onCheckedCrawlResultChange(finalItems) // default select the crawl result
          setCrawlErrorMessage('')
        }
      }
      catch (e) {
        setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`))
        console.error(e)
      }
      finally {
        setStep(Step.finished)
      }
    }, [checkValid, crawlOptions, onJobIdChange, t, waitForCrawlFinished, onCheckedCrawlResultChange])

    return (
      <div>
        <Header
          onClickConfiguration={handleSetting}
          title={t(`${I18N_PREFIX}.firecrawlTitle`)}
          buttonText={t(`${I18N_PREFIX}.configureFirecrawl`)}
          docTitle={t(`${I18N_PREFIX}.firecrawlDoc`)}
          docLink={'https://docs.firecrawl.dev/introduction'}
        />
        <div className='mt-2 rounded-xl border border-components-panel-border bg-background-default-subtle p-4 pb-0'>
          <UrlInput onRun={handleRun} isRunning={isRunning} />
          <OptionsWrap
            className='mt-4'
            controlFoldOptions={controlFoldOptions}
          >
            <Options className='mt-2' payload={crawlOptions} onChange={onCrawlOptionsChange} />
          </OptionsWrap>

          {!isInit && (
            <div className='relative left-[-16px] mt-3 w-[calc(100%_+_32px)] rounded-b-xl'>
              {isRunning
                && <Crawling
                  className='mt-2'
                  crawledNum={crawlResult?.current || 0}
                  totalNum={crawlResult?.total || Number.parseFloat(crawlOptions.limit as string) || 0}
                />}
              {showError && (
                <ErrorMessage className='rounded-b-xl' title={t(`${I18N_PREFIX}.exceptionErrorTitle`)} errorMsg={crawlErrorMessage} />
              )}
              {isCrawlFinished && !showError
                && <CrawledResult
                  className='mb-2'
                  list={crawlResult?.data || []}
                  checkedList={checkedCrawlResult}
                  onSelectedChange={onCheckedCrawlResultChange}
                  onPreview={onPreview}
                  usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}
                />
              }
            </div>
          )}
        </div>
      </div>
    )
  }
  // Export the memoized component as the module's default export.
  const MemoizedFireCrawl = React.memo(FireCrawl)
  export default MemoizedFireCrawl