您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

chunk-preview.tsx 9.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. import React, { useState } from 'react'
  2. import { useTranslation } from 'react-i18next'
  3. import PreviewContainer from '../../../preview/container'
  4. import { PreviewHeader } from '../../../preview/header'
  5. import type { CrawlResultItem, CustomFile, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets'
  6. import { ChunkingMode } from '@/models/datasets'
  7. import type { NotionPage } from '@/models/common'
  8. import PreviewDocumentPicker from '../../../common/document-picker/preview-document-picker'
  9. import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
  10. import { ChunkContainer, QAPreview } from '../../../chunk'
  11. import { FormattedText } from '../../../formatted-text/formatted'
  12. import { PreviewSlice } from '../../../formatted-text/flavours/preview-slice'
  13. import { SkeletonContainer, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton'
  14. import { RiSearchEyeLine } from '@remixicon/react'
  15. import Badge from '@/app/components/base/badge'
  16. import Button from '@/app/components/base/button'
  17. import type { OnlineDriveFile } from '@/models/pipeline'
  18. import { DatasourceType } from '@/models/pipeline'
  19. import { getFileExtension } from '../data-source/online-drive/file-list/list/utils'
  /**
   * Props accepted by the chunk preview panel.
   *
   * Only the list matching `dataSourceType` is offered in the header picker;
   * the remaining lists are still required by the type.
   */
  type ChunkPreviewProps = {
    /** Selects which of the four document pickers is rendered in the header. */
    dataSourceType: DatasourceType

    // --- Candidate documents, one list per data source ---
    /** Entries offered by the picker for the local-file data source. */
    localFiles: CustomFile[]
    /** Entries offered by the picker for the online-document data source. */
    onlineDocuments: NotionPage[]
    /** Entries offered by the picker for the website-crawl data source. */
    websitePages: CrawlResultItem[]
    /** Entries offered by the picker for the online-drive data source. */
    onlineDriveFiles: OnlineDriveFile[]

    // --- Preview request state ---
    /** When true, the empty-state prompt with the preview button is shown. */
    isIdle: boolean
    /** When true, skeleton placeholders replace the chunk list. */
    isPending: boolean
    /** Source of the chunk count badge and of the rendered chunks; may be absent. */
    estimateData: FileIndexingEstimateResponse | undefined

    // --- Callbacks ---
    /** Invoked when the empty-state preview button is clicked. */
    onPreview: () => void
    /** Invoked with the newly picked local file. */
    handlePreviewFileChange: (file: DocumentItem) => void
    /** Invoked with the newly picked online document page. */
    handlePreviewOnlineDocumentChange: (page: NotionPage) => void
    /** Invoked with the newly picked crawled website page. */
    handlePreviewWebsitePageChange: (page: CrawlResultItem) => void
    /** Invoked with the newly picked online drive file. */
    handlePreviewOnlineDriveFileChange: (file: OnlineDriveFile) => void
  }
  /**
   * Preview panel showing how the selected document will be split into chunks.
   *
   * The header contains a document picker chosen by `dataSourceType` and, unless
   * the dataset's doc form is QA, a badge with the estimated chunk count.
   * The body renders one of:
   * - the chunk list (QA pairs, plain text chunks or parent/child chunks,
   *   depending on the dataset's doc form) when not pending,
   * - an empty-state prompt with a preview button when `isIdle`,
   * - skeleton placeholders when `isPending`.
   *
   * Picking a document updates the local selection and notifies the parent
   * through the matching `handlePreview*Change` callback.
   */
  const ChunkPreview = ({
    dataSourceType,
    localFiles,
    onlineDocuments,
    websitePages,
    onlineDriveFiles,
    isIdle,
    isPending,
    estimateData,
    onPreview,
    handlePreviewFileChange,
    handlePreviewOnlineDocumentChange,
    handlePreviewWebsitePageChange,
    handlePreviewOnlineDriveFileChange,
  }: ChunkPreviewProps) => {
    const { t } = useTranslation()
    const currentDocForm = useDatasetDetailContextWithSelector(s => s.dataset?.doc_form)

    // Each data source keeps its own selection, seeded with the first entry of its list.
    const [previewFile, setPreviewFile] = useState<DocumentItem>(localFiles[0] as DocumentItem)
    const [previewOnlineDocument, setPreviewOnlineDocument] = useState<NotionPage>(onlineDocuments[0])
    const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem>(websitePages[0])
    const [previewOnlineDriveFile, setPreviewOnlineDriveFile] = useState<OnlineDriveFile>(onlineDriveFiles[0])

    return (
      <PreviewContainer
        header={<PreviewHeader
          title={t('datasetCreation.stepTwo.preview')}
        >
          <div className='flex items-center gap-1'>
            {dataSourceType === DatasourceType.localFile
              && <PreviewDocumentPicker
                files={localFiles as Array<Required<CustomFile>>}
                onChange={(selected) => {
                  setPreviewFile(selected)
                  handlePreviewFileChange(selected)
                }}
                value={previewFile}
              />
            }
            {dataSourceType === DatasourceType.onlineDocument
              && <PreviewDocumentPicker
                files={
                  onlineDocuments.map(page => ({
                    id: page.page_id,
                    name: page.page_name,
                    extension: 'md',
                  }))
                }
                onChange={(selected) => {
                  const selectedPage = onlineDocuments.find(page => page.page_id === selected.id)
                  // Ignore a selection that does not map to a known page rather than storing undefined.
                  if (!selectedPage)
                    return
                  setPreviewOnlineDocument(selectedPage)
                  handlePreviewOnlineDocumentChange(selectedPage)
                }}
                value={{
                  id: previewOnlineDocument?.page_id || '',
                  name: previewOnlineDocument?.page_name || '',
                  extension: 'md',
                }}
              />
            }
            {dataSourceType === DatasourceType.websiteCrawl
              && <PreviewDocumentPicker
                files={
                  websitePages.map(page => ({
                    id: page.source_url,
                    name: page.title,
                    extension: 'md',
                  }))
                }
                onChange={(selected) => {
                  const selectedPage = websitePages.find(page => page.source_url === selected.id)
                  // Ignore a selection that does not map to a known page rather than storing undefined.
                  if (!selectedPage)
                    return
                  setPreviewWebsitePage(selectedPage)
                  handlePreviewWebsitePageChange(selectedPage)
                }}
                value={
                  {
                    id: previewWebsitePage?.source_url || '',
                    name: previewWebsitePage?.title || '',
                    extension: 'md',
                  }
                }
              />
            }
            {dataSourceType === DatasourceType.onlineDrive
              && <PreviewDocumentPicker
                files={
                  onlineDriveFiles.map(file => ({
                    id: file.id,
                    name: file.name,
                    // Derive the extension from each listed file, not from the currently previewed one.
                    extension: getFileExtension(file.name),
                  }))
                }
                onChange={(selected) => {
                  const selectedFile = onlineDriveFiles.find(file => file.id === selected.id)
                  // Ignore a selection that does not map to a known file rather than storing undefined.
                  if (!selectedFile)
                    return
                  setPreviewOnlineDriveFile(selectedFile)
                  handlePreviewOnlineDriveFileChange(selectedFile)
                }}
                value={
                  {
                    id: previewOnlineDriveFile?.id || '',
                    name: previewOnlineDriveFile?.name || '',
                    extension: getFileExtension(previewOnlineDriveFile?.name),
                  }
                }
              />
            }
            {
              currentDocForm !== ChunkingMode.qa
              && <Badge text={t('datasetCreation.stepTwo.previewChunkCount', {
                count: estimateData?.total_segments ?? 0,
              }) as string}
              />
            }
          </div>
        </PreviewHeader>}
        className='relative flex h-full w-full shrink-0'
        mainClassName='space-y-6'
      >
        {!isPending && currentDocForm === ChunkingMode.qa && estimateData?.qa_preview && (
          estimateData.qa_preview.map((item, index) => (
            <ChunkContainer
              key={`${item.question}-${index}`}
              label={`Chunk-${index + 1}`}
              characterCount={item.question.length + item.answer.length}
            >
              <QAPreview qa={item} />
            </ChunkContainer>
          ))
        )}
        {!isPending && currentDocForm === ChunkingMode.text && estimateData?.preview && (
          estimateData.preview.map((item, index) => (
            <ChunkContainer
              key={`${item.content}-${index}`}
              label={`Chunk-${index + 1}`}
              characterCount={item.content.length}
            >
              {item.content}
            </ChunkContainer>
          ))
        )}
        {!isPending && currentDocForm === ChunkingMode.parentChild && estimateData?.preview && (
          estimateData.preview.map((item, index) => {
            const parentLabelIndex = index + 1
            return (
              <ChunkContainer
                key={`${item.content}-${index}`}
                label={`Chunk-${parentLabelIndex}`}
                characterCount={item.content.length}
              >
                <FormattedText>
                  {item.child_chunks.map((child, childIndex) => {
                    const childLabelIndex = childIndex + 1
                    return (
                      <PreviewSlice
                        // The position is part of the key so identical child texts stay unique.
                        key={`${child}-${childIndex}`}
                        label={`C-${childLabelIndex}`}
                        text={child}
                        tooltip={`Child-chunk-${childLabelIndex} · ${child.length} Characters`}
                        labelInnerClassName='text-[10px] font-semibold align-bottom leading-7'
                        dividerClassName='leading-7'
                      />
                    )
                  })}
                </FormattedText>
              </ChunkContainer>
            )
          })
        )}
        {isIdle && (
          <div className='flex h-full w-full items-center justify-center'>
            <div className='flex flex-col items-center justify-center gap-3 pb-4'>
              <RiSearchEyeLine className='size-10 text-text-empty-state-icon' />
              <p className='text-sm text-text-tertiary'>
                {t('datasetCreation.stepTwo.previewChunkTip')}
              </p>
              <Button onClick={onPreview}>
                {t('datasetPipeline.addDocuments.stepTwo.previewChunks')}
              </Button>
            </div>
          </div>
        )}
        {isPending && (
          <div className='h-full w-full space-y-6 overflow-hidden'>
            {Array.from({ length: 10 }, (_, i) => (
              <SkeletonContainer key={i}>
                <SkeletonRow>
                  <SkeletonRectangle className='w-20' />
                  <SkeletonPoint />
                  <SkeletonRectangle className='w-24' />
                </SkeletonRow>
                <SkeletonRectangle className='w-full' />
                <SkeletonRectangle className='w-full' />
                <SkeletonRectangle className='w-[422px]' />
              </SkeletonContainer>
            ))}
          </div>
        )}
      </PreviewContainer>
    )
  }

  export default React.memo(ChunkPreview)