Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

chunk-preview.tsx 7.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. import React, { useState } from 'react'
  2. import { useTranslation } from 'react-i18next'
  3. import { PreviewContainer } from '../../../preview/container'
  4. import { PreviewHeader } from '../../../preview/header'
  5. import type { CrawlResultItem, CustomFile, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets'
  6. import { ChunkingMode } from '@/models/datasets'
  7. import type { NotionPage } from '@/models/common'
  8. import PreviewDocumentPicker from '../../../common/document-picker/preview-document-picker'
  9. import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
  10. import { ChunkContainer, QAPreview } from '../../../chunk'
  11. import { FormattedText } from '../../../formatted-text/formatted'
  12. import { PreviewSlice } from '../../../formatted-text/flavours/preview-slice'
  13. import { SkeletonContainer, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton'
  14. import { RiSearchEyeLine } from '@remixicon/react'
  15. import Badge from '@/app/components/base/badge'
  16. import Button from '@/app/components/base/button'
  17. import { DatasourceType } from '@/models/pipeline'
  /**
   * Props accepted by `ChunkPreview`.
   */
  type ChunkPreviewProps = {
    /** Selects which of the three document pickers is shown in the header. */
    dataSourceType: DatasourceType

    // Candidate documents, one list per data source type.
    /** Local files offered by the picker; the first entry is the initial selection. */
    files: CustomFile[]
    /** Online document pages offered by the picker; the first entry is the initial selection. */
    onlineDocuments: NotionPage[]
    /** Crawled website pages offered by the picker; the first entry is the initial selection. */
    websitePages: CrawlResultItem[]

    // Estimate state.
    /** When true, the empty state with the "preview chunks" button is rendered. */
    isIdle: boolean
    /** When true, skeleton placeholders are rendered. */
    isPending: boolean
    /** Estimate result supplying the previewed chunks and the total segment count. */
    estimateData: FileIndexingEstimateResponse | undefined

    // Callbacks.
    /** Called when the button in the idle empty state is clicked. */
    onPreview: () => void
    /** Called with the newly selected local file. */
    handlePreviewFileChange: (file: DocumentItem) => void
    /** Called with the newly selected online document page. */
    handlePreviewOnlineDocumentChange: (page: NotionPage) => void
    /** Called with the newly selected website page. */
    handlePreviewWebsitePageChange: (page: CrawlResultItem) => void
  }
  /** Number of skeleton placeholders rendered while the estimate is pending. */
  const SKELETON_ITEM_COUNT = 10

  /**
   * Preview panel for chunking results.
   *
   * The header shows a document picker matching `dataSourceType` and, unless the
   * dataset's chunking mode is QA, a badge with the total segment count. The body
   * renders the chunks from `estimateData` according to the dataset's chunking
   * mode (QA pairs, plain text chunks, or parent chunks with their child chunks),
   * an empty state with a preview button while `isIdle`, and skeleton
   * placeholders while `isPending`.
   *
   * The currently selected document of each source type is kept in local state
   * and every selection change is forwarded to the matching `handlePreview*Change`
   * callback.
   */
  const ChunkPreview = ({
    dataSourceType,
    files,
    onlineDocuments,
    websitePages,
    isIdle,
    isPending,
    estimateData,
    onPreview,
    handlePreviewFileChange,
    handlePreviewOnlineDocumentChange,
    handlePreviewWebsitePageChange,
  }: ChunkPreviewProps) => {
    const { t } = useTranslation()
    const currentDocForm = useDatasetDetailContextWithSelector(s => s.dataset?.doc_form)
    const [previewFile, setPreviewFile] = useState<DocumentItem>(files[0] as DocumentItem)
    // The source lists may be empty, so the initial selection can be undefined.
    const [previewOnlineDocument, setPreviewOnlineDocument] = useState<NotionPage | undefined>(onlineDocuments[0])
    const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem | undefined>(websitePages[0])

    return (
      <PreviewContainer
        header={<PreviewHeader
          title={t('datasetCreation.stepTwo.preview')}
        >
          <div className='flex items-center gap-1'>
            {dataSourceType === DatasourceType.localFile
              && <PreviewDocumentPicker
                files={files as Array<Required<CustomFile>>}
                onChange={(selected) => {
                  setPreviewFile(selected)
                  handlePreviewFileChange(selected)
                }}
                value={previewFile}
              />
            }
            {dataSourceType === DatasourceType.onlineDocument
              && <PreviewDocumentPicker
                files={
                  onlineDocuments.map(page => ({
                    id: page.page_id,
                    name: page.page_name,
                    extension: 'md',
                  }))
                }
                onChange={(selected) => {
                  const selectedPage = onlineDocuments.find(page => page.page_id === selected.id)
                  // Ignore selections that no longer map to a known page instead of
                  // pushing `undefined` into state and the parent callback.
                  if (!selectedPage)
                    return
                  setPreviewOnlineDocument(selectedPage)
                  handlePreviewOnlineDocumentChange(selectedPage)
                }}
                value={{
                  id: previewOnlineDocument?.page_id ?? '',
                  name: previewOnlineDocument?.page_name ?? '',
                  extension: 'md',
                }}
              />
            }
            {dataSourceType === DatasourceType.websiteCrawl
              && <PreviewDocumentPicker
                files={
                  websitePages.map(page => ({
                    id: page.source_url,
                    name: page.title,
                    extension: 'md',
                  }))
                }
                onChange={(selected) => {
                  const selectedPage = websitePages.find(page => page.source_url === selected.id)
                  // Same guard as above: never forward a missing page.
                  if (!selectedPage)
                    return
                  setPreviewWebsitePage(selectedPage)
                  handlePreviewWebsitePageChange(selectedPage)
                }}
                value={{
                  id: previewWebsitePage?.source_url ?? '',
                  name: previewWebsitePage?.title ?? '',
                  extension: 'md',
                }}
              />
            }
            {
              currentDocForm !== ChunkingMode.qa
              && <Badge text={t('datasetCreation.stepTwo.previewChunkCount', {
                count: estimateData?.total_segments ?? 0,
              }) as string}
              />
            }
          </div>
        </PreviewHeader>}
        className='relative flex h-full w-full shrink-0'
        mainClassName='space-y-6'
      >
        {/* Keys combine index and content: content alone is not unique when two chunks share the same text. */}
        {currentDocForm === ChunkingMode.qa && estimateData?.qa_preview?.map((item, index) => (
          <ChunkContainer
            key={`${index}-${item.question}`}
            label={`Chunk-${index + 1}`}
            characterCount={item.question.length + item.answer.length}
          >
            <QAPreview qa={item} />
          </ChunkContainer>
        ))}
        {currentDocForm === ChunkingMode.text && estimateData?.preview?.map((item, index) => (
          <ChunkContainer
            key={`${index}-${item.content}`}
            label={`Chunk-${index + 1}`}
            characterCount={item.content.length}
          >
            {item.content}
          </ChunkContainer>
        ))}
        {currentDocForm === ChunkingMode.parentChild && estimateData?.preview?.map((item, parentIndex) => (
          <ChunkContainer
            key={`${parentIndex}-${item.content}`}
            label={`Chunk-${parentIndex + 1}`}
            characterCount={item.content.length}
          >
            <FormattedText>
              {item.child_chunks.map((child, childIndex) => {
                const childNumber = childIndex + 1
                return (
                  <PreviewSlice
                    key={`${childIndex}-${child}`}
                    label={`C-${childNumber}`}
                    text={child}
                    tooltip={`Child-chunk-${childNumber} · ${child.length} Characters`}
                    labelInnerClassName='text-[10px] font-semibold align-bottom leading-7'
                    dividerClassName='leading-7'
                  />
                )
              })}
            </FormattedText>
          </ChunkContainer>
        ))}
        {isIdle && (
          <div className='flex h-full w-full items-center justify-center'>
            <div className='flex flex-col items-center justify-center gap-3 pb-4'>
              <RiSearchEyeLine className='size-10 text-text-empty-state-icon' />
              <p className='text-sm text-text-tertiary'>
                {t('datasetCreation.stepTwo.previewChunkTip')}
              </p>
              <Button onClick={onPreview}>
                {t('datasetPipeline.addDocuments.stepTwo.previewChunks')}
              </Button>
            </div>
          </div>
        )}
        {isPending && (
          <div className='space-y-6'>
            {Array.from({ length: SKELETON_ITEM_COUNT }, (_, i) => (
              <SkeletonContainer key={i}>
                <SkeletonRow>
                  <SkeletonRectangle className='w-20' />
                  <SkeletonPoint />
                  <SkeletonRectangle className='w-24' />
                </SkeletonRow>
                <SkeletonRectangle className='w-full' />
                <SkeletonRectangle className='w-full' />
                <SkeletonRectangle className='w-[422px]' />
              </SkeletonContainer>
            ))}
          </div>
        )}
      </PreviewContainer>
    )
  }

  export default React.memo(ChunkPreview)