Преглед на файлове

feat(chunk-card-list): implement ChunkCard and QAItem components, refactor ChunkCardList to utilize new structure and types

tags/2.0.0-beta.1
twwu преди 2 месеца
родител
ревизия
1e9bfd8872

+ 78
- 0
web/app/components/rag-pipeline/components/chunk-card-list/chunk-card.tsx Целия файл

@@ -0,0 +1,78 @@
import React, { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import type { QAChunk } from './types'
import { QAItemType } from './types'
import { PreviewSlice } from '@/app/components/datasets/formatted-text/flavours/preview-slice'
import SegmentIndexTag from '@/app/components/datasets/documents/detail/completed/common/segment-index-tag'
import Dot from '@/app/components/datasets/documents/detail/completed/common/dot'
import { formatNumber } from '@/utils/format'
import QAItem from './q-a-item'
import { ChunkingMode, type ParentMode } from '@/models/datasets'

/** Props accepted by ChunkCard. */
type ChunkCardProps = {
// Chunking mode of the data being shown; ChunkCard branches on it to decide how `content` is rendered.
chunkType: ChunkingMode
// Only consulted in parent-child mode: 'full-doc' hides the card header,
// 'paragraph' switches the header label prefix to 'Parent-Chunk'.
parentMode?: ParentMode
// Treated as string[] (child chunks) in parent-child mode, as a QAChunk in QA mode,
// and as a plain string otherwise. The component casts without runtime checks.
content: string | string[] | QAChunk
// Forwarded unchanged to SegmentIndexTag in the header.
positionId?: string | number
// Displayed in the header via formatNumber and passed as `count` to the translation call.
wordCount: number
}

/**
 * Card that previews a single chunk.
 *
 * - Parent-child mode: `content` is treated as a list of child chunk texts and
 *   each one is rendered as a labelled `PreviewSlice` (C-1, C-2, ...).
 * - QA mode: `content` is treated as a `QAChunk` and rendered as a question
 *   row followed by an answer row.
 * - Any other mode: `content` is rendered as plain text.
 *
 * A header (index tag, dot, character count) is shown above the content
 * unless the card is in parent-child mode with `parentMode === 'full-doc'`.
 */
const ChunkCard = (props: ChunkCardProps) => {
  const { chunkType, parentMode, content, positionId, wordCount } = props
  const { t } = useTranslation()

  // Plain comparisons; memoising them would cost more than recomputing.
  const isParentChild = chunkType === ChunkingMode.parentChild
  const isFullDoc = isParentChild && parentMode === 'full-doc'
  const isParagraph = isParentChild && parentMode === 'paragraph'

  const contentElement = useMemo(() => {
    if (chunkType === ChunkingMode.parentChild) {
      return (content as string[]).map((child, index) => {
        const indexForLabel = index + 1
        return (
          <PreviewSlice
            // Keyed by position, not by text: two child chunks may contain
            // identical text, which would yield duplicate sibling keys.
            key={`child-chunk-${indexForLabel}`}
            label={`C-${indexForLabel}`}
            text={child}
            tooltip={`Child-chunk-${indexForLabel} · ${child.length} Characters`}
            labelInnerClassName='text-[10px] font-semibold align-bottom leading-7'
            dividerClassName='leading-7'
          />
        )
      })
    }

    if (chunkType === ChunkingMode.qa) {
      return (
        <div className='flex flex-col gap-2'>
          <QAItem type={QAItemType.Question} text={(content as QAChunk).question} />
          <QAItem type={QAItemType.Answer} text={(content as QAChunk).answer} />
        </div>
      )
    }

    return content as string
  }, [content, chunkType])

  return (
    <div className='flex flex-col gap-1 rounded-lg bg-components-panel-bg px-3 py-2.5'>
      {!isFullDoc && (
        <div className='inline-flex items-center justify-start gap-2'>
          <SegmentIndexTag
            positionId={positionId}
            labelPrefix={isParagraph ? 'Parent-Chunk' : 'Chunk'}
          />
          <Dot />
          <div className='system-xs-medium text-text-tertiary'>{`${formatNumber(wordCount)} ${t('datasetDocuments.segment.characters', { count: wordCount })}`}</div>
        </div>
      )}
      <div className='body-md-regular text-text-secondary'>{contentElement}</div>
    </div>
  )
}

export default React.memo(ChunkCard)

+ 24
- 131
web/app/components/rag-pipeline/components/chunk-card-list/index.tsx Целия файл

@@ -1,153 +1,46 @@
import { useMemo } from 'react'
import SegmentIndexTag from '@/app/components/datasets/documents/detail/completed/common/segment-index-tag'
import Dot from '@/app/components/datasets/documents/detail/completed/common/dot'
import { PreviewSlice } from '@/app/components/datasets/formatted-text/flavours/preview-slice'
import { useTranslation } from 'react-i18next'
import { formatNumber } from '@/utils/format'
import cn from '@/utils/classnames'

enum QAItemType {
Question = 'question',
Answer = 'answer',
}

type QAItemProps = {
type: QAItemType
text: string
}

/** One row of a QA chunk: a 'Q' or 'A' marker followed by the given text. */
const QAItem = ({ type, text }: QAItemProps) => {
  // Anything that is not a question is labelled as an answer.
  const marker = type === QAItemType.Question ? 'Q' : 'A'

  return (
    <div className='inline-flex items-start justify-start gap-1 self-stretch'>
      <div className='w-4 text-[13px] font-medium leading-5 text-text-tertiary'>{marker}</div>
      <div className='body-md-regular flex-1 text-text-secondary'>{text}</div>
    </div>
  )
}

/** Kind of chunk a card displays; ChunkCard reads it to decide header visibility and label. */
export enum ChunkType {
General = 'general',
// ChunkCard labels the header 'Parent-Chunk' for this type.
Paragraph = 'paragraph',
// ChunkCard hides the header entirely for this type.
FullDoc = 'full-doc',
QA = 'qa',
}

type ChunkCardProps = {
type: ChunkType
content: string | string[] | QAChunk
positionId?: string | number
wordCount: number
}

/**
 * Card that previews a single chunk.
 *
 * The rendering branch is picked from the runtime shape of `content`:
 * an array is rendered as labelled child slices, any other object as a
 * question/answer pair, and a string as plain text. The header (index tag,
 * dot, character count) is hidden when `type` is `ChunkType.FullDoc`.
 */
const ChunkCard = (props: ChunkCardProps) => {
  const { type, content, positionId, wordCount } = props
  const { t } = useTranslation()

  const renderContent = () => {
    // ChunkType.Paragraph && ChunkType.FullDoc
    if (Array.isArray(content)) {
      return content.map((child, index) => {
        const indexForLabel = index + 1
        return (
          <PreviewSlice
            // Keyed by position, not by text: two child chunks may contain
            // identical text, which would yield duplicate sibling keys.
            key={`child-chunk-${indexForLabel}`}
            label={`C-${indexForLabel}`}
            text={child}
            tooltip={`Child-chunk-${indexForLabel} · ${child.length} Characters`}
            labelInnerClassName='text-[10px] font-semibold align-bottom leading-7'
            dividerClassName='leading-7'
          />
        )
      })
    }

    // ChunkType.QA
    if (typeof content === 'object') {
      return <div className='flex flex-col gap-2'>
        <QAItem type={QAItemType.Question} text={(content as QAChunk).question} />
        <QAItem type={QAItemType.Answer} text={(content as QAChunk).answer} />
      </div>
    }

    // ChunkType.General
    return content
  }

  return (
    <div className='flex flex-col gap-1 rounded-lg bg-components-panel-bg px-3 py-2.5'>
      {type !== ChunkType.FullDoc && <div className='inline-flex items-center justify-start gap-2'>
        <SegmentIndexTag
          positionId={positionId}
          labelPrefix={type === ChunkType.Paragraph ? 'Parent-Chunk' : 'Chunk'}
        />
        <Dot />
        <div className='system-xs-medium text-text-tertiary'>{formatNumber(wordCount)} {t('datasetDocuments.segment.characters', { count: wordCount })}</div>
      </div>}
      <div className='body-md-regular text-text-secondary'>{renderContent()}</div>
    </div>
  )
}

export type ChunkInfo = {
general_chunks?: string[]
parent_child_chunks?: ParentChildChunk[]
parent_mode?: string
qa_chunks?: QAChunk[]
}

type ParentChildChunk = {
child_contents: string[]
parent_content: string
parent_mode: string
}

type QAChunk = {
question: string
answer: string
}
import type { ChunkInfo, GeneralChunks, ParentChildChunk, ParentChildChunks, QAChunk, QAChunks } from './types'
import { ChunkingMode, type ParentMode } from '@/models/datasets'
import ChunkCard from './chunk-card'

type ChunkCardListProps = {
chunkType: ChunkingMode
parentMode?: ParentMode
chunkInfo: ChunkInfo
className?: string
}

export const ChunkCardList = (props: ChunkCardListProps) => {
const { chunkInfo, className } = props

const chunkType = useMemo(() => {
if (chunkInfo?.general_chunks)
return ChunkType.General

if (chunkInfo?.parent_child_chunks)
return chunkInfo.parent_mode as ChunkType

return ChunkType.QA
}, [chunkInfo])
const { chunkType, parentMode, chunkInfo, className } = props

const chunkList = useMemo(() => {
if (chunkInfo?.general_chunks)
return chunkInfo.general_chunks
if (chunkInfo?.parent_child_chunks)
return chunkInfo.parent_child_chunks
return chunkInfo?.qa_chunks ?? []
if (chunkType === ChunkingMode.text)
return chunkInfo as GeneralChunks
if (chunkType === ChunkingMode.parentChild)
return (chunkInfo as ParentChildChunks).parent_child_chunks
return (chunkInfo as QAChunks).qa_chunks
}, [chunkInfo])

// Character count for one list entry, chosen by the active chunking mode:
// parent text length for parent-child, string length for text,
// and question length plus answer length for everything else (QA).
const getWordCount = (seg: string | ParentChildChunk | QAChunk) => {
  switch (chunkType) {
    case ChunkingMode.parentChild:
      return (seg as ParentChildChunk).parent_content.length
    case ChunkingMode.text:
      return (seg as string).length
    default: {
      const { question, answer } = seg as QAChunk
      return question.length + answer.length
    }
  }
}

return (
<div className={cn('flex w-full flex-col gap-y-1', className)}>
{chunkList.map((seg: string | ParentChildChunk | QAChunk, index: number) => {
const isParentChildMode = [ChunkType.Paragraph, ChunkType.FullDoc].includes(chunkType!)
let wordCount = 0
if (isParentChildMode)
wordCount = (seg as ParentChildChunk)?.parent_content?.length
else if (typeof seg === 'string')
wordCount = seg.length
else
wordCount = (seg as QAChunk)?.question?.length + (seg as QAChunk)?.answer?.length
{chunkList.map((seg, index: number) => {
const wordCount = getWordCount(seg)

return (
<ChunkCard
key={`${chunkType}-${index}`}
type={chunkType}
content={isParentChildMode ? (seg as ParentChildChunk).child_contents : (seg as string | QAChunk)}
chunkType={chunkType}
parentMode={parentMode}
content={chunkType === ChunkingMode.parentChild ? (seg as ParentChildChunk).child_contents : (seg as string | QAChunk)}
wordCount={wordCount}
positionId={index + 1}
/>

+ 19
- 0
web/app/components/rag-pipeline/components/chunk-card-list/q-a-item.tsx Целия файл

@@ -0,0 +1,19 @@
import React from 'react'
import { QAItemType } from './types'

type QAItemProps = {
type: QAItemType
text: string
}

/**
 * One row of a QA chunk: a fixed-width 'Q' or 'A' marker followed by the text.
 * The marker is 'Q' for `QAItemType.Question` and 'A' for any other type.
 */
const QAItem = ({ type, text }: QAItemProps) => {
  const isQuestion = type === QAItemType.Question
  const marker = isQuestion ? 'Q' : 'A'

  return (
    <div className='inline-flex items-start justify-start gap-1 self-stretch'>
      <div className='w-4 text-[13px] font-medium leading-5 text-text-tertiary'>{marker}</div>
      <div className='body-md-regular flex-1 text-text-secondary'>{text}</div>
    </div>
  )
}

export default React.memo(QAItem)

+ 28
- 0
web/app/components/rag-pipeline/components/chunk-card-list/types.ts Целия файл

@@ -0,0 +1,28 @@
/** Chunk list for text mode: one string per chunk. */
export type GeneralChunks = string[]

/** A single parent chunk together with its child chunks. */
export type ParentChildChunk = {
// Child chunk texts; ChunkCardList passes these to ChunkCard as `content`.
child_contents: string[]
// Parent text; ChunkCardList uses its length as the card's character count.
parent_content: string
parent_mode: string
}

/** Chunk payload for parent-child mode. */
export type ParentChildChunks = {
parent_child_chunks: ParentChildChunk[]
parent_mode: string
}

/** A single question/answer pair. */
export type QAChunk = {
question: string
answer: string
}

/** Chunk payload for QA mode. */
export type QAChunks = {
qa_chunks: QAChunk[]
}

/**
 * Any of the three payload shapes. The union carries no discriminant field;
 * ChunkCardList selects the shape from its separate `chunkType` prop and casts.
 */
export type ChunkInfo = GeneralChunks | ParentChildChunks | QAChunks

/** Which half of a QA pair a QAItem row shows; QAItem renders 'Q' for Question and 'A' otherwise. */
export enum QAItemType {
Question = 'question',
Answer = 'answer',
}

+ 0
- 1
web/app/components/rag-pipeline/components/panel/test-run/result/index.tsx Целия файл

@@ -29,7 +29,6 @@ const Result = () => {
isRunning={!workflowRunningData?.result || workflowRunningData?.result.status === WorkflowRunningStatus.Running}
outputs={workflowRunningData?.result?.outputs}
error={workflowRunningData?.result?.error}
tracing={workflowRunningData?.tracing}
onSwitchToDetail={() => switchTab('DETAIL')}
/>
)}

+ 3
- 14
web/app/components/rag-pipeline/components/panel/test-run/result/result-preview/index.tsx Целия файл

@@ -1,6 +1,4 @@
import Button from '@/app/components/base/button'
import { BlockEnum } from '@/app/components/workflow/types'
import type { NodeTracing } from '@/types/workflow'
import { RiLoader2Line } from '@remixicon/react'
import React, { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
@@ -12,7 +10,6 @@ type ResultTextProps = {
isRunning?: boolean
outputs?: any
error?: string
tracing?: NodeTracing[]
onSwitchToDetail: () => void
}

@@ -20,21 +17,13 @@ const ResultPreview = ({
isRunning,
outputs,
error,
tracing,
onSwitchToDetail,
}: ResultTextProps) => {
const { t } = useTranslation()

const chunkInfo = useMemo(() => {
if (!outputs || !tracing)
return undefined
const knowledgeIndexNode = tracing.find(node => node.node_type === BlockEnum.KnowledgeBase)
return knowledgeIndexNode?.inputs?.chunks
}, [outputs, tracing])

const previewChunks = useMemo(() => {
return formatPreviewChunks(chunkInfo, outputs)
}, [chunkInfo, outputs])
return formatPreviewChunks(outputs)
}, [outputs])

return (
<>
@@ -54,7 +43,7 @@ const ResultPreview = ({
)}
{outputs && previewChunks && (
<div className='flex grow flex-col bg-background-body p-1'>
<ChunkCardList chunkInfo={previewChunks} />
<ChunkCardList chunkType={outputs.chunk_structure} chunkInfo={previewChunks} />
<div className='system-xs-regular mt-1 flex items-center gap-x-2 text-text-tertiary'>
<div className='h-px flex-1 bg-gradient-to-r from-background-gradient-mask-transparent to-divider-regular' />
<span className='shrink-0truncate' title={t('pipeline.result.resultPreview.footerTip', { count: RAG_PIPELINE_PREVIEW_CHUNK_NUM })}>

+ 21
- 32
web/app/components/rag-pipeline/components/panel/test-run/result/result-preview/utils.ts Целия файл

@@ -1,18 +1,17 @@
import { RAG_PIPELINE_PREVIEW_CHUNK_NUM } from '@/config'
import { type ChunkInfo, ChunkType } from '../../../../chunk-card-list'
import type { ChunkInfo, GeneralChunks, ParentChildChunks, QAChunks } from '../../../../chunk-card-list/types'
import type { ParentMode } from '@/models/datasets'
import { ChunkingMode } from '@/models/datasets'

type GeneralChunkPreview = {
content: string
}

const formatGeneralChunks = (outputs: any) => {
if (!outputs) return undefined
const chunkInfo: ChunkInfo = {
general_chunks: [],
}
const chunkInfo: GeneralChunks = []
const chunks = outputs.preview as GeneralChunkPreview[]
chunks.slice(0, RAG_PIPELINE_PREVIEW_CHUNK_NUM).forEach((chunk) => {
chunkInfo.general_chunks?.push(chunk.content)
chunkInfo.push(chunk.content)
})

return chunkInfo
@@ -23,29 +22,27 @@ type ParentChildChunkPreview = {
child_chunks: string[]
}

const formatParentChildChunks = (outputs: any, chunkType: ChunkType) => {
if (!outputs) return undefined
const chunkInfo: ChunkInfo = {
const formatParentChildChunks = (outputs: any, parentMode: ParentMode) => {
const chunkInfo: ParentChildChunks = {
parent_child_chunks: [],
parent_mode: chunkType,
parent_mode: parentMode,
}
const chunks = outputs.preview as ParentChildChunkPreview[]
if (chunkType === ChunkType.Paragraph) {
if (parentMode === 'paragraph') {
chunks.slice(0, RAG_PIPELINE_PREVIEW_CHUNK_NUM).forEach((chunk) => {
chunkInfo.parent_child_chunks?.push({
parent_content: chunk.content,
child_contents: chunk.child_chunks,
parent_mode: chunkType,
parent_mode: parentMode,
})
})
return chunkInfo
}
else {
if (parentMode === 'full-doc') {
chunks.forEach((chunk) => {
chunkInfo.parent_child_chunks?.push({
parent_content: chunk.content,
child_contents: chunk.child_chunks.slice(0, RAG_PIPELINE_PREVIEW_CHUNK_NUM),
parent_mode: chunkType,
parent_mode: parentMode,
})
})
}
@@ -59,8 +56,7 @@ type QAChunkPreview = {
}

const formatQAChunks = (outputs: any) => {
if (!outputs) return undefined
const chunkInfo: ChunkInfo = {
const chunkInfo: QAChunks = {
qa_chunks: [],
}
const chunks = outputs.qa_preview as QAChunkPreview[]
@@ -73,26 +69,19 @@ const formatQAChunks = (outputs: any) => {
return chunkInfo
}

export const formatPreviewChunks = (chunkInfo: ChunkInfo, outputs: any): ChunkInfo | undefined => {
if (!chunkInfo) return undefined

let chunkType = ChunkType.General
if (chunkInfo?.general_chunks)
chunkType = ChunkType.General

if (chunkInfo?.parent_child_chunks)
chunkType = chunkInfo.parent_mode as ChunkType
export const formatPreviewChunks = (outputs: any): ChunkInfo | undefined => {
if (!outputs) return undefined

if (chunkInfo?.qa_chunks)
chunkType = ChunkType.QA
const chunkingMode = outputs.chunk_structure
const parentMode = outputs.parent_mode

if (chunkType === ChunkType.General)
if (chunkingMode === ChunkingMode.text)
return formatGeneralChunks(outputs)

if (chunkType === ChunkType.Paragraph || chunkType === ChunkType.FullDoc)
return formatParentChildChunks(outputs, chunkType)
if (chunkingMode === ChunkingMode.parentChild)
return formatParentChildChunks(outputs, parentMode)

if (chunkType === ChunkType.QA)
if (chunkingMode === ChunkingMode.qa)
return formatQAChunks(outputs)

return undefined

+ 8
- 2
web/app/components/workflow/variable-inspect/value-content.tsx Целия файл

@@ -26,8 +26,10 @@ import { VarInInspectType } from '@/types/workflow'
import cn from '@/utils/classnames'
import BoolValue from '../panel/chat-variable-panel/components/bool-value'
import { useStore } from '@/app/components/workflow/store'
import { ChunkCardList, type ChunkInfo } from '@/app/components/rag-pipeline/components/chunk-card-list'
import { ChunkCardList } from '@/app/components/rag-pipeline/components/chunk-card-list'
import type { ChunkInfo } from '@/app/components/rag-pipeline/components/chunk-card-list/types'
import { PreviewMode } from '../../base/features/types'
import { ChunkingMode } from '@/models/datasets'

enum ViewMode {
Code = 'code',
@@ -98,7 +100,11 @@ const DisplayContent = (props: DisplayContentProps) => {
{viewMode === ViewMode.Preview && (
type === ContentType.Markdown
? <Markdown className='grow overflow-auto rounded-lg !bg-white px-4 py-3' content={(mdString ?? '') as string} />
: <ChunkCardList chunkInfo={JSON.parse(jsonString!) as ChunkInfo} />
: <ChunkCardList
chunkType={ChunkingMode.text} // todo: delete mock data
parentMode={'full-doc'} // todo: delete mock data
chunkInfo={JSON.parse(jsonString!) as ChunkInfo}
/>
)}
</div>
</div>

Loading…
Отказ
Запис