|
|
|
@@ -1,4 +1,3 @@ |
|
|
|
/* eslint-disable no-mixed-operators */ |
|
|
|
'use client' |
|
|
|
import React, { useEffect, useLayoutEffect, useRef, useState } from 'react' |
|
|
|
import { useTranslation } from 'react-i18next' |
|
|
|
@@ -11,7 +10,7 @@ import { groupBy } from 'lodash-es' |
|
|
|
import PreviewItem, { PreviewType } from './preview-item' |
|
|
|
import LanguageSelect from './language-select' |
|
|
|
import s from './index.module.css' |
|
|
|
import type { CreateDocumentReq, CustomFile, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets' |
|
|
|
import type { CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' |
|
|
|
import { |
|
|
|
createDocument, |
|
|
|
createFirstDocument, |
|
|
|
@@ -33,13 +32,14 @@ import { useDatasetDetailContext } from '@/context/dataset-detail' |
|
|
|
import I18n from '@/context/i18n' |
|
|
|
import { IS_CE_EDITION } from '@/config' |
|
|
|
|
|
|
|
type ValueOf<T> = T[keyof T] |
|
|
|
type StepTwoProps = { |
|
|
|
isSetting?: boolean |
|
|
|
documentDetail?: FullDocumentDetail |
|
|
|
hasSetAPIKEY: boolean |
|
|
|
onSetting: () => void |
|
|
|
datasetId?: string |
|
|
|
indexingType?: string |
|
|
|
indexingType?: ValueOf<IndexingType> |
|
|
|
dataSourceType: DataSourceType |
|
|
|
files: CustomFile[] |
|
|
|
notionPages?: NotionPage[] |
|
|
|
@@ -89,21 +89,23 @@ const StepTwo = ({ |
|
|
|
const [rules, setRules] = useState<PreProcessingRule[]>([]) |
|
|
|
const [defaultConfig, setDefaultConfig] = useState<Rules>() |
|
|
|
const hasSetIndexType = !!indexingType |
|
|
|
const [indexType, setIndexType] = useState<IndexingType>( |
|
|
|
indexingType |
|
|
|
|| hasSetAPIKEY |
|
|
|
const [indexType, setIndexType] = useState<ValueOf<IndexingType>>( |
|
|
|
(indexingType |
|
|
|
|| hasSetAPIKEY) |
|
|
|
? IndexingType.QUALIFIED |
|
|
|
: IndexingType.ECONOMICAL, |
|
|
|
) |
|
|
|
const [docForm, setDocForm] = useState<DocForm | string>( |
|
|
|
datasetId && documentDetail ? documentDetail.doc_form : DocForm.TEXT, |
|
|
|
(datasetId && documentDetail) ? documentDetail.doc_form : DocForm.TEXT, |
|
|
|
) |
|
|
|
const [docLanguage, setDocLanguage] = useState<string>(locale === 'en' ? 'English' : 'Chinese') |
|
|
|
const [QATipHide, setQATipHide] = useState(false) |
|
|
|
const [previewSwitched, setPreviewSwitched] = useState(false) |
|
|
|
const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean() |
|
|
|
const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null) |
|
|
|
const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null) |
|
|
|
const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null) |
|
|
|
const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null) |
|
|
|
const [estimateTokes, setEstimateTokes] = useState<Pick<IndexingEstimateResponse, 'tokens' | 'total_price'> | null>(null) |
|
|
|
|
|
|
|
const fileIndexingEstimate = (() => { |
|
|
|
return segmentationType === SegmentType.AUTO ? automaticFileIndexingEstimate : customFileIndexingEstimate |
|
|
|
})() |
|
|
|
@@ -153,7 +155,7 @@ const StepTwo = ({ |
|
|
|
} |
|
|
|
const resetRules = () => { |
|
|
|
if (defaultConfig) { |
|
|
|
setSegmentIdentifier(defaultConfig.segmentation.separator === '\n' ? '\\n' : defaultConfig.segmentation.separator || '\\n') |
|
|
|
setSegmentIdentifier((defaultConfig.segmentation.separator === '\n' ? '\\n' : defaultConfig.segmentation.separator) || '\\n') |
|
|
|
setMax(defaultConfig.segmentation.max_tokens) |
|
|
|
setRules(defaultConfig.pre_processing_rules) |
|
|
|
} |
|
|
|
@@ -161,12 +163,14 @@ const StepTwo = ({ |
|
|
|
|
|
|
|
const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT) => { |
|
|
|
// eslint-disable-next-line @typescript-eslint/no-use-before-define |
|
|
|
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm)) |
|
|
|
if (segmentationType === SegmentType.CUSTOM) |
|
|
|
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm)!) |
|
|
|
if (segmentationType === SegmentType.CUSTOM) { |
|
|
|
setCustomFileIndexingEstimate(res) |
|
|
|
|
|
|
|
else |
|
|
|
} |
|
|
|
else { |
|
|
|
setAutomaticFileIndexingEstimate(res) |
|
|
|
indexType === IndexingType.QUALIFIED && setEstimateTokes({ tokens: res.tokens, total_price: res.total_price }) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
const confirmChangeCustomConfig = () => { |
|
|
|
@@ -179,8 +183,8 @@ const StepTwo = ({ |
|
|
|
const getIndexing_technique = () => indexingType || indexType |
|
|
|
|
|
|
|
const getProcessRule = () => { |
|
|
|
const processRule: any = { |
|
|
|
rules: {}, // api will check this. It will be removed after api refactored. |
|
|
|
const processRule: ProcessRule = { |
|
|
|
rules: {} as any, // api will check this. It will be removed after api refactored. |
|
|
|
mode: segmentationType, |
|
|
|
} |
|
|
|
if (segmentationType === SegmentType.CUSTOM) { |
|
|
|
@@ -220,37 +224,35 @@ const StepTwo = ({ |
|
|
|
}) as NotionInfo[] |
|
|
|
} |
|
|
|
|
|
|
|
const getFileIndexingEstimateParams = (docForm: DocForm) => { |
|
|
|
let params |
|
|
|
const getFileIndexingEstimateParams = (docForm: DocForm): IndexingEstimateParams | undefined => { |
|
|
|
if (dataSourceType === DataSourceType.FILE) { |
|
|
|
params = { |
|
|
|
return { |
|
|
|
info_list: { |
|
|
|
data_source_type: dataSourceType, |
|
|
|
file_info_list: { |
|
|
|
file_ids: files.map(file => file.id), |
|
|
|
file_ids: files.map(file => file.id) as string[], |
|
|
|
}, |
|
|
|
}, |
|
|
|
indexing_technique: getIndexing_technique(), |
|
|
|
indexing_technique: getIndexing_technique() as string, |
|
|
|
process_rule: getProcessRule(), |
|
|
|
doc_form: docForm, |
|
|
|
doc_language: docLanguage, |
|
|
|
dataset_id: datasetId, |
|
|
|
dataset_id: datasetId as string, |
|
|
|
} |
|
|
|
} |
|
|
|
if (dataSourceType === DataSourceType.NOTION) { |
|
|
|
params = { |
|
|
|
return { |
|
|
|
info_list: { |
|
|
|
data_source_type: dataSourceType, |
|
|
|
notion_info_list: getNotionInfo(), |
|
|
|
}, |
|
|
|
indexing_technique: getIndexing_technique(), |
|
|
|
indexing_technique: getIndexing_technique() as string, |
|
|
|
process_rule: getProcessRule(), |
|
|
|
doc_form: docForm, |
|
|
|
doc_language: docLanguage, |
|
|
|
dataset_id: datasetId, |
|
|
|
dataset_id: datasetId as string, |
|
|
|
} |
|
|
|
} |
|
|
|
return params |
|
|
|
} |
|
|
|
|
|
|
|
const getCreationParams = () => { |
|
|
|
@@ -291,7 +293,7 @@ const StepTwo = ({ |
|
|
|
try { |
|
|
|
const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' }) |
|
|
|
const separator = res.rules.segmentation.separator |
|
|
|
setSegmentIdentifier(separator === '\n' ? '\\n' : separator || '\\n') |
|
|
|
setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n') |
|
|
|
setMax(res.rules.segmentation.max_tokens) |
|
|
|
setRules(res.rules.pre_processing_rules) |
|
|
|
setDefaultConfig(res.rules) |
|
|
|
@@ -306,7 +308,7 @@ const StepTwo = ({ |
|
|
|
const rules = documentDetail.dataset_process_rule.rules |
|
|
|
const separator = rules.segmentation.separator |
|
|
|
const max = rules.segmentation.max_tokens |
|
|
|
setSegmentIdentifier(separator === '\n' ? '\\n' : separator || '\\n') |
|
|
|
setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n') |
|
|
|
setMax(max) |
|
|
|
setRules(rules.pre_processing_rules) |
|
|
|
setDefaultConfig(rules) |
|
|
|
@@ -330,7 +332,7 @@ const StepTwo = ({ |
|
|
|
res = await createFirstDocument({ |
|
|
|
body: params, |
|
|
|
}) |
|
|
|
updateIndexingTypeCache && updateIndexingTypeCache(indexType) |
|
|
|
updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) |
|
|
|
updateResultCache && updateResultCache(res) |
|
|
|
} |
|
|
|
else { |
|
|
|
@@ -338,7 +340,7 @@ const StepTwo = ({ |
|
|
|
datasetId, |
|
|
|
body: params, |
|
|
|
}) |
|
|
|
updateIndexingTypeCache && updateIndexingTypeCache(indexType) |
|
|
|
updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) |
|
|
|
updateResultCache && updateResultCache(res) |
|
|
|
} |
|
|
|
if (mutateDatasetRes) |
|
|
|
@@ -549,9 +551,9 @@ const StepTwo = ({ |
|
|
|
<div className={s.tip}>{t('datasetCreation.stepTwo.qualifiedTip')}</div> |
|
|
|
<div className='pb-0.5 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.emstimateCost')}</div> |
|
|
|
{ |
|
|
|
fileIndexingEstimate |
|
|
|
estimateTokes |
|
|
|
? ( |
|
|
|
<div className='text-xs font-medium text-gray-800'>{formatNumber(fileIndexingEstimate.tokens)} tokens(<span className='text-yellow-500'>${formatNumber(fileIndexingEstimate.total_price)}</span>)</div> |
|
|
|
<div className='text-xs font-medium text-gray-800'>{formatNumber(estimateTokes.tokens)} tokens(<span className='text-yellow-500'>${formatNumber(estimateTokes.total_price)}</span>)</div> |
|
|
|
) |
|
|
|
: ( |
|
|
|
<div className={s.calculating}>{t('datasetCreation.stepTwo.calculating')}</div> |