|
|
|
@@ -33,13 +33,14 @@ import { DataSourceType, DocForm } from '@/models/datasets' |
|
|
|
import NotionIcon from '@/app/components/base/notion-icon' |
|
|
|
import Switch from '@/app/components/base/switch' |
|
|
|
import { MessageChatSquare } from '@/app/components/base/icons/src/public/common' |
|
|
|
import { XClose } from '@/app/components/base/icons/src/vender/line/general' |
|
|
|
import { HelpCircle, XClose } from '@/app/components/base/icons/src/vender/line/general' |
|
|
|
import { useDatasetDetailContext } from '@/context/dataset-detail' |
|
|
|
import I18n from '@/context/i18n' |
|
|
|
import { IS_CE_EDITION } from '@/config' |
|
|
|
import { RETRIEVE_METHOD } from '@/types/app' |
|
|
|
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' |
|
|
|
import Tooltip from '@/app/components/base/tooltip' |
|
|
|
import TooltipPlus from '@/app/components/base/tooltip-plus' |
|
|
|
import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks' |
|
|
|
import { LanguagesSupportedUnderscore, getModelRuntimeSupported } from '@/utils/language' |
|
|
|
|
|
|
|
@@ -99,7 +100,8 @@ const StepTwo = ({ |
|
|
|
const [previewScrolled, setPreviewScrolled] = useState(false) |
|
|
|
const [segmentationType, setSegmentationType] = useState<SegmentType>(SegmentType.AUTO) |
|
|
|
const [segmentIdentifier, setSegmentIdentifier] = useState('\\n') |
|
|
|
const [max, setMax] = useState(1000) |
|
|
|
const [max, setMax] = useState(500) |
|
|
|
const [overlap, setOverlap] = useState(50) |
|
|
|
const [rules, setRules] = useState<PreProcessingRule[]>([]) |
|
|
|
const [defaultConfig, setDefaultConfig] = useState<Rules>() |
|
|
|
const hasSetIndexType = !!indexingType |
|
|
|
@@ -171,6 +173,7 @@ const StepTwo = ({ |
|
|
|
if (defaultConfig) { |
|
|
|
setSegmentIdentifier((defaultConfig.segmentation.separator === '\n' ? '\\n' : defaultConfig.segmentation.separator) || '\\n') |
|
|
|
setMax(defaultConfig.segmentation.max_tokens) |
|
|
|
setOverlap(defaultConfig.segmentation.chunk_overlap) |
|
|
|
setRules(defaultConfig.pre_processing_rules) |
|
|
|
} |
|
|
|
} |
|
|
|
@@ -207,6 +210,7 @@ const StepTwo = ({ |
|
|
|
segmentation: { |
|
|
|
separator: segmentIdentifier === '\\n' ? '\n' : segmentIdentifier, |
|
|
|
max_tokens: max, |
|
|
|
chunk_overlap: overlap, |
|
|
|
}, |
|
|
|
} |
|
|
|
processRule.rules = ruleObj |
|
|
|
@@ -275,6 +279,10 @@ const StepTwo = ({ |
|
|
|
} = useModelListAndDefaultModelAndCurrentProviderAndModel(3) |
|
|
|
const getCreationParams = () => { |
|
|
|
let params |
|
|
|
if (segmentationType === SegmentType.CUSTOM && overlap > max) { |
|
|
|
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.overlapCheck') }) |
|
|
|
return |
|
|
|
} |
|
|
|
if (isSetting) { |
|
|
|
params = { |
|
|
|
original_document_id: documentDetail?.id, |
|
|
|
@@ -337,6 +345,7 @@ const StepTwo = ({ |
|
|
|
const separator = res.rules.segmentation.separator |
|
|
|
setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n') |
|
|
|
setMax(res.rules.segmentation.max_tokens) |
|
|
|
setOverlap(res.rules.segmentation.chunk_overlap) |
|
|
|
setRules(res.rules.pre_processing_rules) |
|
|
|
setDefaultConfig(res.rules) |
|
|
|
} |
|
|
|
@@ -350,8 +359,10 @@ const StepTwo = ({ |
|
|
|
const rules = documentDetail.dataset_process_rule.rules |
|
|
|
const separator = rules.segmentation.separator |
|
|
|
const max = rules.segmentation.max_tokens |
|
|
|
const overlap = rules.segmentation.chunk_overlap |
|
|
|
setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n') |
|
|
|
setMax(max) |
|
|
|
setOverlap(overlap) |
|
|
|
setRules(rules.pre_processing_rules) |
|
|
|
setDefaultConfig(rules) |
|
|
|
} |
|
|
|
@@ -569,13 +580,35 @@ const StepTwo = ({ |
|
|
|
<input |
|
|
|
type="number" |
|
|
|
className={s.input} |
|
|
|
placeholder={t('datasetCreation.stepTwo.separatorPlaceholder') || ''} |
|
|
|
placeholder={t('datasetCreation.stepTwo.maxLength') || ''} |
|
|
|
value={max} |
|
|
|
min={1} |
|
|
|
onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))} |
|
|
|
/> |
|
|
|
</div> |
|
|
|
</div> |
|
|
|
<div className={s.formRow}> |
|
|
|
<div className='w-full'> |
|
|
|
<div className={s.label}> |
|
|
|
{t('datasetCreation.stepTwo.overlap')} |
|
|
|
<TooltipPlus popupContent={ |
|
|
|
<div className='max-w-[200px]'> |
|
|
|
{t('datasetCreation.stepTwo.overlapTip')} |
|
|
|
</div> |
|
|
|
}> |
|
|
|
<HelpCircle className='ml-1 w-3.5 h-3.5 text-gray-400' /> |
|
|
|
</TooltipPlus> |
|
|
|
</div> |
|
|
|
<input |
|
|
|
type="number" |
|
|
|
className={s.input} |
|
|
|
placeholder={t('datasetCreation.stepTwo.overlap') || ''} |
|
|
|
value={overlap} |
|
|
|
min={1} |
|
|
|
onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))} |
|
|
|
/> |
|
|
|
</div> |
|
|
|
</div> |
|
|
|
<div className={s.formRow}> |
|
|
|
<div className='w-full flex flex-col gap-1'> |
|
|
|
<div className={s.label}>{t('datasetCreation.stepTwo.rules')}</div> |