| @@ -1,4 +1,4 @@ | |||
| x-shared-env: &shared-api-worker-env | |||
| x-shared-env: &shared-api-worker-env | |||
| services: | |||
| # API service | |||
| api: | |||
| @@ -57,6 +57,7 @@ services: | |||
| TEXT_GENERATION_TIMEOUT_MS: ${TEXT_GENERATION_TIMEOUT_MS:-60000} | |||
| CSP_WHITELIST: ${CSP_WHITELIST:-} | |||
| TOP_K_MAX_VALUE: ${TOP_K_MAX_VALUE:-} | |||
| INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: ${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH:-} | |||
| # The postgres database. | |||
| db: | |||
| @@ -448,6 +448,7 @@ services: | |||
| TEXT_GENERATION_TIMEOUT_MS: ${TEXT_GENERATION_TIMEOUT_MS:-60000} | |||
| CSP_WHITELIST: ${CSP_WHITELIST:-} | |||
| TOP_K_MAX_VALUE: ${TOP_K_MAX_VALUE:-} | |||
| INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: ${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH:-} | |||
| # The postgres database. | |||
| db: | |||
| @@ -28,3 +28,6 @@ NEXT_PUBLIC_CSP_WHITELIST= | |||
| # The maximum top-k value allowed for RAG. | |||
| NEXT_PUBLIC_TOP_K_MAX_VALUE=10 | |||
| # The maximum number of tokens allowed per segment during indexing. | |||
| NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=4000 | |||
| @@ -98,6 +98,7 @@ export enum IndexingType { | |||
| const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n' | |||
| const DEFAULT_MAXIMUM_CHUNK_LENGTH = 500 | |||
| const DEFAULT_OVERLAP = 50 | |||
| const MAXIMUM_CHUNK_TOKEN_LENGTH = parseInt(globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length') || '4000', 10) | |||
| type ParentChildConfig = { | |||
| chunkForContext: ParentMode | |||
| @@ -163,7 +164,7 @@ const StepTwo = ({ | |||
| doSetSegmentIdentifier(value ? escape(value) : (canEmpty ? '' : DEFAULT_SEGMENT_IDENTIFIER)) | |||
| }, []) | |||
| const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXIMUM_CHUNK_LENGTH) // default chunk length | |||
| const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(4000) | |||
| const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(MAXIMUM_CHUNK_TOKEN_LENGTH) | |||
| const [overlap, setOverlap] = useState(DEFAULT_OVERLAP) | |||
| const [rules, setRules] = useState<PreProcessingRule[]>([]) | |||
| const [defaultConfig, setDefaultConfig] = useState<Rules>() | |||
| @@ -342,8 +343,8 @@ const StepTwo = ({ | |||
| } | |||
| const updatePreview = () => { | |||
| if (segmentationType === ProcessMode.general && maxChunkLength > 4000) { | |||
| Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') }) | |||
| if (segmentationType === ProcessMode.general && maxChunkLength > MAXIMUM_CHUNK_TOKEN_LENGTH) { | |||
| Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: MAXIMUM_CHUNK_TOKEN_LENGTH }) }) | |||
| return | |||
| } | |||
| fetchEstimate() | |||
| @@ -393,7 +394,7 @@ const StepTwo = ({ | |||
| score_threshold_enabled: false, | |||
| score_threshold: 0.5, | |||
| }) | |||
| // eslint-disable-next-line react-hooks/exhaustive-deps | |||
| // eslint-disable-next-line react-hooks/exhaustive-deps | |||
| }, [rerankDefaultModel, isRerankDefaultModelValid]) | |||
| const getCreationParams = () => { | |||
| @@ -39,6 +39,8 @@ export const DelimiterInput: FC<InputProps & { tooltip?: string }> = (props) => | |||
| } | |||
| export const MaxLengthInput: FC<InputNumberProps> = (props) => { | |||
| const maxValue = parseInt(globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length') || '4000', 10) | |||
| const { t } = useTranslation() | |||
| return <FormField label={<div className='system-sm-semibold mb-1'> | |||
| {t('datasetCreation.stepTwo.maxLength')} | |||
| @@ -46,8 +48,8 @@ export const MaxLengthInput: FC<InputNumberProps> = (props) => { | |||
| <InputNumber | |||
| type="number" | |||
| className='h-9' | |||
| placeholder={'≤ 4000'} | |||
| max={4000} | |||
| placeholder={`≤ ${maxValue}`} | |||
| max={maxValue} | |||
| min={1} | |||
| {...props} | |||
| /> | |||
| @@ -45,6 +45,7 @@ const LocaleLayout = ({ | |||
| data-public-site-about={process.env.NEXT_PUBLIC_SITE_ABOUT} | |||
| data-public-text-generation-timeout-ms={process.env.NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS} | |||
| data-public-top-k-max-value={process.env.NEXT_PUBLIC_TOP_K_MAX_VALUE} | |||
| data-public-indexing-max-segmentation-tokens-length={process.env.NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH} | |||
| > | |||
| <BrowserInitor> | |||
| <SentryInitor> | |||
| @@ -24,5 +24,6 @@ export NEXT_TELEMETRY_DISABLED=${NEXT_TELEMETRY_DISABLED} | |||
| export NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS=${TEXT_GENERATION_TIMEOUT_MS} | |||
| export NEXT_PUBLIC_CSP_WHITELIST=${CSP_WHITELIST} | |||
| export NEXT_PUBLIC_TOP_K_MAX_VALUE=${TOP_K_MAX_VALUE} | |||
| export NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH} | |||
| pm2 start ./pm2.json --no-daemon | |||