|
|
|
@@ -144,23 +144,18 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from |
|
|
|
|
|
|
|
### Request Body |
|
|
|
<Properties> |
|
|
|
<Property name='original_document_id' type='string' key='original_document_id'> |
|
|
|
Source document ID (optional) |
|
|
|
<Property name='data' type='multipart/form-data json string' key='data'> |
|
|
|
- original_document_id Source document ID (optional) |
|
|
|
- Used to re-upload the document or modify the document cleaning and segmentation configuration. Any missing information is copied from the source document. |
|
|
|
- The source document cannot be an archived document |
|
|
|
- When original_document_id is passed in, an update operation is performed on the document. process_rule is optional; if it is not filled in, the segmentation method of the source document is used by default. |
|
|
|
- When original_document_id is not passed in, a create operation is performed for the document, and process_rule is required. |
|
|
|
</Property> |
|
|
|
<Property name='file' type='multipart/form-data' key='file'> |
|
|
|
Files that need to be uploaded. |
|
|
|
</Property> |
|
|
|
<Property name='indexing_technique' type='string' key='indexing_technique'> |
|
|
|
Index mode |
|
|
|
|
|
|
|
- indexing_technique Index mode |
|
|
|
- <code>high_quality</code> High quality: embeds the content using an embedding model and builds a vector database index |
|
|
|
- <code>economy</code> Economy: builds the index using the inverted index of the Keyword Table Index |
|
|
|
</Property> |
|
|
|
<Property name='process_rule' type='object' key='process_rule'> |
|
|
|
Processing rules |
|
|
|
|
|
|
|
- process_rule Processing rules |
|
|
|
- <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom |
|
|
|
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty) |
|
|
|
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules |
|
|
|
@@ -173,6 +168,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from |
|
|
|
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n |
|
|
|
- <code>max_tokens</code> Maximum length (in tokens); defaults to 1000 |
|
|
|
</Property> |
|
|
|
<Property name='file' type='multipart/form-data' key='file'> |
|
|
|
Files that need to be uploaded. |
|
|
|
</Property> |
|
|
|
</Properties> |
|
|
|
</Col> |
|
|
|
<Col sticky> |
|
|
|
@@ -180,7 +178,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from |
|
|
|
title="Request" |
|
|
|
tag="POST" |
|
|
|
label="/datasets/{dataset_id}/document/create_by_file" |
|
|
|
targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} |
|
|
|
targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} |
|
|
|
> |
|
|
|
```bash {{ title: 'cURL' }} |
|
|
|
curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \ |