Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: crazywoola <427733928@qq.com>tags/0.3.24
| 'use client' | |||||
| import type { FC } from 'react' | |||||
| import { useState } from 'react' | |||||
| import { useTranslation } from 'react-i18next' | |||||
| import CopyFeedback from '@/app/components/base/copy-feedback' | |||||
| import SecretKeyButton from '@/app/components/develop/secret-key/secret-key-button' | |||||
| import { randomString } from '@/utils' | |||||
| /** Props accepted by the ApiServer header widget. */ | |||||
| type ApiServerProps = { | |||||
| /** Base URL of the API; rendered as text and handed to the copy control. */ | |||||
| apiBaseUrl: string | |||||
| } | |||||
| /** | |||||
| * Shows the API base URL with a copy control, a status badge and the secret-key button. | |||||
| */ | |||||
| const ApiServer: FC<ApiServerProps> = ({ | |||||
| apiBaseUrl, | |||||
| }) => { | |||||
| const { t } = useTranslation() | |||||
| // Create the selector id once per mounted instance. Calling randomString() inline in the JSX | |||||
| // handed CopyFeedback a different selectorId on every render. | |||||
| const [copySelectorId] = useState(() => randomString(8)) | |||||
| return ( | |||||
| <div className='flex items-center'> | |||||
| <div className='flex items-center mr-2 pl-1.5 pr-1 h-8 bg-white/80 border-[0.5px] border-white rounded-lg'> | |||||
| <div className='mr-0.5 px-1.5 h-5 border border-gray-200 text-[11px] text-gray-500 rounded-md'>{t('appApi.apiServer')}</div> | |||||
| <div className='px-1 w-[248px] text-[13px] font-medium text-gray-800'>{apiBaseUrl}</div> | |||||
| <div className='mx-1 w-[1px] h-[14px] bg-gray-200'></div> | |||||
| <CopyFeedback | |||||
| content={apiBaseUrl} | |||||
| selectorId={copySelectorId} | |||||
| className={'!w-6 !h-6 hover:bg-gray-200'} | |||||
| /> | |||||
| </div> | |||||
| <div className='flex items-center mr-2 px-3 h-8 bg-[#ECFDF3] text-xs font-semibold text-[#039855] rounded-lg border-[0.5px] border-[#D1FADF]'> | |||||
| {t('appApi.ok')} | |||||
| </div> | |||||
| <SecretKeyButton | |||||
| className='flex-shrink-0 !h-8 bg-white' | |||||
| textCls='!text-gray-700 font-medium' | |||||
| iconCls='stroke-[1.2px]' | |||||
| /> | |||||
| </div> | |||||
| ) | |||||
| } | |||||
| export default ApiServer |
| 'use client' | |||||
| import { useRef, useState } from 'react' | |||||
| import { useTranslation } from 'react-i18next' | |||||
| import useSWR from 'swr' | |||||
| import Datasets from './Datasets' | |||||
| import DatasetFooter from './DatasetFooter' | |||||
| import ApiServer from './ApiServer' | |||||
| import Doc from './Doc' | |||||
| import TabSlider from '@/app/components/base/tab-slider' | |||||
| import { fetchDatasetApiBaseUrl } from '@/service/datasets' | |||||
| /** | |||||
| * Datasets page shell: a sticky tab bar that switches between the dataset list | |||||
| * and the dataset API documentation. | |||||
| */ | |||||
| const Container = () => { | |||||
| const { t } = useTranslation() | |||||
| const [activeTab, setActiveTab] = useState('dataset') | |||||
| const containerRef = useRef<HTMLDivElement>(null) | |||||
| // The key is null while the dataset tab is active, so the base-URL request is only keyed for the other tab. | |||||
| const apiInfoKey = activeTab === 'dataset' ? null : '/datasets/api-base-info' | |||||
| const { data } = useSWR(apiInfoKey, fetchDatasetApiBaseUrl) | |||||
| // Fall back to an empty string until the response is available. | |||||
| const apiBaseUrl = data?.api_base_url || '' | |||||
| const isDatasetTab = activeTab === 'dataset' | |||||
| const isApiTab = activeTab === 'api' | |||||
| const tabOptions = [ | |||||
| { value: 'dataset', text: t('dataset.datasets') }, | |||||
| { value: 'api', text: t('dataset.datasetsApi') }, | |||||
| ] | |||||
| return ( | |||||
| <div ref={containerRef} className='grow relative flex flex-col bg-gray-100 overflow-y-auto'> | |||||
| <div className='sticky top-0 flex justify-between pt-4 px-12 pb-2 h-14 bg-gray-100 z-10'> | |||||
| <TabSlider | |||||
| value={activeTab} | |||||
| onChange={nextTab => setActiveTab(nextTab)} | |||||
| options={tabOptions} | |||||
| /> | |||||
| {isApiTab && <ApiServer apiBaseUrl={apiBaseUrl} />} | |||||
| </div> | |||||
| {isDatasetTab && ( | |||||
| <div className=''> | |||||
| <Datasets containerRef={containerRef}/> | |||||
| <DatasetFooter /> | |||||
| </div> | |||||
| )} | |||||
| {isApiTab && <Doc apiBaseUrl={apiBaseUrl} />} | |||||
| </div> | |||||
| ) | |||||
| } | |||||
| export default Container |
| import DatasetCard from './DatasetCard' | import DatasetCard from './DatasetCard' | ||||
| import type { DataSetListResponse } from '@/models/datasets' | import type { DataSetListResponse } from '@/models/datasets' | ||||
| import { fetchDatasets } from '@/service/datasets' | import { fetchDatasets } from '@/service/datasets' | ||||
| import { useAppContext, useSelector } from '@/context/app-context' | |||||
| import { useAppContext } from '@/context/app-context' | |||||
| const getKey = (pageIndex: number, previousPageData: DataSetListResponse) => { | const getKey = (pageIndex: number, previousPageData: DataSetListResponse) => { | ||||
| if (!pageIndex || previousPageData.has_more) | if (!pageIndex || previousPageData.has_more) | ||||
| return null | return null | ||||
| } | } | ||||
| const Datasets = () => { | |||||
| type Props = { | |||||
| containerRef: React.RefObject<HTMLDivElement> | |||||
| } | |||||
| const Datasets = ({ | |||||
| containerRef, | |||||
| }: Props) => { | |||||
| const { isCurrentWorkspaceManager } = useAppContext() | const { isCurrentWorkspaceManager } = useAppContext() | ||||
| const { data, isLoading, setSize, mutate } = useSWRInfinite(getKey, fetchDatasets, { revalidateFirstPage: false, revalidateAll: true }) | const { data, isLoading, setSize, mutate } = useSWRInfinite(getKey, fetchDatasets, { revalidateFirstPage: false, revalidateAll: true }) | ||||
| const loadingStateRef = useRef(false) | const loadingStateRef = useRef(false) | ||||
| const pageContainerRef = useSelector(state => state.pageContainerRef) | |||||
| const anchorRef = useRef<HTMLAnchorElement>(null) | const anchorRef = useRef<HTMLAnchorElement>(null) | ||||
| useEffect(() => { | useEffect(() => { | ||||
| useEffect(() => { | useEffect(() => { | ||||
| const onScroll = debounce(() => { | const onScroll = debounce(() => { | ||||
| if (!loadingStateRef.current) { | if (!loadingStateRef.current) { | ||||
| const { scrollTop, clientHeight } = pageContainerRef.current! | |||||
| const { scrollTop, clientHeight } = containerRef.current! | |||||
| const anchorOffset = anchorRef.current!.offsetTop | const anchorOffset = anchorRef.current!.offsetTop | ||||
| if (anchorOffset - scrollTop - clientHeight < 100) | if (anchorOffset - scrollTop - clientHeight < 100) | ||||
| setSize(size => size + 1) | setSize(size => size + 1) | ||||
| } | } | ||||
| }, 50) | }, 50) | ||||
| pageContainerRef.current?.addEventListener('scroll', onScroll) | |||||
| return () => pageContainerRef.current?.removeEventListener('scroll', onScroll) | |||||
| containerRef.current?.addEventListener('scroll', onScroll) | |||||
| return () => containerRef.current?.removeEventListener('scroll', onScroll) | |||||
| }, []) | }, []) | ||||
| return ( | return ( | ||||
| <nav className='grid content-start grid-cols-1 gap-4 px-12 pt-8 sm:grid-cols-2 md:grid-cols-3 lg:grid-cols-4 grow shrink-0'> | |||||
| <nav className='grid content-start grid-cols-1 gap-4 px-12 pt-2 sm:grid-cols-2 md:grid-cols-3 lg:grid-cols-4 grow shrink-0'> | |||||
| { isCurrentWorkspaceManager && <NewDatasetCard ref={anchorRef} /> } | { isCurrentWorkspaceManager && <NewDatasetCard ref={anchorRef} /> } | ||||
| {data?.map(({ data: datasets }) => datasets.map(dataset => ( | {data?.map(({ data: datasets }) => datasets.map(dataset => ( | ||||
| <DatasetCard key={dataset.id} dataset={dataset} onDelete={mutate} />), | <DatasetCard key={dataset.id} dataset={dataset} onDelete={mutate} />), |
| 'use client' | |||||
| import type { FC } from 'react' | |||||
| import { useContext } from 'use-context-selector' | |||||
| import TemplateEn from './template/template.en.mdx' | |||||
| import TemplateZh from './template/template.zh.mdx' | |||||
| import I18n from '@/context/i18n' | |||||
| /** Props for the dataset API documentation article. */ | |||||
| type DocProps = { | |||||
| /** Base URL forwarded to the MDX template as its apiBaseUrl prop. */ | |||||
| apiBaseUrl: string | |||||
| } | |||||
| /** | |||||
| * Renders the dataset API reference, choosing the MDX template from the current i18n locale. | |||||
| */ | |||||
| const Doc: FC<DocProps> = ({ | |||||
| apiBaseUrl, | |||||
| }) => { | |||||
| const { locale } = useContext(I18n) | |||||
| // The 'en' locale gets the English template; every other locale gets the Chinese one. | |||||
| const Template = locale === 'en' ? TemplateEn : TemplateZh | |||||
| return ( | |||||
| <article className='mx-12 pt-16 bg-white rounded-t-xl prose prose-xl'> | |||||
| <Template apiBaseUrl={apiBaseUrl} /> | |||||
| </article> | |||||
| ) | |||||
| } | |||||
| export default Doc |
| import Datasets from './Datasets' | |||||
| import DatasetFooter from './DatasetFooter' | |||||
| import Container from './Container' | |||||
| const AppList = async () => { | const AppList = async () => { | ||||
| return ( | return ( | ||||
| <div className='flex flex-col overflow-auto bg-gray-100 shrink-0 grow'> | |||||
| <Datasets /> | |||||
| <DatasetFooter /> | |||||
| </div > | |||||
| <Container /> | |||||
| ) | ) | ||||
| } | } | ||||
| import { CodeGroup } from '@/app/components/develop/code.tsx' | |||||
| import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from '@/app/components/develop/md.tsx' | |||||
| # Dataset API | |||||
| <br/> | |||||
| <br/> | |||||
| <Heading | |||||
| url='/datasets' | |||||
| method='POST' | |||||
| title='Create an empty dataset' | |||||
| name='#create_empty_dataset' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='name' type='string' key='name'> | |||||
| Dataset name | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name"}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST 'https://api.dify.ai/v1/datasets' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --header 'Content-Type: application/json' \ | |||||
| --data-raw '{ | |||||
| "name": "name" | |||||
| }' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "id": "", | |||||
| "name": "name", | |||||
| "description": null, | |||||
| "provider": "vendor", | |||||
| "permission": "only_me", | |||||
| "data_source_type": null, | |||||
| "indexing_technique": null, | |||||
| "app_count": 0, | |||||
| "document_count": 0, | |||||
| "word_count": 0, | |||||
| "created_by": "", | |||||
| "created_at": 1695636173, | |||||
| "updated_by": "", | |||||
| "updated_at": 1695636173, | |||||
| "embedding_model": null, | |||||
| "embedding_model_provider": null, | |||||
| "embedding_available": null | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets' | |||||
| method='GET' | |||||
| title='Dataset list' | |||||
| name='#dataset_list' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path Query | |||||
| <Properties> | |||||
| <Property name='page' type='string' key='page'> | |||||
| Page number | |||||
| </Property> | |||||
| <Property name='limit' type='string' key='limit'> | |||||
| Number of items returned, default 20, range 1-100 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="GET" | |||||
| label="/datasets" | |||||
| targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request GET 'https://api.dify.ai/v1/datasets?page=1&limit=20' \ | |||||
| --header 'Authorization: Bearer {api_key}' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data": [ | |||||
| { | |||||
| "id": "", | |||||
| "name": "name", | |||||
| "description": "desc", | |||||
| "permission": "only_me", | |||||
| "data_source_type": "upload_file", | |||||
| "indexing_technique": "", | |||||
| "app_count": 2, | |||||
| "document_count": 10, | |||||
| "word_count": 1200, | |||||
| "created_by": "", | |||||
| "created_at": "", | |||||
| "updated_by": "", | |||||
| "updated_at": "" | |||||
| }, | |||||
| ... | |||||
| ], | |||||
| "has_more": true, | |||||
| "limit": 20, | |||||
| "total": 50, | |||||
| "page": 1 | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/document/create_by_text' | |||||
| method='POST' | |||||
| title='Create a document from text' | |||||
| name='#create_by_text' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| This API creates a new document from text in an existing dataset. | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| Dataset ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='name' type='string' key='name'> | |||||
| Document name | |||||
| </Property> | |||||
| <Property name='text' type='string' key='text'> | |||||
| Document content | |||||
| </Property> | |||||
| <Property name='indexing_technique' type='string' key='indexing_technique'> | |||||
| Index mode | |||||
| - high_quality High quality: embedding using embedding model, built as vector database index | |||||
| - economy Economy: Build using inverted index of Keyword Table Index | |||||
| </Property> | |||||
| <Property name='process_rule' type='object' key='process_rule'> | |||||
| Processing rules | |||||
| - mode (string) Cleaning, segmentation mode, automatic / custom | |||||
| - rules (text) Custom rules (in automatic mode, this field is empty) | |||||
| - pre_processing_rules (array[object]) Preprocessing rules | |||||
| - id (string) Unique identifier for the preprocessing rule | |||||
| - enumerate | |||||
| - remove_extra_spaces Replace consecutive spaces, newlines, tabs | |||||
| - remove_urls_emails Delete URL, email address | |||||
| - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. | |||||
| - segmentation (object) segmentation rules | |||||
| - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n | |||||
| - max_tokens Maximum length (token) defaults to 1000 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets/{dataset_id}/document/create_by_text" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/create_by_text' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --header 'Content-Type: application/json' \ | |||||
| --data-raw '{ | |||||
| "name": "text", | |||||
| "text": "text", | |||||
| "indexing_technique": "high_quality", | |||||
| "process_rule": { | |||||
| "mode": "automatic" | |||||
| } | |||||
| }' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "document": { | |||||
| "id": "", | |||||
| "position": 1, | |||||
| "data_source_type": "upload_file", | |||||
| "data_source_info": { | |||||
| "upload_file_id": "" | |||||
| }, | |||||
| "dataset_process_rule_id": "", | |||||
| "name": "text.txt", | |||||
| "created_from": "api", | |||||
| "created_by": "", | |||||
| "created_at": 1695690280, | |||||
| "tokens": 0, | |||||
| "indexing_status": "waiting", | |||||
| "error": null, | |||||
| "enabled": true, | |||||
| "disabled_at": null, | |||||
| "disabled_by": null, | |||||
| "archived": false, | |||||
| "display_status": "queuing", | |||||
| "word_count": 0, | |||||
| "hit_count": 0, | |||||
| "doc_form": "text_model" | |||||
| }, | |||||
| "batch": "" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/document/create_by_file' | |||||
| method='POST' | |||||
| title='Create documents from files' | |||||
| name='#create_by_file' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| This API creates a new document from an uploaded file in an existing dataset. | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| Dataset ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='original_document_id' type='string' key='original_document_id'> | |||||
| Source document ID (optional) | |||||
| - Used to re-upload the document or modify the document cleaning and segmentation configuration. The missing information is copied from the source document | |||||
| - The source document cannot be an archived document | |||||
| - When original_document_id is passed in, the existing document is updated. process_rule is optional; if it is omitted, the segmentation method of the source document is used by default | |||||
| - When original_document_id is not passed in, a new document is created, and process_rule is required | |||||
| </Property> | |||||
| <Property name='file' type='multipart/form-data' key='file'> | |||||
| Files that need to be uploaded. | |||||
| </Property> | |||||
| <Property name='indexing_technique' type='string' key='indexing_technique'> | |||||
| Index mode | |||||
| - high_quality High quality: embedding using embedding model, built as vector database index | |||||
| - economy Economy: Build using inverted index of Keyword Table Index | |||||
| </Property> | |||||
| <Property name='process_rule' type='object' key='process_rule'> | |||||
| Processing rules | |||||
| - mode (string) Cleaning, segmentation mode, automatic / custom | |||||
| - rules (text) Custom rules (in automatic mode, this field is empty) | |||||
| - pre_processing_rules (array[object]) Preprocessing rules | |||||
| - id (string) Unique identifier for the preprocessing rule | |||||
| - enumerate | |||||
| - remove_extra_spaces Replace consecutive spaces, newlines, tabs | |||||
| - remove_urls_emails Delete URL, email address | |||||
| - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. | |||||
| - segmentation (object) segmentation rules | |||||
| - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n | |||||
| - max_tokens Maximum length (token) defaults to 1000 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets/{dataset_id}/document/create_by_file" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/create_by_file' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ | |||||
| --form 'file=@"/path/to/file"' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "document": { | |||||
| "id": "", | |||||
| "position": 1, | |||||
| "data_source_type": "upload_file", | |||||
| "data_source_info": { | |||||
| "upload_file_id": "" | |||||
| }, | |||||
| "dataset_process_rule_id": "", | |||||
| "name": "Dify.txt", | |||||
| "created_from": "api", | |||||
| "created_by": "", | |||||
| "created_at": 1695308667, | |||||
| "tokens": 0, | |||||
| "indexing_status": "waiting", | |||||
| "error": null, | |||||
| "enabled": true, | |||||
| "disabled_at": null, | |||||
| "disabled_by": null, | |||||
| "archived": false, | |||||
| "display_status": "queuing", | |||||
| "word_count": 0, | |||||
| "hit_count": 0, | |||||
| "doc_form": "text_model" | |||||
| }, | |||||
| "batch": "" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/update_by_text' | |||||
| method='POST' | |||||
| title='Update document via text' | |||||
| name='#update_by_text' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| This API updates a document in an existing dataset using text. | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| Dataset ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| Document ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='name' type='string' key='name'> | |||||
| Document name (optional) | |||||
| </Property> | |||||
| <Property name='text' type='string' key='text'> | |||||
| Document content (optional) | |||||
| </Property> | |||||
| <Property name='process_rule' type='object' key='process_rule'> | |||||
| Processing rules | |||||
| - mode (string) Cleaning, segmentation mode, automatic / custom | |||||
| - rules (text) Custom rules (in automatic mode, this field is empty) | |||||
| - pre_processing_rules (array[object]) Preprocessing rules | |||||
| - id (string) Unique identifier for the preprocessing rule | |||||
| - enumerate | |||||
| - remove_extra_spaces Replace consecutive spaces, newlines, tabs | |||||
| - remove_urls_emails Delete URL, email address | |||||
| - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. | |||||
| - segmentation (object) segmentation rules | |||||
| - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n | |||||
| - max_tokens Maximum length (token) defaults to 1000 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/update_by_text" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/update_by_text' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --header 'Content-Type: application/json' \ | |||||
| --data-raw '{ | |||||
| "name": "name", | |||||
| "text": "text" | |||||
| }' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "document": { | |||||
| "id": "", | |||||
| "position": 1, | |||||
| "data_source_type": "upload_file", | |||||
| "data_source_info": { | |||||
| "upload_file_id": "" | |||||
| }, | |||||
| "dataset_process_rule_id": "", | |||||
| "name": "name.txt", | |||||
| "created_from": "api", | |||||
| "created_by": "", | |||||
| "created_at": 1695308667, | |||||
| "tokens": 0, | |||||
| "indexing_status": "waiting", | |||||
| "error": null, | |||||
| "enabled": true, | |||||
| "disabled_at": null, | |||||
| "disabled_by": null, | |||||
| "archived": false, | |||||
| "display_status": "queuing", | |||||
| "word_count": 0, | |||||
| "hit_count": 0, | |||||
| "doc_form": "text_model" | |||||
| }, | |||||
| "batch": "" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/update_by_file' | |||||
| method='POST' | |||||
| title='Update a document from a file' | |||||
| name='#update_by_file' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| This API updates a document in an existing dataset using an uploaded file. | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| Dataset ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| Document ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='name' type='string' key='name'> | |||||
| Document name (optional) | |||||
| </Property> | |||||
| <Property name='file' type='multipart/form-data' key='file'> | |||||
| Files to be uploaded | |||||
| </Property> | |||||
| <Property name='process_rule' type='object' key='process_rule'> | |||||
| Processing rules | |||||
| - mode (string) Cleaning, segmentation mode, automatic / custom | |||||
| - rules (text) Custom rules (in automatic mode, this field is empty) | |||||
| - pre_processing_rules (array[object]) Preprocessing rules | |||||
| - id (string) Unique identifier for the preprocessing rule | |||||
| - enumerate | |||||
| - remove_extra_spaces Replace consecutive spaces, newlines, tabs | |||||
| - remove_urls_emails Delete URL, email address | |||||
| - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. | |||||
| - segmentation (object) segmentation rules | |||||
| - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n | |||||
| - max_tokens Maximum length (token) defaults to 1000 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/update_by_file" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/update_by_file' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ | |||||
| --form 'file=@"/path/to/file"' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "document": { | |||||
| "id": "", | |||||
| "position": 1, | |||||
| "data_source_type": "upload_file", | |||||
| "data_source_info": { | |||||
| "upload_file_id": "" | |||||
| }, | |||||
| "dataset_process_rule_id": "", | |||||
| "name": "Dify.txt", | |||||
| "created_from": "api", | |||||
| "created_by": "", | |||||
| "created_at": 1695308667, | |||||
| "tokens": 0, | |||||
| "indexing_status": "waiting", | |||||
| "error": null, | |||||
| "enabled": true, | |||||
| "disabled_at": null, | |||||
| "disabled_by": null, | |||||
| "archived": false, | |||||
| "display_status": "queuing", | |||||
| "word_count": 0, | |||||
| "hit_count": 0, | |||||
| "doc_form": "text_model" | |||||
| }, | |||||
| "batch": "20230921150427533684" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{batch}/indexing-status' | |||||
| method='GET' | |||||
| title='Get document embedding status (progress)' | |||||
| name='#indexing_status' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| Dataset ID | |||||
| </Property> | |||||
| <Property name='batch' type='string' key='batch'> | |||||
| Batch number of uploaded documents | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="GET" | |||||
| label="/datasets/{dataset_id}/documents/{batch}/indexing-status" | |||||
| targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request GET 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{batch}/indexing-status' \ | |||||
| --header 'Authorization: Bearer {api_key}' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data":[{ | |||||
| "id": "", | |||||
| "indexing_status": "indexing", | |||||
| "processing_started_at": 1681623462.0, | |||||
| "parsing_completed_at": 1681623462.0, | |||||
| "cleaning_completed_at": 1681623462.0, | |||||
| "splitting_completed_at": 1681623462.0, | |||||
| "completed_at": null, | |||||
| "paused_at": null, | |||||
| "error": null, | |||||
| "stopped_at": null, | |||||
| "completed_segments": 24, | |||||
| "total_segments": 100 | |||||
| }] | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}' | |||||
| method='DELETE' | |||||
| title='Delete document' | |||||
| name='#delete_document' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| Dataset ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| Document ID | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="DELETE" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}" | |||||
| targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request DELETE 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}' \ | |||||
| --header 'Authorization: Bearer {api_key}' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "result": "success" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents' | |||||
| method='GET' | |||||
| title='Dataset document list' | |||||
| name='#dataset_document_list' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| Dataset ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Path Query | |||||
| <Properties> | |||||
| <Property name='keyword' type='string' key='keyword'> | |||||
| Search keyword; currently only document names are searched (optional) | |||||
| </Property> | |||||
| <Property name='page' type='string' key='page'> | |||||
| Page number (optional) | |||||
| </Property> | |||||
| <Property name='limit' type='string' key='limit'> | |||||
| Number of items returned; default 20, range 1-100 (optional) | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="GET" | |||||
| label="/datasets/{dataset_id}/documents" | |||||
| targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request GET 'https://api.dify.ai/v1/datasets/{dataset_id}/documents' \ | |||||
| --header 'Authorization: Bearer {api_key}' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data": [ | |||||
| { | |||||
| "id": "", | |||||
| "position": 1, | |||||
| "data_source_type": "file_upload", | |||||
| "data_source_info": null, | |||||
| "dataset_process_rule_id": null, | |||||
| "name": "dify", | |||||
| "created_from": "", | |||||
| "created_by": "", | |||||
| "created_at": 1681623639, | |||||
| "tokens": 0, | |||||
| "indexing_status": "waiting", | |||||
| "error": null, | |||||
| "enabled": true, | |||||
| "disabled_at": null, | |||||
| "disabled_by": null, | |||||
| "archived": false | |||||
| } | |||||
| ], | |||||
| "has_more": false, | |||||
| "limit": 20, | |||||
| "total": 9, | |||||
| "page": 1 | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/segments' | |||||
| method='POST' | |||||
| title='Add segment' | |||||
| name='#create_new_segment' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| Dataset ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| Document ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='segments' type='object list' key='segments'> | |||||
| segments (object list) Segmented content | |||||
| - content (text) Text content/question content, required | |||||
| - answer (text) Answer content (optional); pass a value when the dataset is in QA mode | |||||
| - keywords (list) Keywords (optional) | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/segments" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/segments' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --header 'Content-Type: application/json' \ | |||||
| --data-raw '{ | |||||
| "segments": [ | |||||
| { | |||||
| "content": "1", | |||||
| "answer": "1", | |||||
| "keywords": ["a"] | |||||
| } | |||||
| ] | |||||
| }' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data": [{ | |||||
| "id": "", | |||||
| "position": 1, | |||||
| "document_id": "", | |||||
| "content": "1", | |||||
| "answer": "1", | |||||
| "word_count": 25, | |||||
| "tokens": 0, | |||||
| "keywords": [ | |||||
| "a" | |||||
| ], | |||||
| "index_node_id": "", | |||||
| "index_node_hash": "", | |||||
| "hit_count": 0, | |||||
| "enabled": true, | |||||
| "disabled_at": null, | |||||
| "disabled_by": null, | |||||
| "status": "completed", | |||||
| "created_by": "", | |||||
| "created_at": 1695312007, | |||||
| "indexing_at": 1695312007, | |||||
| "completed_at": 1695312007, | |||||
| "error": null, | |||||
| "stopped_at": null | |||||
| }], | |||||
| "doc_form": "text_model" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| Error message | |||||
| - **document_indexing**: Document indexing failed | |||||
| - **provider_not_initialize**: Embedding model is not configured | |||||
| - **not_found**: Document does not exist | |||||
| - **dataset_name_duplicate**: Duplicate dataset name | |||||
| - **provider_quota_exceeded**: Model quota exceeds limit | |||||
| - **dataset_not_initialized**: The dataset has not been initialized yet | |||||
| - **unsupported_file_type**: Unsupported file types. | |||||
| - Currently only supports: txt, markdown, md, pdf, html, htm, xlsx, docx, csv | |||||
| - **too_many_files**: There are too many files. Currently, only single-file upload is supported | |||||
| - **file_too_large**: The file is too large. Files below 15M are supported by default; the actual limit depends on your environment configuration |
| import { CodeGroup } from '@/app/components/develop/code.tsx' | |||||
| import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from '@/app/components/develop/md.tsx' | |||||
| # 数据集 API | |||||
| <br/> | |||||
| <br/> | |||||
| <Heading | |||||
| url='/datasets' | |||||
| method='POST' | |||||
| title='创建空数据集' | |||||
| name='#create_empty_dataset' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='name' type='string' key='name'> | |||||
| 数据集名称 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name"}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST 'https://api.dify.ai/v1/datasets' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --header 'Content-Type: application/json' \ | |||||
| --data-raw '{ | |||||
| "name": "name" | |||||
| }' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "id": "", | |||||
| "name": "name", | |||||
| "description": null, | |||||
| "provider": "vendor", | |||||
| "permission": "only_me", | |||||
| "data_source_type": null, | |||||
| "indexing_technique": null, | |||||
| "app_count": 0, | |||||
| "document_count": 0, | |||||
| "word_count": 0, | |||||
| "created_by": "", | |||||
| "created_at": 1695636173, | |||||
| "updated_by": "", | |||||
| "updated_at": 1695636173, | |||||
| "embedding_model": null, | |||||
| "embedding_model_provider": null, | |||||
| "embedding_available": null | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets' | |||||
| method='GET' | |||||
| title='数据集列表' | |||||
| name='#dataset_list' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path Query | |||||
| <Properties> | |||||
| <Property name='page' type='string' key='page'> | |||||
| 页码 | |||||
| </Property> | |||||
| <Property name='limit' type='string' key='limit'> | |||||
| 返回条数,默认 20,范围 1-100 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="GET" | |||||
| label="/datasets" | |||||
| targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request GET 'https://api.dify.ai/v1/datasets?page=1&limit=20' \ | |||||
| --header 'Authorization: Bearer {api_key}' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data": [ | |||||
| { | |||||
| "id": "", | |||||
| "name": "数据集名称", | |||||
| "description": "描述信息", | |||||
| "permission": "only_me", | |||||
| "data_source_type": "upload_file", | |||||
| "indexing_technique": "", | |||||
| "app_count": 2, | |||||
| "document_count": 10, | |||||
| "word_count": 1200, | |||||
| "created_by": "", | |||||
| "created_at": "", | |||||
| "updated_by": "", | |||||
| "updated_at": "" | |||||
| }, | |||||
| ... | |||||
| ], | |||||
| "has_more": true, | |||||
| "limit": 20, | |||||
| "total": 50, | |||||
| "page": 1 | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/document/create_by_text' | |||||
| method='POST' | |||||
| title='通过文本创建文档' | |||||
| name='#create_by_text' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| 此接口基于已存在数据集,在此数据集的基础上通过文本创建新的文档 | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| 数据集 ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='name' type='string' key='name'> | |||||
| 文档名称 | |||||
| </Property> | |||||
| <Property name='text' type='string' key='text'> | |||||
| 文档内容 | |||||
| </Property> | |||||
| <Property name='indexing_technique' type='string' key='indexing_technique'> | |||||
| 索引方式 | |||||
| - high_quality 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 | |||||
| - economy 经济:使用 Keyword Table Index 的倒排索引进行构建 | |||||
| </Property> | |||||
| <Property name='process_rule' type='object' key='process_rule'> | |||||
| 处理规则 | |||||
| - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义 | |||||
| - rules (text) 自定义规则(自动模式下,该字段为空) | |||||
| - pre_processing_rules (array[object]) 预处理规则 | |||||
| - id (string) 预处理规则的唯一标识符 | |||||
| - 枚举: | |||||
| - remove_extra_spaces 替换连续空格、换行符、制表符 | |||||
| - remove_urls_emails 删除 URL、电子邮件地址 | |||||
| - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 | |||||
| - segmentation (object) 分段规则 | |||||
| - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n | |||||
| - max_tokens 最大长度 (token) 默认为 1000 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets/{dataset_id}/document/create_by_text" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/create_by_text' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --header 'Content-Type: application/json' \ | |||||
| --data-raw '{ | |||||
| "name": "text", | |||||
| "text": "text", | |||||
| "indexing_technique": "high_quality", | |||||
| "process_rule": { | |||||
| "mode": "automatic" | |||||
| } | |||||
| }' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "document": { | |||||
| "id": "", | |||||
| "position": 1, | |||||
| "data_source_type": "upload_file", | |||||
| "data_source_info": { | |||||
| "upload_file_id": "" | |||||
| }, | |||||
| "dataset_process_rule_id": "", | |||||
| "name": "text.txt", | |||||
| "created_from": "api", | |||||
| "created_by": "", | |||||
| "created_at": 1695690280, | |||||
| "tokens": 0, | |||||
| "indexing_status": "waiting", | |||||
| "error": null, | |||||
| "enabled": true, | |||||
| "disabled_at": null, | |||||
| "disabled_by": null, | |||||
| "archived": false, | |||||
| "display_status": "queuing", | |||||
| "word_count": 0, | |||||
| "hit_count": 0, | |||||
| "doc_form": "text_model" | |||||
| }, | |||||
| "batch": "" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/document/create_by_file' | |||||
| method='POST' | |||||
| title='通过文件创建文档 ' | |||||
| name='#create_by_file' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| 此接口基于已存在数据集,在此数据集的基础上通过文件创建新的文档 | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| 数据集 ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='original_document_id' type='string' key='original_document_id'> | |||||
| 源文档 ID (选填) | |||||
| - 用于重新上传文档或修改文档清洗、分段配置,缺失的信息从源文档复制 | |||||
| - 源文档不可为归档的文档 | |||||
| - 当传入 original_document_id 时,代表文档进行更新操作,process_rule 为可填项目,不填默认使用源文档的分段方式 | |||||
| - 未传入 original_document_id 时,代表文档进行新增操作,process_rule 为必填 | |||||
| </Property> | |||||
| <Property name='file' type='multipart/form-data' key='file'> | |||||
| 需要上传的文件。 | |||||
| </Property> | |||||
| <Property name='indexing_technique' type='string' key='indexing_technique'> | |||||
| 索引方式 | |||||
| - high_quality 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 | |||||
| - economy 经济:使用 Keyword Table Index 的倒排索引进行构建 | |||||
| </Property> | |||||
| <Property name='process_rule' type='object' key='process_rule'> | |||||
| 处理规则 | |||||
| - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义。 | |||||
| - rules (text) 自定义规则(自动模式下,该字段为空) | |||||
| - pre_processing_rules (array[object]) 预处理规则 | |||||
| - id (string) 预处理规则的唯一标识符 | |||||
| - 枚举: | |||||
| - remove_extra_spaces 替换连续空格、换行符、制表符 | |||||
| - remove_urls_emails 删除 URL、电子邮件地址 | |||||
| - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值。 | |||||
| - segmentation (object) 分段规则 | |||||
| - separator 自定义分段标识符,目前仅允许设置一个分隔符,默认为 \n | |||||
| - max_tokens 最大长度 (token) 默认为 1000 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets/{dataset_id}/document/create_by_file" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/create_by_file' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ | |||||
| --form 'file=@"/path/to/file"' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "document": { | |||||
| "id": "", | |||||
| "position": 1, | |||||
| "data_source_type": "upload_file", | |||||
| "data_source_info": { | |||||
| "upload_file_id": "" | |||||
| }, | |||||
| "dataset_process_rule_id": "", | |||||
| "name": "Dify.txt", | |||||
| "created_from": "api", | |||||
| "created_by": "", | |||||
| "created_at": 1695308667, | |||||
| "tokens": 0, | |||||
| "indexing_status": "waiting", | |||||
| "error": null, | |||||
| "enabled": true, | |||||
| "disabled_at": null, | |||||
| "disabled_by": null, | |||||
| "archived": false, | |||||
| "display_status": "queuing", | |||||
| "word_count": 0, | |||||
| "hit_count": 0, | |||||
| "doc_form": "text_model" | |||||
| }, | |||||
| "batch": "" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/update_by_text' | |||||
| method='POST' | |||||
| title='通过文本更新文档 ' | |||||
| name='#update_by_text' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| 此接口基于已存在数据集,在此数据集的基础上通过文本更新文档 | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| 数据集 ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| 文档 ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='name' type='string' key='name'> | |||||
| 文档名称 (选填) | |||||
| </Property> | |||||
| <Property name='text' type='string' key='text'> | |||||
| 文档内容(选填) | |||||
| </Property> | |||||
| <Property name='process_rule' type='object' key='process_rule'> | |||||
| 处理规则(选填) | |||||
| - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义。 | |||||
| - rules (text) 自定义规则(自动模式下,该字段为空) | |||||
| - pre_processing_rules (array[object]) 预处理规则 | |||||
| - id (string) 预处理规则的唯一标识符 | |||||
| - 枚举: | |||||
| - remove_extra_spaces 替换连续空格、换行符、制表符 | |||||
| - remove_urls_emails 删除 URL、电子邮件地址 | |||||
| - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值。 | |||||
| - segmentation (object) 分段规则 | |||||
| - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n | |||||
| - max_tokens 最大长度 (token) 默认为 1000 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/update_by_text" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/update_by_text' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --header 'Content-Type: application/json' \ | |||||
| --data-raw '{ | |||||
| "name": "name", | |||||
| "text": "text" | |||||
| }' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "document": { | |||||
| "id": "", | |||||
| "position": 1, | |||||
| "data_source_type": "upload_file", | |||||
| "data_source_info": { | |||||
| "upload_file_id": "" | |||||
| }, | |||||
| "dataset_process_rule_id": "", | |||||
| "name": "name.txt", | |||||
| "created_from": "api", | |||||
| "created_by": "", | |||||
| "created_at": 1695308667, | |||||
| "tokens": 0, | |||||
| "indexing_status": "waiting", | |||||
| "error": null, | |||||
| "enabled": true, | |||||
| "disabled_at": null, | |||||
| "disabled_by": null, | |||||
| "archived": false, | |||||
| "display_status": "queuing", | |||||
| "word_count": 0, | |||||
| "hit_count": 0, | |||||
| "doc_form": "text_model" | |||||
| }, | |||||
| "batch": "" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/update_by_file' | |||||
| method='POST' | |||||
| title='通过文件更新文档 ' | |||||
| name='#update_by_file' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| 此接口基于已存在数据集,在此数据集的基础上通过文件更新文档的操作。 | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| 数据集 ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| 文档 ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='name' type='string' key='name'> | |||||
| 文档名称 (选填) | |||||
| </Property> | |||||
| <Property name='file' type='multipart/form-data' key='file'> | |||||
| 需要上传的文件 | |||||
| </Property> | |||||
| <Property name='process_rule' type='object' key='process_rule'> | |||||
| 处理规则(选填) | |||||
| - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义。 | |||||
| - rules (text) 自定义规则(自动模式下,该字段为空) | |||||
| - pre_processing_rules (array[object]) 预处理规则 | |||||
| - id (string) 预处理规则的唯一标识符 | |||||
| - 枚举: | |||||
| - remove_extra_spaces 替换连续空格、换行符、制表符 | |||||
| - remove_urls_emails 删除 URL、电子邮件地址 | |||||
| - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 | |||||
| - segmentation (object) 分段规则 | |||||
| - separator 自定义分段标识符,目前仅允许设置一个分隔符,默认为 \n | |||||
| - max_tokens 最大长度 (token) 默认为 1000 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/update_by_file" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/update_by_file' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ | |||||
| --form 'file=@"/path/to/file"' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "document": { | |||||
| "id": "", | |||||
| "position": 1, | |||||
| "data_source_type": "upload_file", | |||||
| "data_source_info": { | |||||
| "upload_file_id": "" | |||||
| }, | |||||
| "dataset_process_rule_id": "", | |||||
| "name": "Dify.txt", | |||||
| "created_from": "api", | |||||
| "created_by": "", | |||||
| "created_at": 1695308667, | |||||
| "tokens": 0, | |||||
| "indexing_status": "waiting", | |||||
| "error": null, | |||||
| "enabled": true, | |||||
| "disabled_at": null, | |||||
| "disabled_by": null, | |||||
| "archived": false, | |||||
| "display_status": "queuing", | |||||
| "word_count": 0, | |||||
| "hit_count": 0, | |||||
| "doc_form": "text_model" | |||||
| }, | |||||
| "batch": "20230921150427533684" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{batch}/indexing-status' | |||||
| method='GET' | |||||
| title='获取文档嵌入状态(进度)' | |||||
| name='#indexing_status' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| 数据集 ID | |||||
| </Property> | |||||
| <Property name='batch' type='string' key='batch'> | |||||
| 上传文档的批次号 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="GET" | |||||
| label="/datasets/{dataset_id}/documents/{batch}/indexing-status" | |||||
| targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request GET 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{batch}/indexing-status' \ | |||||
| --header 'Authorization: Bearer {api_key}' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data":[{ | |||||
| "id": "", | |||||
| "indexing_status": "indexing", | |||||
| "processing_started_at": 1681623462.0, | |||||
| "parsing_completed_at": 1681623462.0, | |||||
| "cleaning_completed_at": 1681623462.0, | |||||
| "splitting_completed_at": 1681623462.0, | |||||
| "completed_at": null, | |||||
| "paused_at": null, | |||||
| "error": null, | |||||
| "stopped_at": null, | |||||
| "completed_segments": 24, | |||||
| "total_segments": 100 | |||||
| }] | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}' | |||||
| method='DELETE' | |||||
| title='删除文档' | |||||
| name='#delete_document' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| 数据集 ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| 文档 ID | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="DELETE" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}" | |||||
| targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request DELETE 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}' \ | |||||
| --header 'Authorization: Bearer {api_key}' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "result": "success" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents' | |||||
| method='GET' | |||||
| title='数据集文档列表' | |||||
| name='#dataset_document_list' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| 数据集 ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Path Query | |||||
| <Properties> | |||||
| <Property name='keyword' type='string' key='keyword'> | |||||
| 搜索关键词,可选,目前仅搜索文档名称 | |||||
| </Property> | |||||
| <Property name='page' type='string' key='page'> | |||||
| 页码,可选 | |||||
| </Property> | |||||
| <Property name='limit' type='string' key='limit'> | |||||
| 返回条数,可选,默认 20,范围 1-100 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="GET" | |||||
| label="/datasets/{dataset_id}/documents" | |||||
| targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request GET 'https://api.dify.ai/v1/datasets/{dataset_id}/documents' \ | |||||
| --header 'Authorization: Bearer {api_key}' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data": [ | |||||
| { | |||||
| "id": "", | |||||
| "position": 1, | |||||
| "data_source_type": "file_upload", | |||||
| "data_source_info": null, | |||||
| "dataset_process_rule_id": null, | |||||
| "name": "dify", | |||||
| "created_from": "", | |||||
| "created_by": "", | |||||
| "created_at": 1681623639, | |||||
| "tokens": 0, | |||||
| "indexing_status": "waiting", | |||||
| "error": null, | |||||
| "enabled": true, | |||||
| "disabled_at": null, | |||||
| "disabled_by": null, | |||||
| "archived": false | |||||
| } | |||||
| ], | |||||
| "has_more": false, | |||||
| "limit": 20, | |||||
| "total": 9, | |||||
| "page": 1 | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/segments' | |||||
| method='POST' | |||||
| title='新增分段' | |||||
| name='#create_new_segment' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| 数据集 ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| 文档 ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='segments' type='object list' key='segments'> | |||||
| segments (object list) 分段内容 | |||||
| - content (text) 文本内容/问题内容,必填 | |||||
| - answer(text) 答案内容,非必填,如果数据集的模式为qa模式则传值 | |||||
| - keywords(list) 关键字,非必填 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/segments" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/segments' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --header 'Content-Type: application/json' \ | |||||
| --data-raw '{ | |||||
| "segments": [ | |||||
| { | |||||
| "content": "1", | |||||
| "answer": "1", | |||||
| "keywords": ["a"] | |||||
| } | |||||
| ] | |||||
| }' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data": [{ | |||||
| "id": "", | |||||
| "position": 1, | |||||
| "document_id": "", | |||||
| "content": "1", | |||||
| "answer": "1", | |||||
| "word_count": 25, | |||||
| "tokens": 0, | |||||
| "keywords": [ | |||||
| "a" | |||||
| ], | |||||
| "index_node_id": "", | |||||
| "index_node_hash": "", | |||||
| "hit_count": 0, | |||||
| "enabled": true, | |||||
| "disabled_at": null, | |||||
| "disabled_by": null, | |||||
| "status": "completed", | |||||
| "created_by": "", | |||||
| "created_at": 1695312007, | |||||
| "indexing_at": 1695312007, | |||||
| "completed_at": 1695312007, | |||||
| "error": null, | |||||
| "stopped_at": null | |||||
| }], | |||||
| "doc_form": "text_model" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| --- | |||||
| 错误信息 | |||||
| - **document_indexing**: 文档索引失败 | |||||
| - **provider_not_initialize**: Embedding 模型未配置 | |||||
| - **not_found**: 文档不存在 | |||||
| - **dataset_name_duplicate**: 数据集名称重复 | |||||
| - **provider_quota_exceeded**: 模型额度超过限制 | |||||
| - **dataset_not_initialized**: 数据集还未初始化 | |||||
| - **unsupported_file_type**: 不支持的文件类型 | |||||
| - 目前只支持:txt, markdown, md, pdf, html, htm, xlsx, docx, csv | |||||
| - **too_many_files**: 文件数量过多,暂时只支持单一文件上传 | |||||
| - **file_too_large**: 文件太大,默认支持 15M 以下,具体需要参考环境变量配置 |
import type { FC } from 'react'

/** One selectable tab: `value` identifies it, `text` is the label rendered for it. */
type Option = {
  value: string
  text: string
}

type TabSliderProps = {
  /** Value of the option that should be shown as selected. */
  value: string
  /** Invoked with the clicked option's `value`. */
  onChange: (v: string) => void
  /** Tabs, rendered left to right in array order. */
  options: Option[]
}

// Width in pixels of one tab. Must match the literal `w-[118px]` utility
// classes used below (they are kept as literals so Tailwind can detect them).
const TAB_WIDTH = 118

/**
 * Segmented tab switcher.
 *
 * Renders every option as a clickable cell and, when `value` matches one of
 * the options, overlays a highlighted copy of that option which is moved
 * horizontally with a CSS transform based on the option's index.
 * If `value` matches no option, no highlight is rendered.
 */
const TabSlider: FC<TabSliderProps> = ({
  value,
  onChange,
  options,
}) => {
  const currentIndex = options.findIndex(option => option.value === value)
  // `findIndex` returns -1 when nothing matches, which makes `current` undefined.
  const current = options[currentIndex]

  return (
    <div className='relative flex p-0.5 rounded-lg bg-gray-200'>
      {
        options.map((option, index) => (
          <div
            key={option.value}
            className={`
              flex justify-center items-center w-[118px] h-7 text-[13px]
              font-semibold text-gray-600 rounded-[7px] cursor-pointer
              hover:bg-gray-50
              ${/* A ternary is required here: `cond && 'cls'` would interpolate the string "false" for the last tab. */ ''}
              ${index !== options.length - 1 ? 'mr-[1px]' : ''}
            `}
            onClick={() => onChange(option.value)}
          >
            {option.text}
          </div>
        ))
      }
      {
        current && (
          <div
            className={`
              absolute flex justify-center items-center w-[118px] h-7 bg-white text-[13px] font-semibold text-primary-600
              border-[0.5px] border-gray-200 rounded-[7px] shadow-xs transition-transform
            `}
            style={{ transform: `translateX(${currentIndex * TAB_WIDTH + 1}px)` }}
          >
            {current.text}
          </div>
        )
      }
    </div>
  )
}

export default TabSlider
/** Props accepted by the secret key button. */
type ISecretKeyButtonProps = {
  className?: string
  // Optional. NOTE(review): the secret key modal in this change falls back to
  // the `/datasets/api-keys` endpoints when no appId is given — presumably this
  // prop is forwarded to it; confirm against the button implementation.
  appId?: string
  iconCls?: string
  textCls?: string
}
| import s from './style.module.css' | import s from './style.module.css' | ||||
| import Modal from '@/app/components/base/modal' | import Modal from '@/app/components/base/modal' | ||||
| import Button from '@/app/components/base/button' | import Button from '@/app/components/base/button' | ||||
| import { createApikey, delApikey, fetchApiKeysList } from '@/service/apps' | |||||
| import { | |||||
| createApikey as createAppApikey, | |||||
| delApikey as delAppApikey, | |||||
| fetchApiKeysList as fetchAppApiKeysList, | |||||
| } from '@/service/apps' | |||||
| import { | |||||
| createApikey as createDatasetApikey, | |||||
| delApikey as delDatasetApikey, | |||||
| fetchApiKeysList as fetchDatasetApiKeysList, | |||||
| } from '@/service/datasets' | |||||
| import type { CreateApiKeyResponse } from '@/models/app' | import type { CreateApiKeyResponse } from '@/models/app' | ||||
| import Tooltip from '@/app/components/base/tooltip' | import Tooltip from '@/app/components/base/tooltip' | ||||
| import Loading from '@/app/components/base/loading' | import Loading from '@/app/components/base/loading' | ||||
| type ISecretKeyModalProps = { | type ISecretKeyModalProps = { | ||||
| isShow: boolean | isShow: boolean | ||||
| appId: string | |||||
| appId?: string | |||||
| onClose: () => void | onClose: () => void | ||||
| } | } | ||||
| const [isVisible, setVisible] = useState(false) | const [isVisible, setVisible] = useState(false) | ||||
| const [newKey, setNewKey] = useState<CreateApiKeyResponse | undefined>(undefined) | const [newKey, setNewKey] = useState<CreateApiKeyResponse | undefined>(undefined) | ||||
| const { mutate } = useSWRConfig() | const { mutate } = useSWRConfig() | ||||
| const commonParams = { url: `/apps/${appId}/api-keys`, params: {} } | |||||
| const commonParams = appId | |||||
| ? { url: `/apps/${appId}/api-keys`, params: {} } | |||||
| : { url: '/datasets/api-keys', params: {} } | |||||
| const fetchApiKeysList = appId ? fetchAppApiKeysList : fetchDatasetApiKeysList | |||||
| const { data: apiKeysList } = useSWR(commonParams, fetchApiKeysList) | const { data: apiKeysList } = useSWR(commonParams, fetchApiKeysList) | ||||
| const [delKeyID, setDelKeyId] = useState('') | const [delKeyID, setDelKeyId] = useState('') | ||||
| if (!delKeyID) | if (!delKeyID) | ||||
| return | return | ||||
| await delApikey({ url: `/apps/${appId}/api-keys/${delKeyID}`, params: {} }) | |||||
| const delApikey = appId ? delAppApikey : delDatasetApikey | |||||
| const params = appId | |||||
| ? { url: `/apps/${appId}/api-keys/${delKeyID}`, params: {} } | |||||
| : { url: `/datasets/api-keys/${delKeyID}`, params: {} } | |||||
| await delApikey(params) | |||||
| mutate(commonParams) | mutate(commonParams) | ||||
| } | } | ||||
| const onCreate = async () => { | const onCreate = async () => { | ||||
| const res = await createApikey({ url: `/apps/${appId}/api-keys`, body: {} }) | |||||
| const params = appId | |||||
| ? { url: `/apps/${appId}/api-keys`, body: {} } | |||||
| : { url: '/datasets/api-keys', body: {} } | |||||
| const createApikey = appId ? createAppApikey : createDatasetApikey | |||||
| const res = await createApikey(params) | |||||
| setVisible(true) | setVisible(true) | ||||
| setNewKey(res) | setNewKey(res) | ||||
| mutate(commonParams) | mutate(commonParams) |
| never: '从未', | never: '从未', | ||||
| apiKeyModal: { | apiKeyModal: { | ||||
| apiSecretKey: 'API 密钥', | apiSecretKey: 'API 密钥', | ||||
| apiSecretKeyTips: '如果不想你的应用 API 被滥用,请保护好你的 API Key :) 最佳实践是避免在前端代码中明文引用。', | |||||
| apiSecretKeyTips: '如果不想你的 API 被滥用,请保护好你的 API Key :) 最佳实践是避免在前端代码中明文引用。', | |||||
| createNewSecretKey: '创建密钥', | createNewSecretKey: '创建密钥', | ||||
| secretKey: '密钥', | secretKey: '密钥', | ||||
| created: '创建时间', | created: '创建时间', |
| intro6: ' as a standalone ChatGPT index plug-in to publish', | intro6: ' as a standalone ChatGPT index plug-in to publish', | ||||
| unavailable: 'Unavailable', | unavailable: 'Unavailable', | ||||
| unavailableTip: 'Embedding model is not available, the default embedding model needs to be configured', | unavailableTip: 'Embedding model is not available, the default embedding model needs to be configured', | ||||
| datasets: 'DATASETS', | |||||
| datasetsApi: 'API', | |||||
| } | } | ||||
| export default translation | export default translation |
| intro6: '为独立的 ChatGPT 插件发布使用', | intro6: '为独立的 ChatGPT 插件发布使用', | ||||
| unavailable: '不可用', | unavailable: '不可用', | ||||
| unavailableTip: '由于 embedding 模型不可用,需要配置默认 embedding 模型', | unavailableTip: '由于 embedding 模型不可用,需要配置默认 embedding 模型', | ||||
| datasets: '数据集', | |||||
| datasetsApi: 'API', | |||||
| } | } | ||||
| export default translation | export default translation |
| createDocumentResponse, | createDocumentResponse, | ||||
| } from '@/models/datasets' | } from '@/models/datasets' | ||||
| import type { CommonResponse, DataSourceNotionWorkspace } from '@/models/common' | import type { CommonResponse, DataSourceNotionWorkspace } from '@/models/common' | ||||
| import type { | |||||
| ApikeysListResponse, | |||||
| CreateApiKeyResponse, | |||||
| } from '@/models/app' | |||||
| // apis for documents in a dataset | // apis for documents in a dataset | ||||
/**
 * Requests the preview of a Notion page.
 *
 * Builds the path from the workspace ID, page ID and page type, then passes
 * it to `get`; the result is typed as `{ content: string }`.
 */
export const fetchNotionPagePreview: Fetcher<{ content: string }, { workspaceID: string; pageID: string; pageType: string }> = ({ workspaceID, pageID, pageType }) => {
  const previewPath = `notion/workspaces/${workspaceID}/pages/${pageID}/${pageType}/preview`
  return get<{ content: string }>(previewPath)
}
/**
 * Fetcher for an API key list.
 *
 * Forwards `url` and `params` unchanged to `get`; the result is typed as
 * `ApikeysListResponse`. The caller decides which endpoint `url` points at.
 */
export const fetchApiKeysList: Fetcher<ApikeysListResponse, { url: string; params: Record<string, any> }> = ({ url, params }) =>
  get<ApikeysListResponse>(url, params)
/**
 * Fetcher that deletes an API key.
 *
 * Forwards `url` and `params` unchanged to `del`; the result is typed as
 * `CommonResponse`. The key to delete is identified by the caller-built `url`.
 */
export const delApikey: Fetcher<CommonResponse, { url: string; params: Record<string, any> }> = ({ url, params }) =>
  del<CommonResponse>(url, params)
/**
 * Fetcher that creates an API key.
 *
 * Forwards `url` and `body` unchanged to `post`; the result is typed as
 * `CreateApiKeyResponse`.
 */
export const createApikey: Fetcher<CreateApiKeyResponse, { url: string; body: Record<string, any> }> = ({ url, body }) =>
  post<CreateApiKeyResponse>(url, body)
/**
 * Fetcher for the dataset API base URL.
 *
 * Passes `url` to `get`; the result is typed as an object carrying
 * `api_base_url`.
 */
export const fetchDatasetApiBaseUrl: Fetcher<{ api_base_url: string }, string> = url =>
  get<{ api_base_url: string }>(url)