選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

template.en.mdx 78KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305
  1. {/**
  2. * @typedef Props
  3. * @property {string} apiBaseUrl
  4. */}
  5. import { CodeGroup } from '@/app/components/develop/code.tsx'
  6. import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstruction, Paragraph } from '@/app/components/develop/md.tsx'
  7. # Knowledge API
  8. <div>
  9. ### Authentication
  10. Service API of Dify authenticates using an `API-Key`.
  11. We recommend storing the `API-Key` on the backend rather than sharing or storing it on the client side, to avoid leaking the `API-Key`, which may lead to property loss.
  12. All API requests should include your `API-Key` in the **`Authorization`** HTTP Header, as shown below:
  13. <CodeGroup title="Code">
  14. ```javascript
  15. Authorization: Bearer {API_KEY}
  16. ```
  17. </CodeGroup>
  18. </div>
  19. <hr className='ml-0 mr-0' />
  20. <Heading
  21. url='/datasets/{dataset_id}/document/create-by-text'
  22. method='POST'
  23. title='Create a Document from Text'
  24. name='#create-by-text'
  25. />
  26. <Row>
  27. <Col>
  28. This API creates a new document from text within an existing knowledge base.
  29. ### Path
  30. <Properties>
  31. <Property name='dataset_id' type='string' key='dataset_id'>
  32. Knowledge ID
  33. </Property>
  34. </Properties>
  35. ### Request Body
  36. <Properties>
  37. <Property name='name' type='string' key='name'>
  38. Document name
  39. </Property>
  40. <Property name='text' type='string' key='text'>
  41. Document content
  42. </Property>
  43. <Property name='indexing_technique' type='string' key='indexing_technique'>
  44. Index mode
  45. - <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
  46. - <code>economy</code> Economy: Build using inverted index of keyword table index
  47. </Property>
  48. <Property name='doc_form' type='string' key='doc_form'>
  49. Format of indexed content
  50. - <code>text_model</code> Text documents are directly embedded; `economy` mode defaults to using this form
  51. - <code>hierarchical_model</code> Parent-child mode
  52. - <code>qa_model</code> Q&A Mode: Generates Q&A pairs for segmented documents and then embeds the questions
  53. </Property>
  54. <Property name='doc_language' type='string' key='doc_language'>
  55. In Q&A mode, specify the language of the document, for example: <code>English</code>, <code>Chinese</code>
  56. </Property>
  57. <Property name='process_rule' type='object' key='process_rule'>
  58. Processing rules
  59. - <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
  60. - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
  61. - <code>pre_processing_rules</code> (array[object]) Preprocessing rules
  62. - <code>id</code> (string) Unique identifier for the preprocessing rule
  63. - enumerate
  64. - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
  65. - <code>remove_urls_emails</code> Delete URL, email address
  66. - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
  67. - <code>segmentation</code> (object) Segmentation rules
  68. - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
  69. - <code>max_tokens</code> Maximum length (token) defaults to 1000
  70. - <code>parent_mode</code> Retrieval mode of parent chunks: <code>full-doc</code> full text retrieval / <code>paragraph</code> paragraph retrieval
  71. - <code>subchunk_segmentation</code> (object) Child chunk rules
  72. - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
  73. - <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk
  74. - <code>chunk_overlap</code> Define the overlap between adjacent chunks (optional)
  75. </Property>
  76. <PropertyInstruction>When no parameters are set for the knowledge base, the first upload requires the following parameters to be provided; if not provided, the default parameters will be used.</PropertyInstruction>
  77. <Property name='retrieval_model' type='object' key='retrieval_model'>
  78. Retrieval model
  79. - <code>search_method</code> (string) Search method
  80. - <code>hybrid_search</code> Hybrid search
  81. - <code>semantic_search</code> Semantic search
  82. - <code>full_text_search</code> Full-text search
  83. - <code>reranking_enable</code> (bool) Whether to enable reranking
  84. - <code>reranking_model</code> (object) Rerank model configuration
  85. - <code>reranking_provider_name</code> (string) Rerank model provider
  86. - <code>reranking_model_name</code> (string) Rerank model name
  87. - <code>top_k</code> (int) Number of results to return
  88. - <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
  89. - <code>score_threshold</code> (float) Score threshold
  90. </Property>
  91. <Property name='embedding_model' type='string' key='embedding_model'>
  92. Embedding model name
  93. </Property>
  94. <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
  95. Embedding model provider
  96. </Property>
  97. </Properties>
  98. </Col>
  99. <Col sticky>
  100. <CodeGroup
  101. title="Request"
  102. tag="POST"
  103. label="/datasets/{dataset_id}/document/create-by-text"
  104. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
  105. >
  106. ```bash {{ title: 'cURL' }}
  107. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \
  108. --header 'Authorization: Bearer {api_key}' \
  109. --header 'Content-Type: application/json' \
  110. --data-raw '{
  111. "name": "text",
  112. "text": "text",
  113. "indexing_technique": "high_quality",
  114. "process_rule": {
  115. "mode": "automatic"
  116. }
  117. }'
  118. ```
  119. </CodeGroup>
  120. <CodeGroup title="Response">
  121. ```json {{ title: 'Response' }}
  122. {
  123. "document": {
  124. "id": "",
  125. "position": 1,
  126. "data_source_type": "upload_file",
  127. "data_source_info": {
  128. "upload_file_id": ""
  129. },
  130. "dataset_process_rule_id": "",
  131. "name": "text.txt",
  132. "created_from": "api",
  133. "created_by": "",
  134. "created_at": 1695690280,
  135. "tokens": 0,
  136. "indexing_status": "waiting",
  137. "error": null,
  138. "enabled": true,
  139. "disabled_at": null,
  140. "disabled_by": null,
  141. "archived": false,
  142. "display_status": "queuing",
  143. "word_count": 0,
  144. "hit_count": 0,
  145. "doc_form": "text_model"
  146. },
  147. "batch": ""
  148. }
  149. ```
  150. </CodeGroup>
  151. </Col>
  152. </Row>
  153. <hr className='ml-0 mr-0' />
  154. <Heading
  155. url='/datasets/{dataset_id}/document/create-by-file'
  156. method='POST'
  157. title='Create a Document from a File'
  158. name='#create-by-file'
  159. />
  160. <Row>
  161. <Col>
  162. This API creates a new document from an uploaded file within an existing knowledge base.
  163. ### Path
  164. <Properties>
  165. <Property name='dataset_id' type='string' key='dataset_id'>
  166. Knowledge ID
  167. </Property>
  168. </Properties>
  169. ### Request Body
  170. <Properties>
  171. <Property name='data' type='multipart/form-data json string' key='data'>
  172. - <code>original_document_id</code> Source document ID (optional)
  173. - Used to re-upload the document or modify the document cleaning and segmentation configuration. The missing information is copied from the source document
  174. - The source document cannot be an archived document
  175. - When original_document_id is passed in, the existing document is updated. process_rule is optional; if omitted, the segmentation method of the source document is used by default
  176. - When original_document_id is not passed in, a new document is created, and process_rule is required
  177. - <code>indexing_technique</code> Index mode
  178. - <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
  179. - <code>economy</code> Economy: Build using inverted index of keyword table index
  180. - <code>doc_form</code> Format of indexed content
  181. - <code>text_model</code> Text documents are directly embedded; `economy` mode defaults to using this form
  182. - <code>hierarchical_model</code> Parent-child mode
  183. - <code>qa_model</code> Q&A Mode: Generates Q&A pairs for segmented documents and then embeds the questions
  184. - <code>doc_language</code> In Q&A mode, specify the language of the document, for example: <code>English</code>, <code>Chinese</code>
  185. - <code>process_rule</code> Processing rules
  186. - <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
  187. - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
  188. - <code>pre_processing_rules</code> (array[object]) Preprocessing rules
  189. - <code>id</code> (string) Unique identifier for the preprocessing rule
  190. - enumerate
  191. - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
  192. - <code>remove_urls_emails</code> Delete URL, email address
  193. - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
  194. - <code>segmentation</code> (object) Segmentation rules
  195. - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
  196. - <code>max_tokens</code> Maximum length (token) defaults to 1000
  197. - <code>parent_mode</code> Retrieval mode of parent chunks: <code>full-doc</code> full text retrieval / <code>paragraph</code> paragraph retrieval
  198. - <code>subchunk_segmentation</code> (object) Child chunk rules
  199. - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
  200. - <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk
  201. - <code>chunk_overlap</code> Define the overlap between adjacent chunks (optional)
  202. </Property>
  203. <Property name='file' type='multipart/form-data' key='file'>
  204. Files that need to be uploaded.
  205. </Property>
  206. <PropertyInstruction>When no parameters are set for the knowledge base, the first upload requires the following parameters to be provided; if not provided, the default parameters will be used.</PropertyInstruction>
  207. <Property name='retrieval_model' type='object' key='retrieval_model'>
  208. Retrieval model
  209. - <code>search_method</code> (string) Search method
  210. - <code>hybrid_search</code> Hybrid search
  211. - <code>semantic_search</code> Semantic search
  212. - <code>full_text_search</code> Full-text search
  213. - <code>reranking_enable</code> (bool) Whether to enable reranking
  214. - <code>reranking_model</code> (object) Rerank model configuration
  215. - <code>reranking_provider_name</code> (string) Rerank model provider
  216. - <code>reranking_model_name</code> (string) Rerank model name
  217. - <code>top_k</code> (int) Number of results to return
  218. - <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
  219. - <code>score_threshold</code> (float) Score threshold
  220. </Property>
  221. <Property name='embedding_model' type='string' key='embedding_model'>
  222. Embedding model name
  223. </Property>
  224. <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
  225. Embedding model provider
  226. </Property>
  227. </Properties>
  228. </Col>
  229. <Col sticky>
  230. <CodeGroup
  231. title="Request"
  232. tag="POST"
  233. label="/datasets/{dataset_id}/document/create-by-file"
  234. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
  235. >
  236. ```bash {{ title: 'cURL' }}
  237. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \
  238. --header 'Authorization: Bearer {api_key}' \
  239. --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
  240. --form 'file=@"/path/to/file"'
  241. ```
  242. </CodeGroup>
  243. <CodeGroup title="Response">
  244. ```json {{ title: 'Response' }}
  245. {
  246. "document": {
  247. "id": "",
  248. "position": 1,
  249. "data_source_type": "upload_file",
  250. "data_source_info": {
  251. "upload_file_id": ""
  252. },
  253. "dataset_process_rule_id": "",
  254. "name": "Dify.txt",
  255. "created_from": "api",
  256. "created_by": "",
  257. "created_at": 1695308667,
  258. "tokens": 0,
  259. "indexing_status": "waiting",
  260. "error": null,
  261. "enabled": true,
  262. "disabled_at": null,
  263. "disabled_by": null,
  264. "archived": false,
  265. "display_status": "queuing",
  266. "word_count": 0,
  267. "hit_count": 0,
  268. "doc_form": "text_model"
  269. },
  270. "batch": ""
  271. }
  272. ```
  273. </CodeGroup>
  274. </Col>
  275. </Row>
  276. <hr className='ml-0 mr-0' />
  277. <Heading
  278. url='/datasets'
  279. method='POST'
  280. title='Create an Empty Knowledge Base'
  281. name='#create_empty_dataset'
  282. />
  283. <Row>
  284. <Col>
  285. ### Request Body
  286. <Properties>
  287. <Property name='name' type='string' key='name'>
  288. Knowledge name
  289. </Property>
  290. <Property name='description' type='string' key='description'>
  291. Knowledge description (optional)
  292. </Property>
  293. <Property name='indexing_technique' type='string' key='indexing_technique'>
  294. Index technique (optional)
  295. If this is not set, embedding_model, embedding_provider_name and retrieval_model will be set to null
  296. - <code>high_quality</code> High quality
  297. - <code>economy</code> Economy
  298. </Property>
  299. <Property name='permission' type='string' key='permission'>
  300. Permission
  301. - <code>only_me</code> Only me
  302. - <code>all_team_members</code> All team members
  303. - <code>partial_members</code> Partial members
  304. </Property>
  305. <Property name='provider' type='string' key='provider'>
  306. Provider (optional, default: vendor)
  307. - <code>vendor</code> Vendor
  308. - <code>external</code> External knowledge
  309. </Property>
  310. <Property name='external_knowledge_api_id' type='string' key='external_knowledge_api_id'>
  311. External knowledge API ID (optional)
  312. </Property>
  313. <Property name='external_knowledge_id' type='string' key='external_knowledge_id'>
  314. External knowledge ID (optional)
  315. </Property>
  316. <Property name='embedding_model' type='string' key='embedding_model'>
  317. Embedding model name (optional)
  318. </Property>
  319. <Property name='embedding_provider_name' type='string' key='embedding_provider_name'>
  320. Embedding model provider name (optional)
  321. </Property>
  322. <Property name='retrieval_model' type='object' key='retrieval_model'>
  323. Retrieval model (optional)
  324. - <code>search_method</code> (string) Search method
  325. - <code>hybrid_search</code> Hybrid search
  326. - <code>semantic_search</code> Semantic search
  327. - <code>full_text_search</code> Full-text search
  328. - <code>reranking_enable</code> (bool) Whether to enable reranking
  329. - <code>reranking_model</code> (object) Rerank model configuration
  330. - <code>reranking_provider_name</code> (string) Rerank model provider
  331. - <code>reranking_model_name</code> (string) Rerank model name
  332. - <code>top_k</code> (int) Number of results to return
  333. - <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
  334. - <code>score_threshold</code> (float) Score threshold
  335. </Property>
  336. </Properties>
  337. </Col>
  338. <Col sticky>
  339. <CodeGroup
  340. title="Request"
  341. tag="POST"
  342. label="/datasets"
  343. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name", "permission": "only_me"}'`}
  344. >
  345. ```bash {{ title: 'cURL' }}
  346. curl --location --request POST '${props.apiBaseUrl}/datasets' \
  347. --header 'Authorization: Bearer {api_key}' \
  348. --header 'Content-Type: application/json' \
  349. --data-raw '{
  350. "name": "name",
  351. "permission": "only_me"
  352. }'
  353. ```
  354. </CodeGroup>
  355. <CodeGroup title="Response">
  356. ```json {{ title: 'Response' }}
  357. {
  358. "id": "",
  359. "name": "name",
  360. "description": null,
  361. "provider": "vendor",
  362. "permission": "only_me",
  363. "data_source_type": null,
  364. "indexing_technique": null,
  365. "app_count": 0,
  366. "document_count": 0,
  367. "word_count": 0,
  368. "created_by": "",
  369. "created_at": 1695636173,
  370. "updated_by": "",
  371. "updated_at": 1695636173,
  372. "embedding_model": null,
  373. "embedding_model_provider": null,
  374. "embedding_available": null
  375. }
  376. ```
  377. </CodeGroup>
  378. </Col>
  379. </Row>
  380. <hr className='ml-0 mr-0' />
  381. <Heading
  382. url='/datasets'
  383. method='GET'
  384. title='Get Knowledge Base List'
  385. name='#dataset_list'
  386. />
  387. <Row>
  388. <Col>
  389. ### Query
  390. <Properties>
  391. <Property name='keyword' type='string' key='keyword'>
  392. Search keyword, optional
  393. </Property>
  394. <Property name='tag_ids' type='array[string]' key='tag_ids'>
  395. Tag ID list, optional
  396. </Property>
  397. <Property name='page' type='string' key='page'>
  398. Page number, optional, default 1
  399. </Property>
  400. <Property name='limit' type='string' key='limit'>
  401. Number of items returned, optional, default 20, range 1-100
  402. </Property>
  403. <Property name='include_all' type='boolean' key='include_all'>
  404. Whether to include all datasets (only effective for owners), optional, defaults to false
  405. </Property>
  406. </Properties>
  407. </Col>
  408. <Col sticky>
  409. <CodeGroup
  410. title="Request"
  411. tag="GET"
  412. label="/datasets"
  413. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
  414. >
  415. ```bash {{ title: 'cURL' }}
  416. curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \
  417. --header 'Authorization: Bearer {api_key}'
  418. ```
  419. </CodeGroup>
  420. <CodeGroup title="Response">
  421. ```json {{ title: 'Response' }}
  422. {
  423. "data": [
  424. {
  425. "id": "",
  426. "name": "name",
  427. "description": "desc",
  428. "permission": "only_me",
  429. "data_source_type": "upload_file",
  430. "indexing_technique": "",
  431. "app_count": 2,
  432. "document_count": 10,
  433. "word_count": 1200,
  434. "created_by": "",
  435. "created_at": "",
  436. "updated_by": "",
  437. "updated_at": ""
  438. },
  439. ...
  440. ],
  441. "has_more": true,
  442. "limit": 20,
  443. "total": 50,
  444. "page": 1
  445. }
  446. ```
  447. </CodeGroup>
  448. </Col>
  449. </Row>
  450. <hr className='ml-0 mr-0' />
  451. <Heading
  452. url='/datasets/{dataset_id}'
  453. method='GET'
  454. title='Get knowledge base details by knowledge base ID'
  455. name='#view_dataset'
  456. />
  457. <Row>
  458. <Col>
  459. ### Path
  460. <Properties>
  461. <Property name='dataset_id' type='string' key='dataset_id'>
  462. Knowledge Base ID
  463. </Property>
  464. </Properties>
  465. </Col>
  466. <Col sticky>
  467. <CodeGroup
  468. title="Request"
  469. tag="GET"
  470. label="/datasets/{dataset_id}"
  471. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  472. >
  473. ```bash {{ title: 'cURL' }}
  474. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}' \
  475. --header 'Authorization: Bearer {api_key}'
  476. ```
  477. </CodeGroup>
  478. <CodeGroup title="Response">
  479. ```json {{ title: 'Response' }}
  480. {
  481. "id": "eaedb485-95ac-4ffd-ab1e-18da6d676a2f",
  482. "name": "Test Knowledge Base",
  483. "description": "",
  484. "provider": "vendor",
  485. "permission": "only_me",
  486. "data_source_type": null,
  487. "indexing_technique": null,
  488. "app_count": 0,
  489. "document_count": 0,
  490. "word_count": 0,
  491. "created_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6",
  492. "created_at": 1735620612,
  493. "updated_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6",
  494. "updated_at": 1735620612,
  495. "embedding_model": null,
  496. "embedding_model_provider": null,
  497. "embedding_available": true,
  498. "retrieval_model_dict": {
  499. "search_method": "semantic_search",
  500. "reranking_enable": false,
  501. "reranking_mode": null,
  502. "reranking_model": {
  503. "reranking_provider_name": "",
  504. "reranking_model_name": ""
  505. },
  506. "weights": null,
  507. "top_k": 2,
  508. "score_threshold_enabled": false,
  509. "score_threshold": null
  510. },
  511. "tags": [],
  512. "doc_form": null,
  513. "external_knowledge_info": {
  514. "external_knowledge_id": null,
  515. "external_knowledge_api_id": null,
  516. "external_knowledge_api_name": null,
  517. "external_knowledge_api_endpoint": null
  518. },
  519. "external_retrieval_model": {
  520. "top_k": 2,
  521. "score_threshold": 0.0,
  522. "score_threshold_enabled": null
  523. }
  524. }
  525. ```
  526. </CodeGroup>
  527. </Col>
  528. </Row>
  529. <hr className='ml-0 mr-0' />
  530. <Heading
  531. url='/datasets/{dataset_id}'
  532. method='POST'
  533. title='Update knowledge base'
  534. name='#update_dataset'
  535. />
  536. <Row>
  537. <Col>
  538. ### Path
  539. <Properties>
  540. <Property name='dataset_id' type='string' key='dataset_id'>
  541. Knowledge Base ID
  542. </Property>
  543. <Property name='indexing_technique' type='string' key='indexing_technique'>
  544. Index technique (optional)
  545. - <code>high_quality</code> High quality
  546. - <code>economy</code> Economy
  547. </Property>
  548. <Property name='permission' type='string' key='permission'>
  549. Permission
  550. - <code>only_me</code> Only me
  551. - <code>all_team_members</code> All team members
  552. - <code>partial_members</code> Partial members
  553. </Property>
  554. <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
  555. Specified embedding model provider, must be set up in the system first, corresponding to the provider field(Optional)
  556. </Property>
  557. <Property name='embedding_model' type='string' key='embedding_model'>
  558. Specified embedding model, corresponding to the model field (optional)
  559. </Property>
  560. <Property name='retrieval_model' type='string' key='retrieval_model'>
  561. Specified retrieval model, corresponding to the model field (optional)
  562. </Property>
  563. <Property name='partial_member_list' type='array' key='partial_member_list'>
  564. Partial member list (optional)
  565. </Property>
  566. </Properties>
  567. </Col>
  568. <Col sticky>
  569. <CodeGroup
  570. title="Request"
  571. tag="POST"
  572. label="/datasets/{dataset_id}"
  573. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me", "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}' `}
  574. >
  575. ```bash {{ title: 'cURL' }}
  576. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \
  577. --header 'Authorization: Bearer {api_key}' \
  578. --header 'Content-Type: application/json' \
  579. --data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me",
  580. "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}'
  581. ```
  582. </CodeGroup>
  583. <CodeGroup title="Response">
  584. ```json {{ title: 'Response' }}
  585. {
  586. "id": "eaedb485-95ac-4ffd-ab1e-18da6d676a2f",
  587. "name": "Test Knowledge Base",
  588. "description": "",
  589. "provider": "vendor",
  590. "permission": "only_me",
  591. "data_source_type": null,
  592. "indexing_technique": "high_quality",
  593. "app_count": 0,
  594. "document_count": 0,
  595. "word_count": 0,
  596. "created_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6",
  597. "created_at": 1735620612,
  598. "updated_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6",
  599. "updated_at": 1735622679,
  600. "embedding_model": "embedding-3",
  601. "embedding_model_provider": "zhipuai",
  602. "embedding_available": null,
  603. "retrieval_model_dict": {
  604. "search_method": "semantic_search",
  605. "reranking_enable": false,
  606. "reranking_mode": null,
  607. "reranking_model": {
  608. "reranking_provider_name": "",
  609. "reranking_model_name": ""
  610. },
  611. "weights": null,
  612. "top_k": 2,
  613. "score_threshold_enabled": false,
  614. "score_threshold": null
  615. },
  616. "tags": [],
  617. "doc_form": null,
  618. "external_knowledge_info": {
  619. "external_knowledge_id": null,
  620. "external_knowledge_api_id": null,
  621. "external_knowledge_api_name": null,
  622. "external_knowledge_api_endpoint": null
  623. },
  624. "external_retrieval_model": {
  625. "top_k": 2,
  626. "score_threshold": 0.0,
  627. "score_threshold_enabled": null
  628. },
  629. "partial_member_list": []
  630. }
  631. ```
  632. </CodeGroup>
  633. </Col>
  634. </Row>
  635. <hr className='ml-0 mr-0' />
  636. <Heading
  637. url='/datasets/{dataset_id}'
  638. method='DELETE'
  639. title='Delete a Knowledge Base'
  640. name='#delete_dataset'
  641. />
  642. <Row>
  643. <Col>
  644. ### Path
  645. <Properties>
  646. <Property name='dataset_id' type='string' key='dataset_id'>
  647. Knowledge ID
  648. </Property>
  649. </Properties>
  650. </Col>
  651. <Col sticky>
  652. <CodeGroup
  653. title="Request"
  654. tag="DELETE"
  655. label="/datasets/{dataset_id}"
  656. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  657. >
  658. ```bash {{ title: 'cURL' }}
  659. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}' \
  660. --header 'Authorization: Bearer {api_key}'
  661. ```
  662. </CodeGroup>
  663. <CodeGroup title="Response">
  664. ```text {{ title: 'Response' }}
  665. 204 No Content
  666. ```
  667. </CodeGroup>
  668. </Col>
  669. </Row>
  670. <hr className='ml-0 mr-0' />
  671. <Heading
  672. url='/datasets/{dataset_id}/documents/{document_id}/update-by-text'
  673. method='POST'
  674. title='Update a Document with Text'
  675. name='#update-by-text'
  676. />
  677. <Row>
  678. <Col>
  679. This API updates an existing document in a knowledge base using text.
  680. ### Path
  681. <Properties>
  682. <Property name='dataset_id' type='string' key='dataset_id'>
  683. Knowledge ID
  684. </Property>
  685. <Property name='document_id' type='string' key='document_id'>
  686. Document ID
  687. </Property>
  688. </Properties>
  689. ### Request Body
  690. <Properties>
  691. <Property name='name' type='string' key='name'>
  692. Document name (optional)
  693. </Property>
  694. <Property name='text' type='string' key='text'>
  695. Document content (optional)
  696. </Property>
  697. <Property name='process_rule' type='object' key='process_rule'>
  698. Processing rules
  699. - <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
  700. - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
  701. - <code>pre_processing_rules</code> (array[object]) Preprocessing rules
  702. - <code>id</code> (string) Unique identifier for the preprocessing rule
  703. - enumerate
  704. - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
  705. - <code>remove_urls_emails</code> Delete URL, email address
  706. - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
  707. - <code>segmentation</code> (object) Segmentation rules
  708. - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
  709. - <code>max_tokens</code> Maximum length (token) defaults to 1000
  710. - <code>parent_mode</code> Retrieval mode of parent chunks: <code>full-doc</code> full text retrieval / <code>paragraph</code> paragraph retrieval
  711. - <code>subchunk_segmentation</code> (object) Child chunk rules
  712. - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
  713. - <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk
  714. - <code>chunk_overlap</code> Define the overlap between adjacent chunks (optional)
  715. </Property>
  716. </Properties>
  717. </Col>
  718. <Col sticky>
  719. <CodeGroup
  720. title="Request"
  721. tag="POST"
  722. label="/datasets/{dataset_id}/documents/{document_id}/update-by-text"
  723. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
  724. >
  725. ```bash {{ title: 'cURL' }}
  726. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \
  727. --header 'Authorization: Bearer {api_key}' \
  728. --header 'Content-Type: application/json' \
  729. --data-raw '{
  730. "name": "name",
  731. "text": "text"
  732. }'
  733. ```
  734. </CodeGroup>
  735. <CodeGroup title="Response">
  736. ```json {{ title: 'Response' }}
  737. {
  738. "document": {
  739. "id": "",
  740. "position": 1,
  741. "data_source_type": "upload_file",
  742. "data_source_info": {
  743. "upload_file_id": ""
  744. },
  745. "dataset_process_rule_id": "",
  746. "name": "name.txt",
  747. "created_from": "api",
  748. "created_by": "",
  749. "created_at": 1695308667,
  750. "tokens": 0,
  751. "indexing_status": "waiting",
  752. "error": null,
  753. "enabled": true,
  754. "disabled_at": null,
  755. "disabled_by": null,
  756. "archived": false,
  757. "display_status": "queuing",
  758. "word_count": 0,
  759. "hit_count": 0,
  760. "doc_form": "text_model"
  761. },
  762. "batch": ""
  763. }
  764. ```
  765. </CodeGroup>
  766. </Col>
  767. </Row>
  768. <hr className='ml-0 mr-0' />
  769. <Heading
  770. url='/datasets/{dataset_id}/documents/{document_id}/update-by-file'
  771. method='POST'
  772. title='Update a Document with a File'
  773. name='#update-by-file'
  774. />
  775. <Row>
  776. <Col>
  777. This API updates an existing document in a knowledge base using an uploaded file.
  778. ### Path
  779. <Properties>
  780. <Property name='dataset_id' type='string' key='dataset_id'>
  781. Knowledge ID
  782. </Property>
  783. <Property name='document_id' type='string' key='document_id'>
  784. Document ID
  785. </Property>
  786. </Properties>
  787. ### Request Body
  788. <Properties>
  789. <Property name='name' type='string' key='name'>
  790. Document name (optional)
  791. </Property>
  792. <Property name='file' type='multipart/form-data' key='file'>
  793. Files to be uploaded
  794. </Property>
  795. <Property name='process_rule' type='object' key='process_rule'>
  796. Processing rules
  797. - <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
  798. - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
  799. - <code>pre_processing_rules</code> (array[object]) Preprocessing rules
  800. - <code>id</code> (string) Unique identifier for the preprocessing rule
  801. - enumerate
  802. - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
  803. - <code>remove_urls_emails</code> Delete URL, email address
  804. - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
  805. - <code>segmentation</code> (object) Segmentation rules
  806. - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
  807. - <code>max_tokens</code> Maximum length (token) defaults to 1000
  808. - <code>parent_mode</code> Retrieval mode of parent chunks: <code>full-doc</code> full text retrieval / <code>paragraph</code> paragraph retrieval
  809. - <code>subchunk_segmentation</code> (object) Child chunk rules
  810. - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
  811. - <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk
  812. - <code>chunk_overlap</code> Define the overlap between adjacent chunks (optional)
  813. </Property>
  814. </Properties>
  815. </Col>
  816. <Col sticky>
  817. <CodeGroup
  818. title="Request"
  819. tag="POST"
  820. label="/datasets/{dataset_id}/documents/{document_id}/update-by-file"
  821. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{\\"name\\":\\"Dify\\",\\"indexing_technique\\":\\"high_quality\\",\\"process_rule\\":{\\"rules\\":{\\"pre_processing_rules\\":[{\\"id\\":\\"remove_extra_spaces\\",\\"enabled\\":true},{\\"id\\":\\"remove_urls_emails\\",\\"enabled\\":true}],\\"segmentation\\":{\\"separator\\":\\"###\\",\\"max_tokens\\":500}},\\"mode\\":\\"custom\\"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
  822. >
  823. ```bash {{ title: 'cURL' }}
  824. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \
  825. --header 'Authorization: Bearer {api_key}' \
  826. --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
  827. --form 'file=@"/path/to/file"'
  828. ```
  829. </CodeGroup>
  830. <CodeGroup title="Response">
  831. ```json {{ title: 'Response' }}
  832. {
  833. "document": {
  834. "id": "",
  835. "position": 1,
  836. "data_source_type": "upload_file",
  837. "data_source_info": {
  838. "upload_file_id": ""
  839. },
  840. "dataset_process_rule_id": "",
  841. "name": "Dify.txt",
  842. "created_from": "api",
  843. "created_by": "",
  844. "created_at": 1695308667,
  845. "tokens": 0,
  846. "indexing_status": "waiting",
  847. "error": null,
  848. "enabled": true,
  849. "disabled_at": null,
  850. "disabled_by": null,
  851. "archived": false,
  852. "display_status": "queuing",
  853. "word_count": 0,
  854. "hit_count": 0,
  855. "doc_form": "text_model"
  856. },
  857. "batch": "20230921150427533684"
  858. }
  859. ```
  860. </CodeGroup>
  861. </Col>
  862. </Row>
  863. <hr className='ml-0 mr-0' />
  864. <Heading
  865. url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
  866. method='GET'
  867. title='Get Document Embedding Status (Progress)'
  868. name='#indexing_status'
  869. />
  870. <Row>
  871. <Col>
  872. ### Path
  873. <Properties>
  874. <Property name='dataset_id' type='string' key='dataset_id'>
  875. Knowledge ID
  876. </Property>
  877. <Property name='batch' type='string' key='batch'>
  878. Batch number of uploaded documents
  879. </Property>
  880. </Properties>
  881. </Col>
  882. <Col sticky>
  883. <CodeGroup
  884. title="Request"
  885. tag="GET"
  886. label="/datasets/{dataset_id}/documents/{batch}/indexing-status"
  887. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
  888. >
  889. ```bash {{ title: 'cURL' }}
  890. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \
  891. --header 'Authorization: Bearer {api_key}'
  892. ```
  893. </CodeGroup>
  894. <CodeGroup title="Response">
  895. ```json {{ title: 'Response' }}
  896. {
  897. "data":[{
  898. "id": "",
  899. "indexing_status": "indexing",
  900. "processing_started_at": 1681623462.0,
  901. "parsing_completed_at": 1681623462.0,
  902. "cleaning_completed_at": 1681623462.0,
  903. "splitting_completed_at": 1681623462.0,
  904. "completed_at": null,
  905. "paused_at": null,
  906. "error": null,
  907. "stopped_at": null,
  908. "completed_segments": 24,
  909. "total_segments": 100
  910. }]
  911. }
  912. ```
  913. </CodeGroup>
  914. </Col>
  915. </Row>
  916. <hr className='ml-0 mr-0' />
  917. <Heading
  918. url='/datasets/{dataset_id}/documents/{document_id}'
  919. method='DELETE'
  920. title='Delete a Document'
  921. name='#delete_document'
  922. />
  923. <Row>
  924. <Col>
  925. ### Path
  926. <Properties>
  927. <Property name='dataset_id' type='string' key='dataset_id'>
  928. Knowledge ID
  929. </Property>
  930. <Property name='document_id' type='string' key='document_id'>
  931. Document ID
  932. </Property>
  933. </Properties>
  934. </Col>
  935. <Col sticky>
  936. <CodeGroup
  937. title="Request"
  938. tag="DELETE"
  939. label="/datasets/{dataset_id}/documents/{document_id}"
  940. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  941. >
  942. ```bash {{ title: 'cURL' }}
  943. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \
  944. --header 'Authorization: Bearer {api_key}'
  945. ```
  946. </CodeGroup>
  947. <CodeGroup title="Response">
  948. ```json {{ title: 'Response' }}
  949. {
  950. "result": "success"
  951. }
  952. ```
  953. </CodeGroup>
  954. </Col>
  955. </Row>
  956. <hr className='ml-0 mr-0' />
  957. <Heading
  958. url='/datasets/{dataset_id}/documents'
  959. method='GET'
  960. title='Get the Document List of a Knowledge Base'
  961. name='#dataset_document_list'
  962. />
  963. <Row>
  964. <Col>
  965. ### Path
  966. <Properties>
  967. <Property name='dataset_id' type='string' key='dataset_id'>
  968. Knowledge ID
  969. </Property>
  970. </Properties>
  971. ### Query
  972. <Properties>
  973. <Property name='keyword' type='string' key='keyword'>
  974. Search keywords, currently only search document names (optional)
  975. </Property>
  976. <Property name='page' type='string' key='page'>
  977. Page number (optional)
  978. </Property>
  979. <Property name='limit' type='string' key='limit'>
  980. Number of items returned, default 20, range 1-100 (optional)
  981. </Property>
  982. </Properties>
  983. </Col>
  984. <Col sticky>
  985. <CodeGroup
  986. title="Request"
  987. tag="GET"
  988. label="/datasets/{dataset_id}/documents"
  989. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`}
  990. >
  991. ```bash {{ title: 'cURL' }}
  992. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \
  993. --header 'Authorization: Bearer {api_key}'
  994. ```
  995. </CodeGroup>
  996. <CodeGroup title="Response">
  997. ```json {{ title: 'Response' }}
  998. {
  999. "data": [
  1000. {
  1001. "id": "",
  1002. "position": 1,
  1003. "data_source_type": "file_upload",
  1004. "data_source_info": null,
  1005. "dataset_process_rule_id": null,
  1006. "name": "dify",
  1007. "created_from": "",
  1008. "created_by": "",
  1009. "created_at": 1681623639,
  1010. "tokens": 0,
  1011. "indexing_status": "waiting",
  1012. "error": null,
  1013. "enabled": true,
  1014. "disabled_at": null,
  1015. "disabled_by": null,
  1016. "archived": false
  1017. }
  1018. ],
  1019. "has_more": false,
  1020. "limit": 20,
  1021. "total": 9,
  1022. "page": 1
  1023. }
  1024. ```
  1025. </CodeGroup>
  1026. </Col>
  1027. </Row>
  1028. <hr className='ml-0 mr-0' />
  1029. <Heading
  1030. url='/datasets/{dataset_id}/documents/{document_id}/segments'
  1031. method='POST'
  1032. title='Add Chunks to a Document'
  1033. name='#create_new_segment'
  1034. />
  1035. <Row>
  1036. <Col>
  1037. ### Path
  1038. <Properties>
  1039. <Property name='dataset_id' type='string' key='dataset_id'>
  1040. Knowledge ID
  1041. </Property>
  1042. <Property name='document_id' type='string' key='document_id'>
  1043. Document ID
  1044. </Property>
  1045. </Properties>
  1046. ### Request Body
  1047. <Properties>
  1048. <Property name='segments' type='object list' key='segments'>
  1049. - <code>content</code> (text) Text content / question content, required
  1050. - <code>answer</code> (text) Answer content, if the mode of the knowledge is Q&A mode, pass the value (optional)
  1051. - <code>keywords</code> (list) Keywords (optional)
  1052. </Property>
  1053. </Properties>
  1054. </Col>
  1055. <Col sticky>
  1056. <CodeGroup
  1057. title="Request"
  1058. tag="POST"
  1059. label="/datasets/{dataset_id}/documents/{document_id}/segments"
  1060. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`}
  1061. >
  1062. ```bash {{ title: 'cURL' }}
  1063. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
  1064. --header 'Authorization: Bearer {api_key}' \
  1065. --header 'Content-Type: application/json' \
  1066. --data-raw '{
  1067. "segments": [
  1068. {
  1069. "content": "1",
  1070. "answer": "1",
  1071. "keywords": ["a"]
  1072. }
  1073. ]
  1074. }'
  1075. ```
  1076. </CodeGroup>
  1077. <CodeGroup title="Response">
  1078. ```json {{ title: 'Response' }}
  1079. {
  1080. "data": [{
  1081. "id": "",
  1082. "position": 1,
  1083. "document_id": "",
  1084. "content": "1",
  1085. "answer": "1",
  1086. "word_count": 25,
  1087. "tokens": 0,
  1088. "keywords": [
  1089. "a"
  1090. ],
  1091. "index_node_id": "",
  1092. "index_node_hash": "",
  1093. "hit_count": 0,
  1094. "enabled": true,
  1095. "disabled_at": null,
  1096. "disabled_by": null,
  1097. "status": "completed",
  1098. "created_by": "",
  1099. "created_at": 1695312007,
  1100. "indexing_at": 1695312007,
  1101. "completed_at": 1695312007,
  1102. "error": null,
  1103. "stopped_at": null
  1104. }],
  1105. "doc_form": "text_model"
  1106. }
  1107. ```
  1108. </CodeGroup>
  1109. </Col>
  1110. </Row>
  1111. <hr className='ml-0 mr-0' />
  1112. <Heading
  1113. url='/datasets/{dataset_id}/documents/{document_id}/segments'
  1114. method='GET'
  1115. title='Get Chunks from a Document'
  1116. name='#get_segment'
  1117. />
  1118. <Row>
  1119. <Col>
  1120. ### Path
  1121. <Properties>
  1122. <Property name='dataset_id' type='string' key='dataset_id'>
  1123. Knowledge ID
  1124. </Property>
  1125. <Property name='document_id' type='string' key='document_id'>
  1126. Document ID
  1127. </Property>
  1128. </Properties>
  1129. ### Query
  1130. <Properties>
  1131. <Property name='keyword' type='string' key='keyword'>
  1132. Keyword (optional)
  1133. </Property>
  1134. <Property name='status' type='string' key='status'>
  1135. Search status, completed
  1136. </Property>
  1137. <Property name='page' type='string' key='page'>
  1138. Page number (optional)
  1139. </Property>
  1140. <Property name='limit' type='string' key='limit'>
  1141. Number of items returned, default 20, range 1-100 (optional)
  1142. </Property>
  1143. </Properties>
  1144. </Col>
  1145. <Col sticky>
  1146. <CodeGroup
  1147. title="Request"
  1148. tag="GET"
  1149. label="/datasets/{dataset_id}/documents/{document_id}/segments"
  1150. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  1151. >
  1152. ```bash {{ title: 'cURL' }}
  1153. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
  1154. --header 'Authorization: Bearer {api_key}' \
  1155. --header 'Content-Type: application/json'
  1156. ```
  1157. </CodeGroup>
  1158. <CodeGroup title="Response">
  1159. ```json {{ title: 'Response' }}
  1160. {
  1161. "data": [{
  1162. "id": "",
  1163. "position": 1,
  1164. "document_id": "",
  1165. "content": "1",
  1166. "answer": "1",
  1167. "word_count": 25,
  1168. "tokens": 0,
  1169. "keywords": [
  1170. "a"
  1171. ],
  1172. "index_node_id": "",
  1173. "index_node_hash": "",
  1174. "hit_count": 0,
  1175. "enabled": true,
  1176. "disabled_at": null,
  1177. "disabled_by": null,
  1178. "status": "completed",
  1179. "created_by": "",
  1180. "created_at": 1695312007,
  1181. "indexing_at": 1695312007,
  1182. "completed_at": 1695312007,
  1183. "error": null,
  1184. "stopped_at": null
  1185. }],
  1186. "doc_form": "text_model",
  1187. "has_more": false,
  1188. "limit": 20,
  1189. "total": 9,
  1190. "page": 1
  1191. }
  1192. ```
  1193. </CodeGroup>
  1194. </Col>
  1195. </Row>
  1196. <hr className='ml-0 mr-0' />
  1197. <Heading
  1198. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
  1199. method='DELETE'
  1200. title='Delete a Chunk in a Document'
  1201. name='#delete_segment'
  1202. />
  1203. <Row>
  1204. <Col>
  1205. ### Path
  1206. <Properties>
  1207. <Property name='dataset_id' type='string' key='dataset_id'>
  1208. Knowledge ID
  1209. </Property>
  1210. <Property name='document_id' type='string' key='document_id'>
  1211. Document ID
  1212. </Property>
  1213. <Property name='segment_id' type='string' key='segment_id'>
  1214. Document Segment ID
  1215. </Property>
  1216. </Properties>
  1217. </Col>
  1218. <Col sticky>
  1219. <CodeGroup
  1220. title="Request"
  1221. tag="DELETE"
  1222. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
  1223. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  1224. >
  1225. ```bash {{ title: 'cURL' }}
  1226. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
  1227. --header 'Authorization: Bearer {api_key}' \
  1228. --header 'Content-Type: application/json'
  1229. ```
  1230. </CodeGroup>
  1231. <CodeGroup title="Response">
  1232. ```json {{ title: 'Response' }}
  1233. {
  1234. "result": "success"
  1235. }
  1236. ```
  1237. </CodeGroup>
  1238. </Col>
  1239. </Row>
  1240. <hr className='ml-0 mr-0' />
  1241. <Heading
  1242. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
  1243. method='POST'
  1244. title='Update a Chunk in a Document'
  1245. name='#update_segment'
  1246. />
  1247. <Row>
  1248. <Col>
  1249. ### Path
  1250. <Properties>
  1251. <Property name='dataset_id' type='string' key='dataset_id'>
  1252. Knowledge ID
  1253. </Property>
  1254. <Property name='document_id' type='string' key='document_id'>
  1255. Document ID
  1256. </Property>
  1257. <Property name='segment_id' type='string' key='segment_id'>
  1258. Document Segment ID
  1259. </Property>
  1260. </Properties>
  1261. ### Request Body
  1262. <Properties>
  1263. <Property name='segment' type='object' key='segment'>
  1264. - <code>content</code> (text) Text content / question content, required
  1265. - <code>answer</code> (text) Answer content, passed if the knowledge is in Q&A mode (optional)
  1266. - <code>keywords</code> (list) Keyword (optional)
  1267. - <code>enabled</code> (bool) False / true (optional)
  1268. - <code>regenerate_child_chunks</code> (bool) Whether to regenerate child chunks (optional)
  1269. </Property>
  1270. </Properties>
  1271. </Col>
  1272. <Col sticky>
  1273. <CodeGroup
  1274. title="Request"
  1275. tag="POST"
  1276. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
  1277. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\"segment\": {\"content\": \"1\",\"answer\": \"1\", \"keywords\": [\"a\"], \"enabled\": false}}'`}
  1278. >
  1279. ```bash {{ title: 'cURL' }}
  1280. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
  1281. --header 'Authorization: Bearer {api_key}' --header 'Content-Type: application/json' \
  1282. --data-raw '{
  1283. "segment": {
  1284. "content": "1",
  1285. "answer": "1",
  1286. "keywords": ["a"],
  1287. "enabled": false
  1288. }
  1289. }'
  1290. ```
  1291. </CodeGroup>
  1292. <CodeGroup title="Response">
  1293. ```json {{ title: 'Response' }}
  1294. {
  1295. "data": {
  1296. "id": "",
  1297. "position": 1,
  1298. "document_id": "",
  1299. "content": "1",
  1300. "answer": "1",
  1301. "word_count": 25,
  1302. "tokens": 0,
  1303. "keywords": [
  1304. "a"
  1305. ],
  1306. "index_node_id": "",
  1307. "index_node_hash": "",
  1308. "hit_count": 0,
  1309. "enabled": true,
  1310. "disabled_at": null,
  1311. "disabled_by": null,
  1312. "status": "completed",
  1313. "created_by": "",
  1314. "created_at": 1695312007,
  1315. "indexing_at": 1695312007,
  1316. "completed_at": 1695312007,
  1317. "error": null,
  1318. "stopped_at": null
  1319. },
  1320. "doc_form": "text_model"
  1321. }
  1322. ```
  1323. </CodeGroup>
  1324. </Col>
  1325. </Row>
  1326. <hr className='ml-0 mr-0' />
  1327. <Heading
  1328. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks'
  1329. method='POST'
  1330. title='Create Child Chunk'
  1331. name='#create_child_chunk'
  1332. />
  1333. <Row>
  1334. <Col>
  1335. ### Path
  1336. <Properties>
  1337. <Property name='dataset_id' type='string' key='dataset_id'>
  1338. Knowledge ID
  1339. </Property>
  1340. <Property name='document_id' type='string' key='document_id'>
  1341. Document ID
  1342. </Property>
  1343. <Property name='segment_id' type='string' key='segment_id'>
  1344. Segment ID
  1345. </Property>
  1346. </Properties>
  1347. ### Request Body
  1348. <Properties>
  1349. <Property name='content' type='string' key='content'>
  1350. Child chunk content
  1351. </Property>
  1352. </Properties>
  1353. </Col>
  1354. <Col sticky>
  1355. <CodeGroup
  1356. title="Request"
  1357. tag="POST"
  1358. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks"
  1359. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "Child chunk content"}'`}
  1360. >
  1361. ```bash {{ title: 'cURL' }}
  1362. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \
  1363. --header 'Authorization: Bearer {api_key}' \
  1364. --header 'Content-Type: application/json' \
  1365. --data-raw '{
  1366. "content": "Child chunk content"
  1367. }'
  1368. ```
  1369. </CodeGroup>
  1370. <CodeGroup title="Response">
  1371. ```json {{ title: 'Response' }}
  1372. {
  1373. "data": {
  1374. "id": "",
  1375. "segment_id": "",
  1376. "content": "Child chunk content",
  1377. "word_count": 25,
  1378. "tokens": 0,
  1379. "index_node_id": "",
  1380. "index_node_hash": "",
  1381. "status": "completed",
  1382. "created_by": "",
  1383. "created_at": 1695312007,
  1384. "indexing_at": 1695312007,
  1385. "completed_at": 1695312007,
  1386. "error": null,
  1387. "stopped_at": null
  1388. }
  1389. }
  1390. ```
  1391. </CodeGroup>
  1392. </Col>
  1393. </Row>
  1394. <hr className='ml-0 mr-0' />
  1395. <Heading
  1396. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks'
  1397. method='GET'
  1398. title='Get Child Chunks'
  1399. name='#get_child_chunks'
  1400. />
  1401. <Row>
  1402. <Col>
  1403. ### Path
  1404. <Properties>
  1405. <Property name='dataset_id' type='string' key='dataset_id'>
  1406. Knowledge ID
  1407. </Property>
  1408. <Property name='document_id' type='string' key='document_id'>
  1409. Document ID
  1410. </Property>
  1411. <Property name='segment_id' type='string' key='segment_id'>
  1412. Segment ID
  1413. </Property>
  1414. </Properties>
  1415. ### Query
  1416. <Properties>
  1417. <Property name='keyword' type='string' key='keyword'>
  1418. Search keyword (optional)
  1419. </Property>
  1420. <Property name='page' type='integer' key='page'>
  1421. Page number (optional, default: 1)
  1422. </Property>
  1423. <Property name='limit' type='integer' key='limit'>
  1424. Items per page (optional, default: 20, max: 100)
  1425. </Property>
  1426. </Properties>
  1427. </Col>
  1428. <Col sticky>
  1429. <CodeGroup
  1430. title="Request"
  1431. tag="GET"
  1432. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks"
  1433. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
  1434. >
  1435. ```bash {{ title: 'cURL' }}
  1436. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \
  1437. --header 'Authorization: Bearer {api_key}'
  1438. ```
  1439. </CodeGroup>
  1440. <CodeGroup title="Response">
  1441. ```json {{ title: 'Response' }}
  1442. {
  1443. "data": [{
  1444. "id": "",
  1445. "segment_id": "",
  1446. "content": "Child chunk content",
  1447. "word_count": 25,
  1448. "tokens": 0,
  1449. "index_node_id": "",
  1450. "index_node_hash": "",
  1451. "status": "completed",
  1452. "created_by": "",
  1453. "created_at": 1695312007,
  1454. "indexing_at": 1695312007,
  1455. "completed_at": 1695312007,
  1456. "error": null,
  1457. "stopped_at": null
  1458. }],
  1459. "total": 1,
  1460. "total_pages": 1,
  1461. "page": 1,
  1462. "limit": 20
  1463. }
  1464. ```
  1465. </CodeGroup>
  1466. </Col>
  1467. </Row>
  1468. <hr className='ml-0 mr-0' />
  1469. <Heading
  1470. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}'
  1471. method='DELETE'
  1472. title='Delete Child Chunk'
  1473. name='#delete_child_chunk'
  1474. />
  1475. <Row>
  1476. <Col>
  1477. ### Path
  1478. <Properties>
  1479. <Property name='dataset_id' type='string' key='dataset_id'>
  1480. Knowledge ID
  1481. </Property>
  1482. <Property name='document_id' type='string' key='document_id'>
  1483. Document ID
  1484. </Property>
  1485. <Property name='segment_id' type='string' key='segment_id'>
  1486. Segment ID
  1487. </Property>
  1488. <Property name='child_chunk_id' type='string' key='child_chunk_id'>
  1489. Child Chunk ID
  1490. </Property>
  1491. </Properties>
  1492. </Col>
  1493. <Col sticky>
  1494. <CodeGroup
  1495. title="Request"
  1496. tag="DELETE"
  1497. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}"
  1498. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  1499. >
  1500. ```bash {{ title: 'cURL' }}
  1501. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \
  1502. --header 'Authorization: Bearer {api_key}'
  1503. ```
  1504. </CodeGroup>
  1505. <CodeGroup title="Response">
  1506. ```json {{ title: 'Response' }}
  1507. {
  1508. "result": "success"
  1509. }
  1510. ```
  1511. </CodeGroup>
  1512. </Col>
  1513. </Row>
  1514. <hr className='ml-0 mr-0' />
  1515. <Heading
  1516. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}'
  1517. method='PATCH'
  1518. title='Update Child Chunk'
  1519. name='#update_child_chunk'
  1520. />
  1521. <Row>
  1522. <Col>
  1523. ### Path
  1524. <Properties>
  1525. <Property name='dataset_id' type='string' key='dataset_id'>
  1526. Knowledge ID
  1527. </Property>
  1528. <Property name='document_id' type='string' key='document_id'>
  1529. Document ID
  1530. </Property>
  1531. <Property name='segment_id' type='string' key='segment_id'>
  1532. Segment ID
  1533. </Property>
  1534. <Property name='child_chunk_id' type='string' key='child_chunk_id'>
  1535. Child Chunk ID
  1536. </Property>
  1537. </Properties>
  1538. ### Request Body
  1539. <Properties>
  1540. <Property name='content' type='string' key='content'>
  1541. Child chunk content
  1542. </Property>
  1543. </Properties>
  1544. </Col>
  1545. <Col sticky>
  1546. <CodeGroup
  1547. title="Request"
  1548. tag="PATCH"
  1549. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}"
  1550. targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "Updated child chunk content"}'`}
  1551. >
  1552. ```bash {{ title: 'cURL' }}
  1553. curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \
  1554. --header 'Authorization: Bearer {api_key}' \
  1555. --header 'Content-Type: application/json' \
  1556. --data-raw '{
  1557. "content": "Updated child chunk content"
  1558. }'
  1559. ```
  1560. </CodeGroup>
  1561. <CodeGroup title="Response">
  1562. ```json {{ title: 'Response' }}
  1563. {
  1564. "data": {
  1565. "id": "",
  1566. "segment_id": "",
  1567. "content": "Updated child chunk content",
  1568. "word_count": 25,
  1569. "tokens": 0,
  1570. "index_node_id": "",
  1571. "index_node_hash": "",
  1572. "status": "completed",
  1573. "created_by": "",
  1574. "created_at": 1695312007,
  1575. "indexing_at": 1695312007,
  1576. "completed_at": 1695312007,
  1577. "error": null,
  1578. "stopped_at": null
  1579. }
  1580. }
  1581. ```
  1582. </CodeGroup>
  1583. </Col>
  1584. </Row>
  1585. <hr className='ml-0 mr-0' />
  1586. <Heading
  1587. url='/datasets/{dataset_id}/documents/{document_id}/upload-file'
  1588. method='GET'
  1589. title='Get Upload File'
  1590. name='#get_upload_file'
  1591. />
  1592. <Row>
  1593. <Col>
  1594. ### Path
  1595. <Properties>
  1596. <Property name='dataset_id' type='string' key='dataset_id'>
  1597. Knowledge ID
  1598. </Property>
  1599. <Property name='document_id' type='string' key='document_id'>
  1600. Document ID
  1601. </Property>
  1602. </Properties>
  1603. </Col>
  1604. <Col sticky>
  1605. <CodeGroup
  1606. title="Request"
  1607. tag="GET"
  1608. label="/datasets/{dataset_id}/documents/{document_id}/upload-file"
  1609. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  1610. >
  1611. ```bash {{ title: 'cURL' }}
  1612. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \
  1613. --header 'Authorization: Bearer {api_key}' \
  1614. --header 'Content-Type: application/json'
  1615. ```
  1616. </CodeGroup>
  1617. <CodeGroup title="Response">
  1618. ```json {{ title: 'Response' }}
  1619. {
  1620. "id": "file_id",
  1621. "name": "file_name",
  1622. "size": 1024,
  1623. "extension": "txt",
  1624. "url": "preview_url",
  1625. "download_url": "download_url",
  1626. "mime_type": "text/plain",
  1627. "created_by": "user_id",
  1628. "created_at": 1728734540
  1629. }
  1630. ```
  1631. </CodeGroup>
  1632. </Col>
  1633. </Row>
  1634. <hr className='ml-0 mr-0' />
  1635. <Heading
  1636. url='/datasets/{dataset_id}/retrieve'
  1637. method='POST'
  1638. title='Retrieve Chunks from a Knowledge Base'
  1639. name='#dataset_retrieval'
  1640. />
  1641. <Row>
  1642. <Col>
  1643. ### Path
  1644. <Properties>
  1645. <Property name='dataset_id' type='string' key='dataset_id'>
  1646. Knowledge ID
  1647. </Property>
  1648. </Properties>
  1649. ### Request Body
  1650. <Properties>
  1651. <Property name='query' type='string' key='query'>
  1652. Query keyword
  1653. </Property>
  1654. <Property name='retrieval_model' type='object' key='retrieval_model'>
  1655. Retrieval model settings (optional; if omitted, retrieval falls back to the default method)
  1656. - <code>search_method</code> (text) Search method: One of the following four keywords is required
  1657. - <code>keyword_search</code> Keyword search
  1658. - <code>semantic_search</code> Semantic search
  1659. - <code>full_text_search</code> Full-text search
  1660. - <code>hybrid_search</code> Hybrid search
  1661. - <code>reranking_enable</code> (bool) Whether to enable reranking, required if the search mode is semantic_search or hybrid_search (optional)
  1662. - <code>reranking_model</code> (object) Rerank model configuration, required if reranking is enabled
  1663. - <code>reranking_provider_name</code> (string) Rerank model provider
  1664. - <code>reranking_model_name</code> (string) Rerank model name
  1665. - <code>weights</code> (float) Semantic search weight setting in hybrid search mode
  1666. - <code>top_k</code> (integer) Number of results to return (optional)
  1667. - <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
  1668. - <code>score_threshold</code> (float) Score threshold
  1669. </Property>
  1670. <Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
  1671. Unused field
  1672. </Property>
  1673. </Properties>
  1674. </Col>
  1675. <Col sticky>
  1676. <CodeGroup
  1677. title="Request"
  1678. tag="POST"
  1679. label="/datasets/{dataset_id}/retrieve"
  1680. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{
  1681. "query": "test",
  1682. "retrieval_model": {
  1683. "search_method": "keyword_search",
  1684. "reranking_enable": false,
  1685. "reranking_mode": null,
  1686. "reranking_model": {
  1687. "reranking_provider_name": "",
  1688. "reranking_model_name": ""
  1689. },
  1690. "weights": null,
  1691. "top_k": 2,
  1692. "score_threshold_enabled": false,
  1693. "score_threshold": null
  1694. }
  1695. }'`}
  1696. >
  1697. ```bash {{ title: 'cURL' }}
  1698. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \
  1699. --header 'Authorization: Bearer {api_key}' \
  1700. --header 'Content-Type: application/json' \
  1701. --data-raw '{
  1702. "query": "test",
  1703. "retrieval_model": {
  1704. "search_method": "keyword_search",
  1705. "reranking_enable": false,
  1706. "reranking_mode": null,
  1707. "reranking_model": {
  1708. "reranking_provider_name": "",
  1709. "reranking_model_name": ""
  1710. },
  1711. "weights": null,
  1712. "top_k": 2,
  1713. "score_threshold_enabled": false,
  1714. "score_threshold": null
  1715. }
  1716. }'
  1717. ```
  1718. </CodeGroup>
  1719. <CodeGroup title="Response">
  1720. ```json {{ title: 'Response' }}
  1721. {
  1722. "query": {
  1723. "content": "test"
  1724. },
  1725. "records": [
  1726. {
  1727. "segment": {
  1728. "id": "7fa6f24f-8679-48b3-bc9d-bdf28d73f218",
  1729. "position": 1,
  1730. "document_id": "a8c6c36f-9f5d-4d7a-8472-f5d7b75d71d2",
  1731. "content": "Operation guide",
  1732. "answer": null,
  1733. "word_count": 847,
  1734. "tokens": 280,
  1735. "keywords": [
  1736. "install",
  1737. "java",
  1738. "base",
  1739. "scripts",
  1740. "jdk",
  1741. "manual",
  1742. "internal",
  1743. "opens",
  1744. "add",
  1745. "vmoptions"
  1746. ],
  1747. "index_node_id": "39dd8443-d960-45a8-bb46-7275ad7fbc8e",
  1748. "index_node_hash": "0189157697b3c6a418ccf8264a09699f25858975578f3467c76d6bfc94df1d73",
  1749. "hit_count": 0,
  1750. "enabled": true,
  1751. "disabled_at": null,
  1752. "disabled_by": null,
  1753. "status": "completed",
  1754. "created_by": "dbcb1ab5-90c8-41a7-8b78-73b235eb6f6f",
  1755. "created_at": 1728734540,
  1756. "indexing_at": 1728734552,
  1757. "completed_at": 1728734584,
  1758. "error": null,
  1759. "stopped_at": null,
  1760. "document": {
  1761. "id": "a8c6c36f-9f5d-4d7a-8472-f5d7b75d71d2",
  1762. "data_source_type": "upload_file",
  1763. "name": "readme.txt",
  1764. }
  1765. },
  1766. "score": 3.730463140527718e-05,
  1767. "tsne_position": null
  1768. }
  1769. ]
  1770. }
  1771. ```
  1772. </CodeGroup>
  1773. </Col>
  1774. </Row>
  1775. <hr className='ml-0 mr-0' />
  1776. <Heading
  1777. url='/datasets/{dataset_id}/metadata'
  1778. method='POST'
  1779. title='Create a Knowledge Metadata'
  1780. name='#create_metadata'
  1781. />
  1782. <Row>
  1783. <Col>
  1784. ### Path
  1785. <Properties>
  1786. <Property name='dataset_id' type='string' key='dataset_id'>
  1787. Knowledge ID
  1788. </Property>
  1789. </Properties>
  1790. ### Request Body
  1791. <Properties>
  1792. <Property name='segment' type='object' key='segment'>
  1793. - <code>type</code> (string) Metadata type, required
  1794. - <code>name</code> (string) Metadata name, required
  1795. </Property>
  1796. </Properties>
  1797. </Col>
  1798. <Col sticky>
  1799. <CodeGroup
  1800. title="Request"
  1801. tag="POST"
  1802. label="/datasets/{dataset_id}/metadata"
  1803. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/metadata' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"type": "string", "name": "test"}'`}
  1804. >
  1805. ```bash {{ title: 'cURL' }}
  1806. ```
  1807. </CodeGroup>
  1808. <CodeGroup title="Response">
  1809. ```json {{ title: 'Response' }}
  1810. {
  1811. "id": "abc",
  1812. "type": "string",
  1813. "name": "test"
  1814. }
  1815. ```
  1816. </CodeGroup>
  1817. </Col>
  1818. </Row>
  1819. <hr className='ml-0 mr-0' />
  1820. <Heading
  1821. url='/datasets/{dataset_id}/metadata/{metadata_id}'
  1822. method='PATCH'
  1823. title='Update a Knowledge Metadata'
  1824. name='#update_metadata'
  1825. />
  1826. <Row>
  1827. <Col>
  1828. ### Path
  1829. <Properties>
  1830. <Property name='dataset_id' type='string' key='dataset_id'>
  1831. Knowledge ID
  1832. </Property>
  1833. <Property name='metadata_id' type='string' key='metadata_id'>
  1834. Metadata ID
  1835. </Property>
  1836. </Properties>
  1837. ### Request Body
  1838. <Properties>
  1839. <Property name='segment' type='object' key='segment'>
  1840. - <code>name</code> (string) Metadata name, required
  1841. </Property>
  1842. </Properties>
  1843. </Col>
  1844. <Col sticky>
  1845. <CodeGroup
  1846. title="Request"
  1847. tag="PATCH"
  1848. label="/datasets/{dataset_id}/metadata/{metadata_id}"
  1849. targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/metadata/{metadata_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "test"}'`}
  1850. >
  1851. ```bash {{ title: 'cURL' }}
  1852. ```
  1853. </CodeGroup>
  1854. <CodeGroup title="Response">
  1855. ```json {{ title: 'Response' }}
  1856. {
  1857. "id": "abc",
  1858. "type": "string",
  1859. "name": "test"
  1860. }
  1861. ```
  1862. </CodeGroup>
  1863. </Col>
  1864. </Row>
  1865. <hr className='ml-0 mr-0' />
  1866. <Heading
  1867. url='/datasets/{dataset_id}/metadata/{metadata_id}'
  1868. method='DELETE'
  1869. title='Delete a Knowledge Metadata'
  1870. name='#delete_metadata'
  1871. />
  1872. <Row>
  1873. <Col>
  1874. ### Path
  1875. <Properties>
  1876. <Property name='dataset_id' type='string' key='dataset_id'>
  1877. Knowledge ID
  1878. </Property>
  1879. <Property name='metadata_id' type='string' key='metadata_id'>
  1880. Metadata ID
  1881. </Property>
  1882. </Properties>
  1883. </Col>
  1884. <Col sticky>
  1885. <CodeGroup
  1886. title="Request"
  1887. tag="DELETE"
  1888. label="/datasets/{dataset_id}/metadata/{metadata_id}"
  1889. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/metadata/{metadata_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  1890. >
  1891. ```bash {{ title: 'cURL' }}
  1892. ```
  1893. </CodeGroup>
  1894. </Col>
  1895. </Row>
  1896. <hr className='ml-0 mr-0' />
  1897. <Heading
  1898. url='/datasets/{dataset_id}/metadata/built-in/{action}'
  1899. method='POST'
  1900. title='Disable Or Enable Built-in Metadata'
  1901. name='#toggle_metadata'
  1902. />
  1903. <Row>
  1904. <Col>
  1905. ### Path
  1906. <Properties>
  1907. <Property name='dataset_id' type='string' key='dataset_id'>
  1908. Knowledge ID
  1909. </Property>
  1910. <Property name='action' type='string' key='action'>
  1911. disable/enable
  1912. </Property>
  1913. </Properties>
  1914. </Col>
  1915. <Col sticky>
  1916. <CodeGroup
  1917. title="Request"
  1918. tag="POST"
  1919. label="/datasets/{dataset_id}/metadata/built-in/{action}"
  1920. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/metadata/built-in/{action}' \\\n--header 'Authorization: Bearer {api_key}'`}
  1921. >
  1922. ```bash {{ title: 'cURL' }}
  1923. ```
  1924. </CodeGroup>
  1925. </Col>
  1926. </Row>
  1927. <hr className='ml-0 mr-0' />
  1928. <Heading
  1929. url='/datasets/{dataset_id}/documents/metadata'
  1930. method='POST'
  1931. title='Update Documents Metadata'
  1932. name='#update_documents_metadata'
  1933. />
  1934. <Row>
  1935. <Col>
  1936. ### Path
  1937. <Properties>
  1938. <Property name='dataset_id' type='string' key='dataset_id'>
  1939. Knowledge ID
  1940. </Property>
  1941. </Properties>
  1942. ### Request Body
  1943. <Properties>
  1944. <Property name='operation_data' type='object list' key='operation_data'>
  1945. - <code>document_id</code> (string) Document ID
  1946. - <code>metadata_list</code> (list) Metadata list
  1947. - <code>id</code> (string) Metadata ID
  1948. - <code>value</code> (string) Metadata value
  1949. - <code>name</code> (string) Metadata name
  1950. </Property>
  1951. </Properties>
  1952. </Col>
  1953. <Col sticky>
  1954. <CodeGroup
  1955. title="Request"
  1956. tag="POST"
  1957. label="/datasets/{dataset_id}/documents/metadata"
  1958. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/metadata' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"operation_data": [{"document_id": "document_id", "metadata_list": [{"id": "id", "value": "value", "name": "name"}]}]}'`}
  1959. >
  1960. ```bash {{ title: 'cURL' }}
  1961. ```
  1962. </CodeGroup>
  1963. </Col>
  1964. </Row>
  1965. <hr className='ml-0 mr-0' />
  1966. <Heading
  1967. url='/datasets/{dataset_id}/metadata'
  1968. method='GET'
  1969. title='Get Knowledge Metadata List'
  1970. name='#dataset_metadata_list'
  1971. />
  1972. <Row>
  1973. <Col>
  1974. ### Path
  1975. <Properties>
  1976. <Property name='dataset_id' type='string' key='dataset_id'>
  1977. Knowledge ID
  1978. </Property>
  1979. </Properties>
  1980. </Col>
  1981. <Col sticky>
  1982. <CodeGroup
  1983. title="Request"
  1984. tag="GET"
  1985. label="/datasets/{dataset_id}/metadata"
  1986. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/metadata' \\\n--header 'Authorization: Bearer {api_key}'`}
  1987. >
  1988. ```bash {{ title: 'cURL' }}
  1989. ```
  1990. </CodeGroup>
  1991. <CodeGroup title="Response">
  1992. ```json {{ title: 'Response' }}
  1993. {
  1994. "doc_metadata": [
  1995. {
  1996. "id": "",
  1997. "name": "name",
  1998. "type": "string",
  1999. "use_count": 0
  2000. },
  2001. ...
  2002. ],
  2003. "built_in_field_enabled": true
  2004. }
  2005. ```
  2006. </CodeGroup>
  2007. </Col>
  2008. </Row>
  2009. <hr className='ml-0 mr-0' />
  2010. <Heading
  2011. url='/workspaces/current/models/model-types/text-embedding'
  2012. method='GET'
  2013. title='Get available embedding models'
  2014. name='#model_type_list'
  2015. />
  2016. <Row>
  2017. <Col>
  2018. ### Query
  2019. <Properties>
  2020. </Properties>
  2021. </Col>
  2022. <Col sticky>
  2023. <CodeGroup
  2024. title="Request"
  2025. tag="GET"
  2026. label="/workspaces/current/models/model-types/text-embedding"
  2027. targetCode={`curl --location --request GET '${props.apiBaseUrl}/workspaces/current/models/model-types/text-embedding' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  2028. >
  2029. ```bash {{ title: 'cURL' }}
  2030. curl --location --request GET '${props.apiBaseUrl}/workspaces/current/models/model-types/text-embedding' \
  2031. --header 'Authorization: Bearer {api_key}' \
  2032. --header 'Content-Type: application/json'
  2033. ```
  2034. </CodeGroup>
  2035. <CodeGroup title="Response">
  2036. ```json {{ title: 'Response' }}
  2037. {
  2038. "data": [
  2039. {
  2040. "provider": "zhipuai",
  2041. "label": {
  2042. "zh_Hans": "智谱 AI",
  2043. "en_US": "ZHIPU AI"
  2044. },
  2045. "icon_small": {
  2046. "zh_Hans": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_small/zh_Hans",
  2047. "en_US": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_small/en_US"
  2048. },
  2049. "icon_large": {
  2050. "zh_Hans": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_large/zh_Hans",
  2051. "en_US": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_large/en_US"
  2052. },
  2053. "status": "active",
  2054. "models": [
  2055. {
  2056. "model": "embedding-3",
  2057. "label": {
  2058. "zh_Hans": "embedding-3",
  2059. "en_US": "embedding-3"
  2060. },
  2061. "model_type": "text-embedding",
  2062. "features": null,
  2063. "fetch_from": "predefined-model",
  2064. "model_properties": {
  2065. "context_size": 8192
  2066. },
  2067. "deprecated": false,
  2068. "status": "active",
  2069. "load_balancing_enabled": false
  2070. },
  2071. {
  2072. "model": "embedding-2",
  2073. "label": {
  2074. "zh_Hans": "embedding-2",
  2075. "en_US": "embedding-2"
  2076. },
  2077. "model_type": "text-embedding",
  2078. "features": null,
  2079. "fetch_from": "predefined-model",
  2080. "model_properties": {
  2081. "context_size": 8192
  2082. },
  2083. "deprecated": false,
  2084. "status": "active",
  2085. "load_balancing_enabled": false
  2086. },
  2087. {
  2088. "model": "text_embedding",
  2089. "label": {
  2090. "zh_Hans": "text_embedding",
  2091. "en_US": "text_embedding"
  2092. },
  2093. "model_type": "text-embedding",
  2094. "features": null,
  2095. "fetch_from": "predefined-model",
  2096. "model_properties": {
  2097. "context_size": 512
  2098. },
  2099. "deprecated": false,
  2100. "status": "active",
  2101. "load_balancing_enabled": false
  2102. }
  2103. ]
  2104. }
  2105. ]
  2106. }
  2107. ```
  2108. </CodeGroup>
  2109. </Col>
  2110. </Row>
  2111. <hr className='ml-0 mr-0' />
  2112. <Row>
  2113. <Col>
  2114. ### Error message
  2115. <Properties>
  2116. <Property name='code' type='string' key='code'>
  2117. Error code
  2118. </Property>
  2119. </Properties>
  2120. <Properties>
  2121. <Property name='status' type='number' key='status'>
  2122. Error status
  2123. </Property>
  2124. </Properties>
  2125. <Properties>
  2126. <Property name='message' type='string' key='message'>
  2127. Error message
  2128. </Property>
  2129. </Properties>
  2130. </Col>
  2131. <Col>
  2132. <CodeGroup title="Example">
  2133. ```json {{ title: 'Response' }}
  2134. {
  2135. "code": "no_file_uploaded",
  2136. "message": "Please upload your file.",
  2137. "status": 400
  2138. }
  2139. ```
  2140. </CodeGroup>
  2141. </Col>
  2142. </Row>
  2143. <table className="max-w-auto border-collapse border border-slate-400" style={{ maxWidth: 'none', width: 'auto' }}>
  2144. <thead style={{ background: '#f9fafc' }}>
  2145. <tr>
  2146. <th className="p-2 border border-slate-300">code</th>
  2147. <th className="p-2 border border-slate-300">status</th>
  2148. <th className="p-2 border border-slate-300">message</th>
  2149. </tr>
  2150. </thead>
  2151. <tbody>
  2152. <tr>
  2153. <td className="p-2 border border-slate-300">no_file_uploaded</td>
  2154. <td className="p-2 border border-slate-300">400</td>
  2155. <td className="p-2 border border-slate-300">Please upload your file.</td>
  2156. </tr>
  2157. <tr>
  2158. <td className="p-2 border border-slate-300">too_many_files</td>
  2159. <td className="p-2 border border-slate-300">400</td>
  2160. <td className="p-2 border border-slate-300">Only one file is allowed.</td>
  2161. </tr>
  2162. <tr>
  2163. <td className="p-2 border border-slate-300">file_too_large</td>
  2164. <td className="p-2 border border-slate-300">413</td>
  2165. <td className="p-2 border border-slate-300">File size exceeded.</td>
  2166. </tr>
  2167. <tr>
  2168. <td className="p-2 border border-slate-300">unsupported_file_type</td>
  2169. <td className="p-2 border border-slate-300">415</td>
  2170. <td className="p-2 border border-slate-300">File type not allowed.</td>
  2171. </tr>
  2172. <tr>
  2173. <td className="p-2 border border-slate-300">high_quality_dataset_only</td>
  2174. <td className="p-2 border border-slate-300">400</td>
  2175. <td className="p-2 border border-slate-300">Current operation only supports 'high-quality' datasets.</td>
  2176. </tr>
  2177. <tr>
  2178. <td className="p-2 border border-slate-300">dataset_not_initialized</td>
  2179. <td className="p-2 border border-slate-300">400</td>
  2180. <td className="p-2 border border-slate-300">The dataset is still being initialized or indexing. Please wait a moment.</td>
  2181. </tr>
  2182. <tr>
  2183. <td className="p-2 border border-slate-300">archived_document_immutable</td>
  2184. <td className="p-2 border border-slate-300">403</td>
  2185. <td className="p-2 border border-slate-300">The archived document is not editable.</td>
  2186. </tr>
  2187. <tr>
  2188. <td className="p-2 border border-slate-300">dataset_name_duplicate</td>
  2189. <td className="p-2 border border-slate-300">409</td>
  2190. <td className="p-2 border border-slate-300">The dataset name already exists. Please modify your dataset name.</td>
  2191. </tr>
  2192. <tr>
  2193. <td className="p-2 border border-slate-300">invalid_action</td>
  2194. <td className="p-2 border border-slate-300">400</td>
  2195. <td className="p-2 border border-slate-300">Invalid action.</td>
  2196. </tr>
  2197. <tr>
  2198. <td className="p-2 border border-slate-300">document_already_finished</td>
  2199. <td className="p-2 border border-slate-300">400</td>
  2200. <td className="p-2 border border-slate-300">The document has been processed. Please refresh the page or go to the document details.</td>
  2201. </tr>
  2202. <tr>
  2203. <td className="p-2 border border-slate-300">document_indexing</td>
  2204. <td className="p-2 border border-slate-300">400</td>
  2205. <td className="p-2 border border-slate-300">The document is being processed and cannot be edited.</td>
  2206. </tr>
  2207. <tr>
  2208. <td className="p-2 border border-slate-300">invalid_metadata</td>
  2209. <td className="p-2 border border-slate-300">400</td>
  2210. <td className="p-2 border border-slate-300">The metadata content is incorrect. Please check and verify.</td>
  2211. </tr>
  2212. </tbody>
  2213. </table>
  2214. <div className="pb-4" />