You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344
  1. {/**
  2. * @typedef Props
  3. * @property {string} apiBaseUrl
  4. */}
  5. import { CodeGroup } from '@/app/components/develop/code.tsx'
  6. import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstruction, Paragraph } from '@/app/components/develop/md.tsx'
  7. # 知识库 API
  8. <div>
  9. ### 鉴权
  10. Dify Service API 使用 `API-Key` 进行鉴权。
  11. 建议开发者把 `API-Key` 放在后端存储,而非分享或者放在客户端存储,以免 `API-Key` 泄露,导致财产损失。
  12. 所有 API 请求都应在 **`Authorization`** HTTP Header 中包含您的 `API-Key`,如下所示:
  13. <CodeGroup title="Code">
  14. ```javascript
  15. Authorization: Bearer {API_KEY}
  16. ```
  17. </CodeGroup>
  18. </div>
  19. <hr className='ml-0 mr-0' />
  20. <Heading
  21. url='/datasets/{dataset_id}/document/create-by-text'
  22. method='POST'
  23. title='通过文本创建文档'
  24. name='#create-by-text'
  25. />
  26. <Row>
  27. <Col>
  28. 此接口基于已存在知识库,在此知识库的基础上通过文本创建新的文档
  29. ### Path
  30. <Properties>
  31. <Property name='dataset_id' type='string' key='dataset_id'>
  32. 知识库 ID
  33. </Property>
  34. </Properties>
  35. ### Request Body
  36. <Properties>
  37. <Property name='name' type='string' key='name'>
  38. 文档名称
  39. </Property>
  40. <Property name='text' type='string' key='text'>
  41. 文档内容
  42. </Property>
  43. <Property name='indexing_technique' type='string' key='indexing_technique'>
  44. 索引方式
  45. - <code>high_quality</code> 高质量:使用
  46. embedding 模型进行嵌入,构建为向量数据库索引
  47. - <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
  48. </Property>
  49. <Property name='doc_form' type='string' key='doc_form'>
  50. 索引内容的形式
  51. - <code>text_model</code> text 文档直接 embedding,经济模式默认为该模式
  52. - <code>hierarchical_model</code> parent-child 模式
  53. - <code>qa_model</code> Q&A 模式:为分片文档生成 Q&A 对,然后对问题进行 embedding
  54. </Property>
  55. <Property name='doc_language' type='string' key='doc_language'>
  56. 在 Q&A 模式下,指定文档的语言,例如:<code>English</code>、<code>Chinese</code>
  57. </Property>
  58. <Property name='process_rule' type='object' key='process_rule'>
  59. 处理规则
  60. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  61. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  62. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  63. - <code>id</code> (string) 预处理规则的唯一标识符
  64. - 枚举:
  65. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  66. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  67. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  68. - <code>segmentation</code> (object) 分段规则
  69. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 <code>\n</code>
  70. - <code>max_tokens</code> 最大长度(token)默认为 1000
  71. - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
  72. - <code>subchunk_segmentation</code> (object) 子分段规则
  73. - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
  74. - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
  75. - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填)
  76. </Property>
  77. <PropertyInstruction>当知识库未设置任何参数的时候,首次上传需要提供以下参数,未提供则使用默认选项:</PropertyInstruction>
  78. <Property name='retrieval_model' type='object' key='retrieval_model'>
  79. 检索模式
  80. - <code>search_method</code> (string) 检索方法
  81. - <code>hybrid_search</code> 混合检索
  82. - <code>semantic_search</code> 语义检索
  83. - <code>full_text_search</code> 全文检索
  84. - <code>reranking_enable</code> (bool) 是否开启rerank
  85. - <code>reranking_model</code> (object) Rerank 模型配置
  86. - <code>reranking_provider_name</code> (string) Rerank 模型的提供商
  87. - <code>reranking_model_name</code> (string) Rerank 模型的名称
  88. - <code>top_k</code> (int) 召回条数
  89. - <code>score_threshold_enabled</code> (bool)是否开启召回分数限制
  90. - <code>score_threshold</code> (float) 召回分数限制
  91. </Property>
  92. <Property name='embedding_model' type='string' key='embedding_model'>
  93. Embedding 模型名称
  94. </Property>
  95. <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
  96. Embedding 模型供应商
  97. </Property>
  98. </Properties>
  99. </Col>
  100. <Col sticky>
  101. <CodeGroup
  102. title="Request"
  103. tag="POST"
  104. label="/datasets/{dataset_id}/document/create-by-text"
  105. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
  106. >
  107. ```bash {{ title: 'cURL' }}
  108. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \
  109. --header 'Authorization: Bearer {api_key}' \
  110. --header 'Content-Type: application/json' \
  111. --data-raw '{
  112. "name": "text",
  113. "text": "text",
  114. "indexing_technique": "high_quality",
  115. "process_rule": {
  116. "mode": "automatic"
  117. }
  118. }'
  119. ```
  120. </CodeGroup>
  121. <CodeGroup title="Response">
  122. ```json {{ title: 'Response' }}
  123. {
  124. "document": {
  125. "id": "",
  126. "position": 1,
  127. "data_source_type": "upload_file",
  128. "data_source_info": {
  129. "upload_file_id": ""
  130. },
  131. "dataset_process_rule_id": "",
  132. "name": "text.txt",
  133. "created_from": "api",
  134. "created_by": "",
  135. "created_at": 1695690280,
  136. "tokens": 0,
  137. "indexing_status": "waiting",
  138. "error": null,
  139. "enabled": true,
  140. "disabled_at": null,
  141. "disabled_by": null,
  142. "archived": false,
  143. "display_status": "queuing",
  144. "word_count": 0,
  145. "hit_count": 0,
  146. "doc_form": "text_model"
  147. },
  148. "batch": ""
  149. }
  150. ```
  151. </CodeGroup>
  152. </Col>
  153. </Row>
  154. <hr className='ml-0 mr-0' />
  155. <Heading
  156. url='/datasets/{dataset_id}/document/create-by-file'
  157. method='POST'
  158. title='通过文件创建文档'
  159. name='#create-by-file'
  160. />
  161. <Row>
  162. <Col>
  163. 此接口基于已存在知识库,在此知识库的基础上通过文件创建新的文档
  164. ### Path
  165. <Properties>
  166. <Property name='dataset_id' type='string' key='dataset_id'>
  167. 知识库 ID
  168. </Property>
  169. </Properties>
  170. ### Request Body
  171. <Properties>
  172. <Property name='data' type='multipart/form-data json string' key='data'>
  173. - <code>original_document_id</code> 源文档 ID(选填)
  174. - 用于重新上传文档或修改文档清洗、分段配置,缺失的信息从源文档复制
  175. - 源文档不可为归档的文档
  176. - 当传入 <code>original_document_id</code> 时,代表文档进行更新操作,<code>process_rule</code> 为可填项目,不填默认使用源文档的分段方式
  177. - 未传入 <code>original_document_id</code> 时,代表文档进行新增操作,<code>process_rule</code> 为必填
  178. - <code>indexing_technique</code> 索引方式
  179. - <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
  180. - <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
  181. - <code>doc_form</code> 索引内容的形式
  182. - <code>text_model</code> text 文档直接 embedding,经济模式默认为该模式
  183. - <code>hierarchical_model</code> parent-child 模式
  184. - <code>qa_model</code> Q&A 模式:为分片文档生成 Q&A 对,然后对问题进行 embedding
  185. - <code>doc_language</code> 在 Q&A 模式下,指定文档的语言,例如:<code>English</code>、<code>Chinese</code>
  186. - <code>process_rule</code> 处理规则
  187. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  188. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  189. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  190. - <code>id</code> (string) 预处理规则的唯一标识符
  191. - 枚举:
  192. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  193. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  194. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  195. - <code>segmentation</code> (object) 分段规则
  196. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 <code>\n</code>
  197. - <code>max_tokens</code> 最大长度(token)默认为 1000
  198. - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
  199. - <code>subchunk_segmentation</code> (object) 子分段规则
  200. - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
  201. - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
  202. - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填)
  203. </Property>
  204. <Property name='file' type='multipart/form-data' key='file'>
  205. 需要上传的文件。
  206. </Property>
  207. <PropertyInstruction>当知识库未设置任何参数的时候,首次上传需要提供以下参数,未提供则使用默认选项:</PropertyInstruction>
  208. <Property name='retrieval_model' type='object' key='retrieval_model'>
  209. 检索模式
  210. - <code>search_method</code> (string) 检索方法
  211. - <code>hybrid_search</code> 混合检索
  212. - <code>semantic_search</code> 语义检索
  213. - <code>full_text_search</code> 全文检索
  214. - <code>reranking_enable</code> (bool) 是否开启rerank
  215. - <code>reranking_model</code> (object) Rerank 模型配置
  216. - <code>reranking_provider_name</code> (string) Rerank 模型的提供商
  217. - <code>reranking_model_name</code> (string) Rerank 模型的名称
  218. - <code>top_k</code> (int) 召回条数
  219. - <code>score_threshold_enabled</code> (bool)是否开启召回分数限制
  220. - <code>score_threshold</code> (float) 召回分数限制
  221. </Property>
  222. <Property name='embedding_model' type='string' key='embedding_model'>
  223. Embedding 模型名称
  224. </Property>
  225. <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
  226. Embedding 模型供应商
  227. </Property>
  228. </Properties>
  229. </Col>
  230. <Col sticky>
  231. <CodeGroup
  232. title="Request"
  233. tag="POST"
  234. label="/datasets/{dataset_id}/document/create-by-file"
  235. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{\\"name\\":\\"Dify\\",\\"indexing_technique\\":\\"high_quality\\",\\"process_rule\\":{\\"rules\\":{\\"pre_processing_rules\\":[{\\"id\\":\\"remove_extra_spaces\\",\\"enabled\\":true},{\\"id\\":\\"remove_urls_emails\\",\\"enabled\\":true}],\\"segmentation\\":{\\"separator\\":\\"###\\",\\"max_tokens\\":500}},\\"mode\\":\\"custom\\"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
  236. >
  237. ```bash {{ title: 'cURL' }}
  238. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \
  239. --header 'Authorization: Bearer {api_key}' \
  240. --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
  241. --form 'file=@"/path/to/file"'
  242. ```
  243. </CodeGroup>
  244. <CodeGroup title="Response">
  245. ```json {{ title: 'Response' }}
  246. {
  247. "document": {
  248. "id": "",
  249. "position": 1,
  250. "data_source_type": "upload_file",
  251. "data_source_info": {
  252. "upload_file_id": ""
  253. },
  254. "dataset_process_rule_id": "",
  255. "name": "Dify.txt",
  256. "created_from": "api",
  257. "created_by": "",
  258. "created_at": 1695308667,
  259. "tokens": 0,
  260. "indexing_status": "waiting",
  261. "error": null,
  262. "enabled": true,
  263. "disabled_at": null,
  264. "disabled_by": null,
  265. "archived": false,
  266. "display_status": "queuing",
  267. "word_count": 0,
  268. "hit_count": 0,
  269. "doc_form": "text_model"
  270. },
  271. "batch": ""
  272. }
  273. ```
  274. </CodeGroup>
  275. </Col>
  276. </Row>
  277. <hr className='ml-0 mr-0' />
  278. <Heading
  279. url='/datasets'
  280. method='POST'
  281. title='创建空知识库'
  282. name='#create_empty_dataset'
  283. />
  284. <Row>
  285. <Col>
  286. ### Request Body
  287. <Properties>
  288. <Property name='name' type='string' key='name'>
  289. 知识库名称(必填)
  290. </Property>
  291. <Property name='description' type='string' key='description'>
  292. 知识库描述(选填)
  293. </Property>
  294. <Property name='indexing_technique' type='string' key='indexing_technique'>
  295. 索引模式(选填,建议填写)
  296. - <code>high_quality</code> 高质量
  297. - <code>economy</code> 经济
  298. </Property>
  299. <Property name='permission' type='string' key='permission'>
  300. 权限(选填,默认 only_me)
  301. - <code>only_me</code> 仅自己
  302. - <code>all_team_members</code> 所有团队成员
  303. - <code>partial_members</code> 部分团队成员
  304. </Property>
  305. <Property name='provider' type='string' key='provider'>
  306. Provider(选填,默认 vendor)
  307. - <code>vendor</code> 上传文件
  308. - <code>external</code> 外部知识库
  309. </Property>
  310. <Property name='external_knowledge_api_id' type='string' key='external_knowledge_api_id'>
  311. 外部知识库 API_ID(选填)
  312. </Property>
  313. <Property name='external_knowledge_id' type='string' key='external_knowledge_id'>
  314. 外部知识库 ID(选填)
  315. </Property>
  316. <Property name='embedding_model' type='string' key='embedding_model'>
  317. Embedding 模型名称
  318. </Property>
  319. <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
  320. Embedding 模型供应商
  321. </Property>
  322. <Property name='retrieval_model' type='object' key='retrieval_model'>
  323. 检索模式
  324. - <code>search_method</code> (string) 检索方法
  325. - <code>hybrid_search</code> 混合检索
  326. - <code>semantic_search</code> 语义检索
  327. - <code>full_text_search</code> 全文检索
  328. - <code>reranking_enable</code> (bool) 是否开启rerank
  329. - <code>reranking_model</code> (object) Rerank 模型配置
  330. - <code>reranking_provider_name</code> (string) Rerank 模型的提供商
  331. - <code>reranking_model_name</code> (string) Rerank 模型的名称
  332. - <code>top_k</code> (int) 召回条数
  333. - <code>score_threshold_enabled</code> (bool)是否开启召回分数限制
  334. - <code>score_threshold</code> (float) 召回分数限制
  335. </Property>
  336. </Properties>
  337. </Col>
  338. <Col sticky>
  339. <CodeGroup
  340. title="Request"
  341. tag="POST"
  342. label="/datasets"
  343. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name", "permission": "only_me"}'`}
  344. >
  345. ```bash {{ title: 'cURL' }}
  346. curl --location --request POST '${props.apiBaseUrl}/datasets' \
  347. --header 'Authorization: Bearer {api_key}' \
  348. --header 'Content-Type: application/json' \
  349. --data-raw '{
  350. "name": "name",
  351. "permission": "only_me"
  352. }'
  353. ```
  354. </CodeGroup>
  355. <CodeGroup title="Response">
  356. ```json {{ title: 'Response' }}
  357. {
  358. "id": "",
  359. "name": "name",
  360. "description": null,
  361. "provider": "vendor",
  362. "permission": "only_me",
  363. "data_source_type": null,
  364. "indexing_technique": null,
  365. "app_count": 0,
  366. "document_count": 0,
  367. "word_count": 0,
  368. "created_by": "",
  369. "created_at": 1695636173,
  370. "updated_by": "",
  371. "updated_at": 1695636173,
  372. "embedding_model": null,
  373. "embedding_model_provider": null,
  374. "embedding_available": null
  375. }
  376. ```
  377. </CodeGroup>
  378. </Col>
  379. </Row>
  380. <hr className='ml-0 mr-0' />
  381. <Heading
  382. url='/datasets'
  383. method='GET'
  384. title='知识库列表'
  385. name='#dataset_list'
  386. />
  387. <Row>
  388. <Col>
  389. ### Query
  390. <Properties>
  391. <Property name='keyword' type='string' key='keyword'>
  392. 搜索关键词,可选
  393. </Property>
  394. <Property name='tag_ids' type='array[string]' key='tag_ids'>
  395. 标签 ID 列表,可选
  396. </Property>
  397. <Property name='page' type='integer' key='page'>
  398. 页码,可选,默认为 1
  399. </Property>
  400. <Property name='limit' type='integer' key='limit'>
  401. 返回条数,可选,默认 20,范围 1-100
  402. </Property>
  403. <Property name='include_all' type='boolean' key='include_all'>
  404. 是否包含所有数据集(仅对所有者生效),可选,默认为 false
  405. </Property>
  406. </Properties>
  407. </Col>
  408. <Col sticky>
  409. <CodeGroup
  410. title="Request"
  411. tag="GET"
  412. label="/datasets"
  413. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
  414. >
  415. ```bash {{ title: 'cURL' }}
  416. curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \
  417. --header 'Authorization: Bearer {api_key}'
  418. ```
  419. </CodeGroup>
  420. <CodeGroup title="Response">
  421. ```json {{ title: 'Response' }}
  422. {
  423. "data": [
  424. {
  425. "id": "",
  426. "name": "知识库名称",
  427. "description": "描述信息",
  428. "permission": "only_me",
  429. "data_source_type": "upload_file",
  430. "indexing_technique": "",
  431. "app_count": 2,
  432. "document_count": 10,
  433. "word_count": 1200,
  434. "created_by": "",
  435. "created_at": "",
  436. "updated_by": "",
  437. "updated_at": ""
  438. },
  439. ...
  440. ],
  441. "has_more": true,
  442. "limit": 20,
  443. "total": 50,
  444. "page": 1
  445. }
  446. ```
  447. </CodeGroup>
  448. </Col>
  449. </Row>
  450. <hr className='ml-0 mr-0' />
  451. <Heading
  452. url='/datasets/{dataset_id}'
  453. method='GET'
  454. title='查看知识库详情'
  455. name='#view_dataset'
  456. />
  457. <Row>
  458. <Col>
  459. ### Path
  460. <Properties>
  461. <Property name='dataset_id' type='string' key='dataset_id'>
  462. 知识库 ID
  463. </Property>
  464. </Properties>
  465. </Col>
  466. <Col sticky>
  467. <CodeGroup
  468. title="Request"
  469. tag="GET"
  470. label="/datasets/{dataset_id}"
  471. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  472. >
  473. ```bash {{ title: 'cURL' }}
  474. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}' \
  475. --header 'Authorization: Bearer {api_key}'
  476. ```
  477. </CodeGroup>
  478. <CodeGroup title="Response">
  479. ```json {{ title: 'Response' }}
  480. {
  481. "id": "eaedb485-95ac-4ffd-ab1e-18da6d676a2f",
  482. "name": "Test Knowledge Base",
  483. "description": "",
  484. "provider": "vendor",
  485. "permission": "only_me",
  486. "data_source_type": null,
  487. "indexing_technique": null,
  488. "app_count": 0,
  489. "document_count": 0,
  490. "word_count": 0,
  491. "created_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6",
  492. "created_at": 1735620612,
  493. "updated_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6",
  494. "updated_at": 1735620612,
  495. "embedding_model": null,
  496. "embedding_model_provider": null,
  497. "embedding_available": true,
  498. "retrieval_model_dict": {
  499. "search_method": "semantic_search",
  500. "reranking_enable": false,
  501. "reranking_mode": null,
  502. "reranking_model": {
  503. "reranking_provider_name": "",
  504. "reranking_model_name": ""
  505. },
  506. "weights": null,
  507. "top_k": 2,
  508. "score_threshold_enabled": false,
  509. "score_threshold": null
  510. },
  511. "tags": [],
  512. "doc_form": null,
  513. "external_knowledge_info": {
  514. "external_knowledge_id": null,
  515. "external_knowledge_api_id": null,
  516. "external_knowledge_api_name": null,
  517. "external_knowledge_api_endpoint": null
  518. },
  519. "external_retrieval_model": {
  520. "top_k": 2,
  521. "score_threshold": 0.0,
  522. "score_threshold_enabled": null
  523. }
  524. }
  525. ```
  526. </CodeGroup>
  527. </Col>
  528. </Row>
  529. <hr className='ml-0 mr-0' />
  530. <Heading
  531. url='/datasets/{dataset_id}'
  532. method='POST'
  533. title='修改知识库详情'
  534. name='#update_dataset'
  535. />
  536. <Row>
  537. <Col>
  538. ### Path
  539. <Properties>
  540. <Property name='dataset_id' type='string' key='dataset_id'>
  541. 知识库 ID
  542. </Property>
  543. </Properties>
  544. ### Request Body
  545. <Properties>
  546. <Property name='indexing_technique' type='string' key='indexing_technique'>
  547. 索引模式(选填,建议填写)
  548. - <code>high_quality</code> 高质量
  549. - <code>economy</code> 经济
  550. </Property>
  551. <Property name='permission' type='string' key='permission'>
  552. 权限(选填,默认 only_me)
  553. - <code>only_me</code> 仅自己
  554. - <code>all_team_members</code> 所有团队成员
  555. - <code>partial_members</code> 部分团队成员
  556. </Property>
  557. <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
  558. 嵌入模型提供商(选填), 必须先在系统内设定好接入的模型,对应的是provider字段
  559. </Property>
  560. <Property name='embedding_model' type='string' key='embedding_model'>
  561. 嵌入模型(选填)
  562. </Property>
  563. <Property name='retrieval_model' type='object' key='retrieval_model'>
  564. 检索模型(选填)
  565. </Property>
  566. <Property name='partial_member_list' type='array' key='partial_member_list'>
  567. 部分团队成员 ID 列表(选填)
  568. </Property>
  569. </Properties>
  570. </Col>
  571. <Col sticky>
  572. <CodeGroup
  573. title="Request"
  574. tag="POST"
  575. label="/datasets/{dataset_id}"
  576. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me", "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}' `}
  577. >
  578. ```bash {{ title: 'cURL' }}
  579. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \
  580. --header 'Authorization: Bearer {api_key}' \
  581. --header 'Content-Type: application/json' \
  582. --data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me",
  583. "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}'
  584. ```
  585. </CodeGroup>
  586. <CodeGroup title="Response">
  587. ```json {{ title: 'Response' }}
  588. {
  589. "id": "eaedb485-95ac-4ffd-ab1e-18da6d676a2f",
  590. "name": "Test Knowledge Base",
  591. "description": "",
  592. "provider": "vendor",
  593. "permission": "only_me",
  594. "data_source_type": null,
  595. "indexing_technique": "high_quality",
  596. "app_count": 0,
  597. "document_count": 0,
  598. "word_count": 0,
  599. "created_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6",
  600. "created_at": 1735620612,
  601. "updated_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6",
  602. "updated_at": 1735622679,
  603. "embedding_model": "embedding-3",
  604. "embedding_model_provider": "zhipuai",
  605. "embedding_available": null,
  606. "retrieval_model_dict": {
  607. "search_method": "semantic_search",
  608. "reranking_enable": false,
  609. "reranking_mode": null,
  610. "reranking_model": {
  611. "reranking_provider_name": "",
  612. "reranking_model_name": ""
  613. },
  614. "weights": null,
  615. "top_k": 2,
  616. "score_threshold_enabled": false,
  617. "score_threshold": null
  618. },
  619. "tags": [],
  620. "doc_form": null,
  621. "external_knowledge_info": {
  622. "external_knowledge_id": null,
  623. "external_knowledge_api_id": null,
  624. "external_knowledge_api_name": null,
  625. "external_knowledge_api_endpoint": null
  626. },
  627. "external_retrieval_model": {
  628. "top_k": 2,
  629. "score_threshold": 0.0,
  630. "score_threshold_enabled": null
  631. },
  632. "partial_member_list": []
  633. }
  634. ```
  635. </CodeGroup>
  636. </Col>
  637. </Row>
  638. <hr className='ml-0 mr-0' />
  639. <Heading
  640. url='/datasets/{dataset_id}'
  641. method='DELETE'
  642. title='删除知识库'
  643. name='#delete_dataset'
  644. />
  645. <Row>
  646. <Col>
  647. ### Path
  648. <Properties>
  649. <Property name='dataset_id' type='string' key='dataset_id'>
  650. 知识库 ID
  651. </Property>
  652. </Properties>
  653. </Col>
  654. <Col sticky>
  655. <CodeGroup
  656. title="Request"
  657. tag="DELETE"
  658. label="/datasets/{dataset_id}"
  659. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  660. >
  661. ```bash {{ title: 'cURL' }}
  662. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}' \
  663. --header 'Authorization: Bearer {api_key}'
  664. ```
  665. </CodeGroup>
  666. <CodeGroup title="Response">
  667. ```text {{ title: 'Response' }}
  668. 204 No Content
  669. ```
  670. </CodeGroup>
  671. </Col>
  672. </Row>
  673. <hr className='ml-0 mr-0' />
  674. <Heading
  675. url='/datasets/{dataset_id}/documents/{document_id}/update-by-text'
  676. method='POST'
  677. title='通过文本更新文档'
  678. name='#update-by-text'
  679. />
  680. <Row>
  681. <Col>
  682. 此接口基于已存在知识库,在此知识库的基础上通过文本更新文档的操作。
  683. ### Path
  684. <Properties>
  685. <Property name='dataset_id' type='string' key='dataset_id'>
  686. 知识库 ID
  687. </Property>
  688. <Property name='document_id' type='string' key='document_id'>
  689. 文档 ID
  690. </Property>
  691. </Properties>
  692. ### Request Body
  693. <Properties>
  694. <Property name='name' type='string' key='name'>
  695. 文档名称(选填)
  696. </Property>
  697. <Property name='text' type='string' key='text'>
  698. 文档内容(选填)
  699. </Property>
  700. <Property name='process_rule' type='object' key='process_rule'>
  701. 处理规则(选填)
  702. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  703. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  704. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  705. - <code>id</code> (string) 预处理规则的唯一标识符
  706. - 枚举:
  707. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  708. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  709. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  710. - <code>segmentation</code> (object) 分段规则
  711. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
  712. - <code>max_tokens</code> 最大长度(token)默认为 1000
  713. - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
  714. - <code>subchunk_segmentation</code> (object) 子分段规则
  715. - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
  716. - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
  717. - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填)
  718. </Property>
  719. </Properties>
  720. </Col>
  721. <Col sticky>
  722. <CodeGroup
  723. title="Request"
  724. tag="POST"
  725. label="/datasets/{dataset_id}/documents/{document_id}/update-by-text"
  726. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
  727. >
  728. ```bash {{ title: 'cURL' }}
  729. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \
  730. --header 'Authorization: Bearer {api_key}' \
  731. --header 'Content-Type: application/json' \
  732. --data-raw '{
  733. "name": "name",
  734. "text": "text"
  735. }'
  736. ```
  737. </CodeGroup>
  738. <CodeGroup title="Response">
  739. ```json {{ title: 'Response' }}
  740. {
  741. "document": {
  742. "id": "",
  743. "position": 1,
  744. "data_source_type": "upload_file",
  745. "data_source_info": {
  746. "upload_file_id": ""
  747. },
  748. "dataset_process_rule_id": "",
  749. "name": "name.txt",
  750. "created_from": "api",
  751. "created_by": "",
  752. "created_at": 1695308667,
  753. "tokens": 0,
  754. "indexing_status": "waiting",
  755. "error": null,
  756. "enabled": true,
  757. "disabled_at": null,
  758. "disabled_by": null,
  759. "archived": false,
  760. "display_status": "queuing",
  761. "word_count": 0,
  762. "hit_count": 0,
  763. "doc_form": "text_model"
  764. },
  765. "batch": ""
  766. }
  767. ```
  768. </CodeGroup>
  769. </Col>
  770. </Row>
  771. <hr className='ml-0 mr-0' />
  772. <Heading
  773. url='/datasets/{dataset_id}/documents/{document_id}/update-by-file'
  774. method='POST'
  775. title='通过文件更新文档'
  776. name='#update-by-file'
  777. />
  778. <Row>
  779. <Col>
  780. 此接口基于已存在知识库,在此知识库的基础上通过文件更新文档的操作。
  781. ### Path
  782. <Properties>
  783. <Property name='dataset_id' type='string' key='dataset_id'>
  784. 知识库 ID
  785. </Property>
  786. <Property name='document_id' type='string' key='document_id'>
  787. 文档 ID
  788. </Property>
  789. </Properties>
  790. ### Request Body
  791. <Properties>
  792. <Property name='name' type='string' key='name'>
  793. 文档名称(选填)
  794. </Property>
  795. <Property name='file' type='multipart/form-data' key='file'>
  796. 需要上传的文件
  797. </Property>
  798. <Property name='process_rule' type='object' key='process_rule'>
  799. 处理规则(选填)
  800. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  801. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  802. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  803. - <code>id</code> (string) 预处理规则的唯一标识符
  804. - 枚举:
  805. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  806. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  807. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  808. - <code>segmentation</code> (object) 分段规则
  809. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
  810. - <code>max_tokens</code> 最大长度(token)默认为 1000
  811. - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
  812. - <code>subchunk_segmentation</code> (object) 子分段规则
  813. - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
  814. - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
  815. - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填)
  816. </Property>
  817. </Properties>
  818. </Col>
  819. <Col sticky>
  820. <CodeGroup
  821. title="Request"
  822. tag="POST"
  823. label="/datasets/{dataset_id}/documents/{document_id}/update-by-file"
  824. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{\\"name\\":\\"Dify\\",\\"indexing_technique\\":\\"high_quality\\",\\"process_rule\\":{\\"rules\\":{\\"pre_processing_rules\\":[{\\"id\\":\\"remove_extra_spaces\\",\\"enabled\\":true},{\\"id\\":\\"remove_urls_emails\\",\\"enabled\\":true}],\\"segmentation\\":{\\"separator\\":\\"###\\",\\"max_tokens\\":500}},\\"mode\\":\\"custom\\"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
  825. >
  826. ```bash {{ title: 'cURL' }}
  827. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \
  828. --header 'Authorization: Bearer {api_key}' \
  829. --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
  830. --form 'file=@"/path/to/file"'
  831. ```
  832. </CodeGroup>
  833. <CodeGroup title="Response">
  834. ```json {{ title: 'Response' }}
  835. {
  836. "document": {
  837. "id": "",
  838. "position": 1,
  839. "data_source_type": "upload_file",
  840. "data_source_info": {
  841. "upload_file_id": ""
  842. },
  843. "dataset_process_rule_id": "",
  844. "name": "Dify.txt",
  845. "created_from": "api",
  846. "created_by": "",
  847. "created_at": 1695308667,
  848. "tokens": 0,
  849. "indexing_status": "waiting",
  850. "error": null,
  851. "enabled": true,
  852. "disabled_at": null,
  853. "disabled_by": null,
  854. "archived": false,
  855. "display_status": "queuing",
  856. "word_count": 0,
  857. "hit_count": 0,
  858. "doc_form": "text_model"
  859. },
  860. "batch": "20230921150427533684"
  861. }
  862. ```
  863. </CodeGroup>
  864. </Col>
  865. </Row>
  866. <hr className='ml-0 mr-0' />
  867. <Heading
  868. url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
  869. method='GET'
  870. title='获取文档嵌入状态(进度)'
  871. name='#indexing_status'
  872. />
  873. <Row>
  874. <Col>
  875. ### Path
  876. <Properties>
  877. <Property name='dataset_id' type='string' key='dataset_id'>
  878. 知识库 ID
  879. </Property>
  880. <Property name='batch' type='string' key='batch'>
  881. 上传文档的批次号
  882. </Property>
  883. </Properties>
  884. </Col>
  885. <Col sticky>
  886. <CodeGroup
  887. title="Request"
  888. tag="GET"
  889. label="/datasets/{dataset_id}/documents/{batch}/indexing-status"
  890. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
  891. >
  892. ```bash {{ title: 'cURL' }}
  893. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \
  894. --header 'Authorization: Bearer {api_key}'
  895. ```
  896. </CodeGroup>
  897. <CodeGroup title="Response">
  898. ```json {{ title: 'Response' }}
  899. {
  900. "data":[{
  901. "id": "",
  902. "indexing_status": "indexing",
  903. "processing_started_at": 1681623462.0,
  904. "parsing_completed_at": 1681623462.0,
  905. "cleaning_completed_at": 1681623462.0,
  906. "splitting_completed_at": 1681623462.0,
  907. "completed_at": null,
  908. "paused_at": null,
  909. "error": null,
  910. "stopped_at": null,
  911. "completed_segments": 24,
  912. "total_segments": 100
  913. }]
  914. }
  915. ```
  916. </CodeGroup>
  917. </Col>
  918. </Row>
  919. <hr className='ml-0 mr-0' />
  920. <Heading
  921. url='/datasets/{dataset_id}/documents/{document_id}'
  922. method='DELETE'
  923. title='删除文档'
  924. name='#delete_document'
  925. />
  926. <Row>
  927. <Col>
  928. ### Path
  929. <Properties>
  930. <Property name='dataset_id' type='string' key='dataset_id'>
  931. 知识库 ID
  932. </Property>
  933. <Property name='document_id' type='string' key='document_id'>
  934. 文档 ID
  935. </Property>
  936. </Properties>
  937. </Col>
  938. <Col sticky>
  939. <CodeGroup
  940. title="Request"
  941. tag="DELETE"
  942. label="/datasets/{dataset_id}/documents/{document_id}"
  943. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  944. >
  945. ```bash {{ title: 'cURL' }}
  946. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \
  947. --header 'Authorization: Bearer {api_key}'
  948. ```
  949. </CodeGroup>
  950. <CodeGroup title="Response">
  951. ```json {{ title: 'Response' }}
  952. {
  953. "result": "success"
  954. }
  955. ```
  956. </CodeGroup>
  957. </Col>
  958. </Row>
  959. <hr className='ml-0 mr-0' />
  960. <Heading
  961. url='/datasets/{dataset_id}/documents'
  962. method='GET'
  963. title='知识库文档列表'
  964. name='#dataset_document_list'
  965. />
  966. <Row>
  967. <Col>
  968. ### Path
  969. <Properties>
  970. <Property name='dataset_id' type='string' key='dataset_id'>
  971. 知识库 ID
  972. </Property>
  973. </Properties>
  974. ### Query
  975. <Properties>
  976. <Property name='keyword' type='string' key='keyword'>
  977. 搜索关键词,可选,目前仅搜索文档名称
  978. </Property>
  979. <Property name='page' type='string' key='page'>
  980. 页码,可选
  981. </Property>
  982. <Property name='limit' type='string' key='limit'>
  983. 返回条数,可选,默认 20,范围 1-100
  984. </Property>
  985. </Properties>
  986. </Col>
  987. <Col sticky>
  988. <CodeGroup
  989. title="Request"
  990. tag="GET"
  991. label="/datasets/{dataset_id}/documents"
  992. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`}
  993. >
  994. ```bash {{ title: 'cURL' }}
  995. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \
  996. --header 'Authorization: Bearer {api_key}'
  997. ```
  998. </CodeGroup>
  999. <CodeGroup title="Response">
  1000. ```json {{ title: 'Response' }}
  1001. {
  1002. "data": [
  1003. {
  1004. "id": "",
  1005. "position": 1,
  1006. "data_source_type": "upload_file",
  1007. "data_source_info": null,
  1008. "dataset_process_rule_id": null,
  1009. "name": "dify",
  1010. "created_from": "",
  1011. "created_by": "",
  1012. "created_at": 1681623639,
  1013. "tokens": 0,
  1014. "indexing_status": "waiting",
  1015. "error": null,
  1016. "enabled": true,
  1017. "disabled_at": null,
  1018. "disabled_by": null,
  1019. "archived": false
  1020. }
  1021. ],
  1022. "has_more": false,
  1023. "limit": 20,
  1024. "total": 9,
  1025. "page": 1
  1026. }
  1027. ```
  1028. </CodeGroup>
  1029. </Col>
  1030. </Row>
  1031. <hr className='ml-0 mr-0' />
  1032. <Heading
  1033. url='/datasets/{dataset_id}/documents/{document_id}/segments'
  1034. method='POST'
  1035. title='新增分段'
  1036. name='#create_new_segment'
  1037. />
  1038. <Row>
  1039. <Col>
  1040. ### Path
  1041. <Properties>
  1042. <Property name='dataset_id' type='string' key='dataset_id'>
  1043. 知识库 ID
  1044. </Property>
  1045. <Property name='document_id' type='string' key='document_id'>
  1046. 文档 ID
  1047. </Property>
  1048. </Properties>
  1049. ### Request Body
  1050. <Properties>
  1051. <Property name='segments' type='object list' key='segments'>
  1052. - <code>content</code> (text) 文本内容/问题内容,必填
  1053. - <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为 Q&A 模式则传值
  1054. - <code>keywords</code> (list) 关键字,非必填
  1055. </Property>
  1056. </Properties>
  1057. </Col>
  1058. <Col sticky>
  1059. <CodeGroup
  1060. title="Request"
  1061. tag="POST"
  1062. label="/datasets/{dataset_id}/documents/{document_id}/segments"
  1063. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`}
  1064. >
  1065. ```bash {{ title: 'cURL' }}
  1066. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
  1067. --header 'Authorization: Bearer {api_key}' \
  1068. --header 'Content-Type: application/json' \
  1069. --data-raw '{
  1070. "segments": [
  1071. {
  1072. "content": "1",
  1073. "answer": "1",
  1074. "keywords": ["a"]
  1075. }
  1076. ]
  1077. }'
  1078. ```
  1079. </CodeGroup>
  1080. <CodeGroup title="Response">
  1081. ```json {{ title: 'Response' }}
  1082. {
  1083. "data": [{
  1084. "id": "",
  1085. "position": 1,
  1086. "document_id": "",
  1087. "content": "1",
  1088. "answer": "1",
  1089. "word_count": 25,
  1090. "tokens": 0,
  1091. "keywords": [
  1092. "a"
  1093. ],
  1094. "index_node_id": "",
  1095. "index_node_hash": "",
  1096. "hit_count": 0,
  1097. "enabled": true,
  1098. "disabled_at": null,
  1099. "disabled_by": null,
  1100. "status": "completed",
  1101. "created_by": "",
  1102. "created_at": 1695312007,
  1103. "indexing_at": 1695312007,
  1104. "completed_at": 1695312007,
  1105. "error": null,
  1106. "stopped_at": null
  1107. }],
  1108. "doc_form": "text_model"
  1109. }
  1110. ```
  1111. </CodeGroup>
  1112. </Col>
  1113. </Row>
  1114. <hr className='ml-0 mr-0' />
  1115. <Heading
  1116. url='/datasets/{dataset_id}/documents/{document_id}/segments'
  1117. method='GET'
  1118. title='查询文档分段'
  1119. name='#get_segment'
  1120. />
  1121. <Row>
  1122. <Col>
  1123. ### Path
  1124. <Properties>
  1125. <Property name='dataset_id' type='string' key='dataset_id'>
  1126. 知识库 ID
  1127. </Property>
  1128. <Property name='document_id' type='string' key='document_id'>
  1129. 文档 ID
  1130. </Property>
  1131. </Properties>
  1132. ### Query
  1133. <Properties>
  1134. <Property name='keyword' type='string' key='keyword'>
  1135. 搜索关键词,可选
  1136. </Property>
  1137. <Property name='status' type='string' key='status'>
  1138. 搜索状态,completed
  1139. </Property>
  1140. <Property name='page' type='string' key='page'>
  1141. 页码,可选
  1142. </Property>
  1143. <Property name='limit' type='string' key='limit'>
  1144. 返回条数,可选,默认 20,范围 1-100
  1145. </Property>
  1146. </Properties>
  1147. </Col>
  1148. <Col sticky>
  1149. <CodeGroup
  1150. title="Request"
  1151. tag="GET"
  1152. label="/datasets/{dataset_id}/documents/{document_id}/segments"
  1153. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  1154. >
  1155. ```bash {{ title: 'cURL' }}
  1156. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
  1157. --header 'Authorization: Bearer {api_key}' \
  1158. --header 'Content-Type: application/json'
  1159. ```
  1160. </CodeGroup>
  1161. <CodeGroup title="Response">
  1162. ```json {{ title: 'Response' }}
  1163. {
  1164. "data": [{
  1165. "id": "",
  1166. "position": 1,
  1167. "document_id": "",
  1168. "content": "1",
  1169. "answer": "1",
  1170. "word_count": 25,
  1171. "tokens": 0,
  1172. "keywords": [
  1173. "a"
  1174. ],
  1175. "index_node_id": "",
  1176. "index_node_hash": "",
  1177. "hit_count": 0,
  1178. "enabled": true,
  1179. "disabled_at": null,
  1180. "disabled_by": null,
  1181. "status": "completed",
  1182. "created_by": "",
  1183. "created_at": 1695312007,
  1184. "indexing_at": 1695312007,
  1185. "completed_at": 1695312007,
  1186. "error": null,
  1187. "stopped_at": null
  1188. }],
  1189. "doc_form": "text_model",
  1190. "has_more": false,
  1191. "limit": 20,
  1192. "total": 9,
  1193. "page": 1
  1194. }
  1195. ```
  1196. </CodeGroup>
  1197. </Col>
  1198. </Row>
  1199. <hr className='ml-0 mr-0' />
  1200. <Heading
  1201. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
  1202. method='DELETE'
  1203. title='删除文档分段'
  1204. name='#delete_segment'
  1205. />
  1206. <Row>
  1207. <Col>
  1208. ### Path
  1209. <Properties>
  1210. <Property name='dataset_id' type='string' key='dataset_id'>
  1211. 知识库 ID
  1212. </Property>
  1213. <Property name='document_id' type='string' key='document_id'>
  1214. 文档 ID
  1215. </Property>
  1216. <Property name='segment_id' type='string' key='segment_id'>
  1217. 文档分段 ID
  1218. </Property>
  1219. </Properties>
  1220. </Col>
  1221. <Col sticky>
  1222. <CodeGroup
  1223. title="Request"
  1224. tag="DELETE"
  1225. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
  1226. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  1227. >
  1228. ```bash {{ title: 'cURL' }}
  1229. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
  1230. --header 'Authorization: Bearer {api_key}' \
  1231. --header 'Content-Type: application/json'
  1232. ```
  1233. </CodeGroup>
  1234. <CodeGroup title="Response">
  1235. ```json {{ title: 'Response' }}
  1236. {
  1237. "result": "success"
  1238. }
  1239. ```
  1240. </CodeGroup>
  1241. </Col>
  1242. </Row>
  1243. <hr className='ml-0 mr-0' />
  1244. <Heading
  1245. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
  1246. method='POST'
  1247. title='更新文档分段'
  1248. name='#update_segment'
  1249. />
  1250. <Row>
  1251. <Col>
  1252. ### Path
  1253. <Properties>
  1254. <Property name='dataset_id' type='string' key='dataset_id'>
  1255. 知识库 ID
  1256. </Property>
  1257. <Property name='document_id' type='string' key='document_id'>
  1258. 文档 ID
  1259. </Property>
  1260. <Property name='segment_id' type='string' key='segment_id'>
  1261. 文档分段 ID
  1262. </Property>
  1263. </Properties>
  1264. ### Request Body
  1265. <Properties>
  1266. <Property name='segment' type='object' key='segment'>
  1267. - <code>content</code> (text) 文本内容/问题内容,必填
  1268. - <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为 Q&A 模式则传值
  1269. - <code>keywords</code> (list) 关键字,非必填
  1270. - <code>enabled</code> (bool) false/true,非必填
  1271. - <code>regenerate_child_chunks</code> (bool) 是否重新生成子分段,非必填
  1272. </Property>
  1273. </Properties>
  1274. </Col>
  1275. <Col sticky>
  1276. <CodeGroup
  1277. title="Request"
  1278. tag="POST"
  1279. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
  1280. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\"segment\": {\"content\": \"1\",\"answer\": \"1\", \"keywords\": [\"a\"], \"enabled\": false}}'`}
  1281. >
  1282. ```bash {{ title: 'cURL' }}
  1283. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
  1284. --header 'Authorization: Bearer {api_key}' \
  1285. --header 'Content-Type: application/json' \
  1286. --data-raw '{
  1287. "segment": {
  1288. "content": "1",
  1289. "answer": "1",
  1290. "keywords": ["a"],
  1291. "enabled": false
  1292. }
  1293. }'
  1294. ```
  1295. </CodeGroup>
  1296. <CodeGroup title="Response">
  1297. ```json {{ title: 'Response' }}
  1298. {
  1299. "data": {
  1300. "id": "",
  1301. "position": 1,
  1302. "document_id": "",
  1303. "content": "1",
  1304. "answer": "1",
  1305. "word_count": 25,
  1306. "tokens": 0,
  1307. "keywords": [
  1308. "a"
  1309. ],
  1310. "index_node_id": "",
  1311. "index_node_hash": "",
  1312. "hit_count": 0,
  1313. "enabled": true,
  1314. "disabled_at": null,
  1315. "disabled_by": null,
  1316. "status": "completed",
  1317. "created_by": "",
  1318. "created_at": 1695312007,
  1319. "indexing_at": 1695312007,
  1320. "completed_at": 1695312007,
  1321. "error": null,
  1322. "stopped_at": null
  1323. },
  1324. "doc_form": "text_model"
  1325. }
  1326. ```
  1327. </CodeGroup>
  1328. </Col>
  1329. </Row>
  1330. <hr className='ml-0 mr-0' />
  1331. <Heading
  1332. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks'
  1333. method='POST'
  1334. title='新增文档子分段'
  1335. name='#create_child_chunk'
  1336. />
  1337. <Row>
  1338. <Col>
  1339. ### Path
  1340. <Properties>
  1341. <Property name='dataset_id' type='string' key='dataset_id'>
  1342. 知识库 ID
  1343. </Property>
  1344. <Property name='document_id' type='string' key='document_id'>
  1345. 文档 ID
  1346. </Property>
  1347. <Property name='segment_id' type='string' key='segment_id'>
  1348. 分段 ID
  1349. </Property>
  1350. </Properties>
  1351. ### Request Body
  1352. <Properties>
  1353. <Property name='content' type='string' key='content'>
  1354. 子分段内容
  1355. </Property>
  1356. </Properties>
  1357. </Col>
  1358. <Col sticky>
  1359. <CodeGroup
  1360. title="Request"
  1361. tag="POST"
  1362. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks"
  1363. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "子分段内容"}'`}
  1364. >
  1365. ```bash {{ title: 'cURL' }}
  1366. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \
  1367. --header 'Authorization: Bearer {api_key}' \
  1368. --header 'Content-Type: application/json' \
  1369. --data-raw '{
  1370. "content": "子分段内容"
  1371. }'
  1372. ```
  1373. </CodeGroup>
  1374. <CodeGroup title="Response">
  1375. ```json {{ title: 'Response' }}
  1376. {
  1377. "data": {
  1378. "id": "",
  1379. "segment_id": "",
  1380. "content": "子分段内容",
  1381. "word_count": 25,
  1382. "tokens": 0,
  1383. "index_node_id": "",
  1384. "index_node_hash": "",
  1385. "status": "completed",
  1386. "created_by": "",
  1387. "created_at": 1695312007,
  1388. "indexing_at": 1695312007,
  1389. "completed_at": 1695312007,
  1390. "error": null,
  1391. "stopped_at": null
  1392. }
  1393. }
  1394. ```
  1395. </CodeGroup>
  1396. </Col>
  1397. </Row>
  1398. <hr className='ml-0 mr-0' />
  1399. <Heading
  1400. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks'
  1401. method='GET'
  1402. title='查询文档子分段'
  1403. name='#get_child_chunks'
  1404. />
  1405. <Row>
  1406. <Col>
  1407. ### Path
  1408. <Properties>
  1409. <Property name='dataset_id' type='string' key='dataset_id'>
  1410. 知识库 ID
  1411. </Property>
  1412. <Property name='document_id' type='string' key='document_id'>
  1413. 文档 ID
  1414. </Property>
  1415. <Property name='segment_id' type='string' key='segment_id'>
  1416. 分段 ID
  1417. </Property>
  1418. </Properties>
  1419. ### Query
  1420. <Properties>
  1421. <Property name='keyword' type='string' key='keyword'>
  1422. 搜索关键词(选填)
  1423. </Property>
  1424. <Property name='page' type='integer' key='page'>
  1425. 页码(选填,默认1)
  1426. </Property>
  1427. <Property name='limit' type='integer' key='limit'>
  1428. 每页数量(选填,默认20,最大100)
  1429. </Property>
  1430. </Properties>
  1431. </Col>
  1432. <Col sticky>
  1433. <CodeGroup
  1434. title="Request"
  1435. tag="GET"
  1436. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks"
  1437. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
  1438. >
  1439. ```bash {{ title: 'cURL' }}
  1440. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \
  1441. --header 'Authorization: Bearer {api_key}'
  1442. ```
  1443. </CodeGroup>
  1444. <CodeGroup title="Response">
  1445. ```json {{ title: 'Response' }}
  1446. {
  1447. "data": [{
  1448. "id": "",
  1449. "segment_id": "",
  1450. "content": "子分段内容",
  1451. "word_count": 25,
  1452. "tokens": 0,
  1453. "index_node_id": "",
  1454. "index_node_hash": "",
  1455. "status": "completed",
  1456. "created_by": "",
  1457. "created_at": 1695312007,
  1458. "indexing_at": 1695312007,
  1459. "completed_at": 1695312007,
  1460. "error": null,
  1461. "stopped_at": null
  1462. }],
  1463. "total": 1,
  1464. "total_pages": 1,
  1465. "page": 1,
  1466. "limit": 20
  1467. }
  1468. ```
  1469. </CodeGroup>
  1470. </Col>
  1471. </Row>
  1472. <hr className='ml-0 mr-0' />
  1473. <Heading
  1474. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}'
  1475. method='DELETE'
  1476. title='删除文档子分段'
  1477. name='#delete_child_chunk'
  1478. />
  1479. <Row>
  1480. <Col>
  1481. ### Path
  1482. <Properties>
  1483. <Property name='dataset_id' type='string' key='dataset_id'>
  1484. 知识库 ID
  1485. </Property>
  1486. <Property name='document_id' type='string' key='document_id'>
  1487. 文档 ID
  1488. </Property>
  1489. <Property name='segment_id' type='string' key='segment_id'>
  1490. 分段 ID
  1491. </Property>
  1492. <Property name='child_chunk_id' type='string' key='child_chunk_id'>
  1493. 子分段 ID
  1494. </Property>
  1495. </Properties>
  1496. </Col>
  1497. <Col sticky>
  1498. <CodeGroup
  1499. title="Request"
  1500. tag="DELETE"
  1501. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}"
  1502. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  1503. >
  1504. ```bash {{ title: 'cURL' }}
  1505. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \
  1506. --header 'Authorization: Bearer {api_key}'
  1507. ```
  1508. </CodeGroup>
  1509. <CodeGroup title="Response">
  1510. ```json {{ title: 'Response' }}
  1511. {
  1512. "result": "success"
  1513. }
  1514. ```
  1515. </CodeGroup>
  1516. </Col>
  1517. </Row>
  1518. <hr className='ml-0 mr-0' />
  1519. <Row>
  1520. <Col>
  1521. ### 错误信息
  1522. <Properties>
  1523. <Property name='code' type='string' key='code'>
  1524. 返回的错误代码
  1525. </Property>
  1526. </Properties>
  1527. <Properties>
  1528. <Property name='status' type='number' key='status'>
  1529. 返回的错误状态
  1530. </Property>
  1531. </Properties>
  1532. <Properties>
  1533. <Property name='message' type='string' key='message'>
  1534. 返回的错误信息
  1535. </Property>
  1536. </Properties>
  1537. </Col>
  1538. <Col>
  1539. <CodeGroup title="Example">
  1540. ```json {{ title: 'Response' }}
  1541. {
  1542. "code": "no_file_uploaded",
  1543. "message": "Please upload your file.",
  1544. "status": 400
  1545. }
  1546. ```
  1547. </CodeGroup>
  1548. </Col>
  1549. </Row>
  1550. <hr className='ml-0 mr-0' />
  1551. <Heading
  1552. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}'
  1553. method='PATCH'
  1554. title='更新文档子分段'
  1555. name='#update_child_chunk'
  1556. />
  1557. <Row>
  1558. <Col>
  1559. ### Path
  1560. <Properties>
  1561. <Property name='dataset_id' type='string' key='dataset_id'>
  1562. 知识库 ID
  1563. </Property>
  1564. <Property name='document_id' type='string' key='document_id'>
  1565. 文档 ID
  1566. </Property>
  1567. <Property name='segment_id' type='string' key='segment_id'>
  1568. 分段 ID
  1569. </Property>
  1570. <Property name='child_chunk_id' type='string' key='child_chunk_id'>
  1571. 子分段 ID
  1572. </Property>
  1573. </Properties>
  1574. ### Request Body
  1575. <Properties>
  1576. <Property name='content' type='string' key='content'>
  1577. 子分段内容
  1578. </Property>
  1579. </Properties>
  1580. </Col>
  1581. <Col sticky>
  1582. <CodeGroup
  1583. title="Request"
  1584. tag="PATCH"
  1585. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}"
  1586. targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "更新的子分段内容"}'`}
  1587. >
  1588. ```bash {{ title: 'cURL' }}
  1589. curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \
  1590. --header 'Authorization: Bearer {api_key}' \
  1591. --header 'Content-Type: application/json' \
  1592. --data-raw '{
  1593. "content": "更新的子分段内容"
  1594. }'
  1595. ```
  1596. </CodeGroup>
  1597. <CodeGroup title="Response">
  1598. ```json {{ title: 'Response' }}
  1599. {
  1600. "data": {
  1601. "id": "",
  1602. "segment_id": "",
  1603. "content": "更新的子分段内容",
  1604. "word_count": 25,
  1605. "tokens": 0,
  1606. "index_node_id": "",
  1607. "index_node_hash": "",
  1608. "status": "completed",
  1609. "created_by": "",
  1610. "created_at": 1695312007,
  1611. "indexing_at": 1695312007,
  1612. "completed_at": 1695312007,
  1613. "error": null,
  1614. "stopped_at": null
  1615. }
  1616. }
  1617. ```
  1618. </CodeGroup>
  1619. </Col>
  1620. </Row>
  1621. <hr className='ml-0 mr-0' />
  1622. <Heading
  1623. url='/datasets/{dataset_id}/documents/{document_id}/upload-file'
  1624. method='GET'
  1625. title='获取上传文件'
  1626. name='#get_upload_file'
  1627. />
  1628. <Row>
  1629. <Col>
  1630. ### Path
  1631. <Properties>
  1632. <Property name='dataset_id' type='string' key='dataset_id'>
  1633. 知识库 ID
  1634. </Property>
  1635. <Property name='document_id' type='string' key='document_id'>
  1636. 文档 ID
  1637. </Property>
  1638. </Properties>
  1639. </Col>
  1640. <Col sticky>
  1641. <CodeGroup
  1642. title="Request"
  1643. tag="GET"
  1644. label="/datasets/{dataset_id}/documents/{document_id}/upload-file"
  1645. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  1646. >
  1647. ```bash {{ title: 'cURL' }}
  1648. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \
  1649. --header 'Authorization: Bearer {api_key}' \
  1650. --header 'Content-Type: application/json'
  1651. ```
  1652. </CodeGroup>
  1653. <CodeGroup title="Response">
  1654. ```json {{ title: 'Response' }}
  1655. {
  1656. "id": "file_id",
  1657. "name": "file_name",
  1658. "size": 1024,
  1659. "extension": "txt",
  1660. "url": "preview_url",
  1661. "download_url": "download_url",
  1662. "mime_type": "text/plain",
  1663. "created_by": "user_id",
  1664. "created_at": 1728734540
  1665. }
  1666. ```
  1667. </CodeGroup>
  1668. </Col>
  1669. </Row>
  1670. <hr className='ml-0 mr-0' />
  1671. <Heading
  1672. url='/datasets/{dataset_id}/retrieve'
  1673. method='POST'
  1674. title='检索知识库'
  1675. name='#dataset_retrieval'
  1676. />
  1677. <Row>
  1678. <Col>
  1679. ### Path
  1680. <Properties>
  1681. <Property name='dataset_id' type='string' key='dataset_id'>
  1682. 知识库 ID
  1683. </Property>
  1684. </Properties>
  1685. ### Request Body
  1686. <Properties>
  1687. <Property name='query' type='string' key='query'>
  1688. 检索关键词
  1689. </Property>
  1690. <Property name='retrieval_model' type='object' key='retrieval_model'>
  1691. 检索参数(选填,如不填,按照默认方式召回)
  1692. - <code>search_method</code> (text) 检索方法:以下四个关键字之一,必填
  1693. - <code>keyword_search</code> 关键字检索
  1694. - <code>semantic_search</code> 语义检索
  1695. - <code>full_text_search</code> 全文检索
  1696. - <code>hybrid_search</code> 混合检索
  1697. - <code>reranking_enable</code> (bool) 是否启用 Reranking,非必填,如果检索模式为 semantic_search 模式或者 hybrid_search 则传值
  1698. - <code>reranking_model</code> (object) Rerank 模型配置,非必填,如果启用了 reranking 则传值
  1699. - <code>reranking_provider_name</code> (string) Rerank 模型提供商
  1700. - <code>reranking_model_name</code> (string) Rerank 模型名称
  1701. - <code>weights</code> (float) 混合检索模式下语义检索的权重设置
  1702. - <code>top_k</code> (integer) 返回结果数量,非必填
  1703. - <code>score_threshold_enabled</code> (bool) 是否开启 score 阈值
  1704. - <code>score_threshold</code> (float) Score 阈值
  1705. </Property>
  1706. <Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
  1707. 未启用字段
  1708. </Property>
  1709. </Properties>
  1710. </Col>
  1711. <Col sticky>
  1712. <CodeGroup
  1713. title="Request"
  1714. tag="POST"
  1715. label="/datasets/{dataset_id}/retrieve"
  1716. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{
  1717. "query": "test",
  1718. "retrieval_model": {
  1719. "search_method": "keyword_search",
  1720. "reranking_enable": false,
  1721. "reranking_mode": null,
  1722. "reranking_model": {
  1723. "reranking_provider_name": "",
  1724. "reranking_model_name": ""
  1725. },
  1726. "weights": null,
  1727. "top_k": 1,
  1728. "score_threshold_enabled": false,
  1729. "score_threshold": null
  1730. }
  1731. }'`}
  1732. >
  1733. ```bash {{ title: 'cURL' }}
  1734. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \
  1735. --header 'Authorization: Bearer {api_key}' \
  1736. --header 'Content-Type: application/json' \
  1737. --data-raw '{
  1738. "query": "test",
  1739. "retrieval_model": {
  1740. "search_method": "keyword_search",
  1741. "reranking_enable": false,
  1742. "reranking_mode": null,
  1743. "reranking_model": {
  1744. "reranking_provider_name": "",
  1745. "reranking_model_name": ""
  1746. },
  1747. "weights": null,
  1748. "top_k": 2,
  1749. "score_threshold_enabled": false,
  1750. "score_threshold": null
  1751. }
  1752. }'
  1753. ```
  1754. </CodeGroup>
  1755. <CodeGroup title="Response">
  1756. ```json {{ title: 'Response' }}
  1757. {
  1758. "query": {
  1759. "content": "test"
  1760. },
  1761. "records": [
  1762. {
  1763. "segment": {
  1764. "id": "7fa6f24f-8679-48b3-bc9d-bdf28d73f218",
  1765. "position": 1,
  1766. "document_id": "a8c6c36f-9f5d-4d7a-8472-f5d7b75d71d2",
  1767. "content": "Operation guide",
  1768. "answer": null,
  1769. "word_count": 847,
  1770. "tokens": 280,
  1771. "keywords": [
  1772. "install",
  1773. "java",
  1774. "base",
  1775. "scripts",
  1776. "jdk",
  1777. "manual",
  1778. "internal",
  1779. "opens",
  1780. "add",
  1781. "vmoptions"
  1782. ],
  1783. "index_node_id": "39dd8443-d960-45a8-bb46-7275ad7fbc8e",
  1784. "index_node_hash": "0189157697b3c6a418ccf8264a09699f25858975578f3467c76d6bfc94df1d73",
  1785. "hit_count": 0,
  1786. "enabled": true,
  1787. "disabled_at": null,
  1788. "disabled_by": null,
  1789. "status": "completed",
  1790. "created_by": "dbcb1ab5-90c8-41a7-8b78-73b235eb6f6f",
  1791. "created_at": 1728734540,
  1792. "indexing_at": 1728734552,
  1793. "completed_at": 1728734584,
  1794. "error": null,
  1795. "stopped_at": null,
  1796. "document": {
  1797. "id": "a8c6c36f-9f5d-4d7a-8472-f5d7b75d71d2",
  1798. "data_source_type": "upload_file",
  1799. "name": "readme.txt"
  1800. }
  1801. },
  1802. "score": 3.730463140527718e-05,
  1803. "tsne_position": null
  1804. }
  1805. ]
  1806. }
  1807. ```
  1808. </CodeGroup>
  1809. </Col>
  1810. </Row>
  1811. <hr className='ml-0 mr-0' />
  1812. <Heading
  1813. url='/datasets/{dataset_id}/metadata'
  1814. method='POST'
  1815. title='新增元数据'
  1816. name='#create_metadata'
  1817. />
  1818. <Row>
  1819. <Col>
  1820. ### Path
  1821. <Properties>
  1822. <Property name='dataset_id' type='string' key='dataset_id'>
  1823. 知识库 ID
  1824. </Property>
  1825. </Properties>
  1826. ### Request Body
  1827. <Properties>
  1828. <Property name='segment' type='object' key='segment'>
  1829. - <code>type</code> (string) 元数据类型,必填
  1830. - <code>name</code> (string) 元数据名称,必填
  1831. </Property>
  1832. </Properties>
  1833. </Col>
  1834. <Col sticky>
  1835. <CodeGroup
  1836. title="Request"
  1837. tag="POST"
  1838. label="/datasets/{dataset_id}/metadata"
  1839. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/metadata' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"type": "string", "name": "test"}'`}
  1840. >
  1841. ```bash {{ title: 'cURL' }}
  1842. ```
  1843. </CodeGroup>
  1844. <CodeGroup title="Response">
  1845. ```json {{ title: 'Response' }}
  1846. {
  1847. "id": "abc",
  1848. "type": "string",
  1849. "name": "test"
  1850. }
  1851. ```
  1852. </CodeGroup>
  1853. </Col>
  1854. </Row>
  1855. <hr className='ml-0 mr-0' />
  1856. <Heading
  1857. url='/datasets/{dataset_id}/metadata/{metadata_id}'
  1858. method='PATCH'
  1859. title='更新元数据'
  1860. name='#update_metadata'
  1861. />
  1862. <Row>
  1863. <Col>
  1864. ### Path
  1865. <Properties>
  1866. <Property name='dataset_id' type='string' key='dataset_id'>
  1867. 知识库 ID
  1868. </Property>
  1869. <Property name='metadata_id' type='string' key='metadata_id'>
  1870. 元数据 ID
  1871. </Property>
  1872. </Properties>
  1873. ### Request Body
  1874. <Properties>
  1875. <Property name='segment' type='object' key='segment'>
  1876. - <code>name</code> (string) 元数据名称,必填
  1877. </Property>
  1878. </Properties>
  1879. </Col>
  1880. <Col sticky>
  1881. <CodeGroup
  1882. title="Request"
  1883. tag="PATCH"
  1884. label="/datasets/{dataset_id}/metadata/{metadata_id}"
  1885. targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/metadata/{metadata_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "test"}'`}
  1886. >
  1887. ```bash {{ title: 'cURL' }}
  1888. ```
  1889. </CodeGroup>
  1890. <CodeGroup title="Response">
  1891. ```json {{ title: 'Response' }}
  1892. {
  1893. "id": "abc",
  1894. "type": "string",
  1895. "name": "test"
  1896. }
  1897. ```
  1898. </CodeGroup>
  1899. </Col>
  1900. </Row>
  1901. <hr className='ml-0 mr-0' />
  1902. <Heading
  1903. url='/datasets/{dataset_id}/metadata/{metadata_id}'
  1904. method='DELETE'
  1905. title='删除元数据'
  1906. name='#delete_metadata'
  1907. />
  1908. <Row>
  1909. <Col>
  1910. ### Path
  1911. <Properties>
  1912. <Property name='dataset_id' type='string' key='dataset_id'>
  1913. 知识库 ID
  1914. </Property>
  1915. <Property name='metadata_id' type='string' key='metadata_id'>
  1916. 元数据 ID
  1917. </Property>
  1918. </Properties>
  1919. </Col>
  1920. <Col sticky>
  1921. <CodeGroup
  1922. title="Request"
  1923. tag="DELETE"
  1924. label="/datasets/{dataset_id}/metadata/{metadata_id}"
  1925. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/metadata/{metadata_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  1926. >
  1927. ```bash {{ title: 'cURL' }}
  1928. ```
  1929. </CodeGroup>
  1930. </Col>
  1931. </Row>
  1932. <hr className='ml-0 mr-0' />
  1933. <Heading
  1934. url='/datasets/{dataset_id}/metadata/built-in/{action}'
  1935. method='POST'
  1936. title='启用/禁用内置元数据'
  1937. name='#toggle_metadata'
  1938. />
  1939. <Row>
  1940. <Col>
  1941. ### Path
  1942. <Properties>
  1943. <Property name='dataset_id' type='string' key='dataset_id'>
  1944. 知识库 ID
  1945. </Property>
  1946. <Property name='action' type='string' key='action'>
  1947. disable/enable
  1948. </Property>
  1949. </Properties>
  1950. </Col>
  1951. <Col sticky>
  1952. <CodeGroup
  1953. title="Request"
  1954. tag="POST"
  1955. label="/datasets/{dataset_id}/metadata/built-in/{action}"
  1956. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/metadata/built-in/{action}' \\\n--header 'Authorization: Bearer {api_key}'`}
  1957. >
  1958. ```bash {{ title: 'cURL' }}
  1959. ```
  1960. </CodeGroup>
  1961. </Col>
  1962. </Row>
  1963. <hr className='ml-0 mr-0' />
  1964. <Heading
  1965. url='/datasets/{dataset_id}/documents/metadata'
  1966. method='POST'
  1967. title='更新文档元数据'
  1968. name='#update_documents_metadata'
  1969. />
  1970. <Row>
  1971. <Col>
  1972. ### Path
  1973. <Properties>
  1974. <Property name='dataset_id' type='string' key='dataset_id'>
  1975. 知识库 ID
  1976. </Property>
  1977. </Properties>
  1978. ### Request Body
  1979. <Properties>
  1980. <Property name='operation_data' type='object list' key='operation_data'>
  1981. - <code>document_id</code> (string) 文档 ID
  1982. - <code>metadata_list</code> (list) 元数据列表
  1983. - <code>id</code> (string) 元数据 ID
  1984. - <code>value</code> (string) 元数据值
  1985. - <code>name</code> (string) 元数据名称
  1986. </Property>
  1987. </Properties>
  1988. </Col>
  1989. <Col sticky>
  1990. <CodeGroup
  1991. title="Request"
  1992. tag="POST"
  1993. label="/datasets/{dataset_id}/documents/metadata"
  1994. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/metadata' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"operation_data": [{"document_id": "document_id", "metadata_list": [{"id": "id", "value": "value", "name": "name"}]}]}'`}
  1995. >
  1996. ```bash {{ title: 'cURL' }}
  1997. ```
  1998. </CodeGroup>
  1999. </Col>
  2000. </Row>
  2001. <hr className='ml-0 mr-0' />
  2002. <Heading
  2003. url='/datasets/{dataset_id}/metadata'
  2004. method='GET'
  2005. title='查询知识库元数据列表'
  2006. name='#dataset_metadata_list'
  2007. />
  2008. <Row>
  2009. <Col>
  2010. ### Path
  2011. <Properties>
  2012. <Property name='dataset_id' type='string' key='dataset_id'>
  2013. 知识库 ID
  2014. </Property>
  2015. </Properties>
  2016. </Col>
  2017. <Col sticky>
  2018. <CodeGroup
  2019. title="Request"
  2020. tag="GET"
  2021. label="/datasets/{dataset_id}/metadata"
  2022. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/metadata' \\\n--header 'Authorization: Bearer {api_key}'`}
  2023. >
  2024. ```bash {{ title: 'cURL' }}
  2025. ```
  2026. </CodeGroup>
  2027. <CodeGroup title="Response">
  2028. ```json {{ title: 'Response' }}
  2029. {
  2030. "doc_metadata": [
  2031. {
  2032. "id": "",
  2033. "name": "name",
  2034. "type": "string",
  2035. "use_count": 0
  2036. },
  2037. ...
  2038. ],
  2039. "built_in_field_enabled": true
  2040. }
  2041. ```
  2042. </CodeGroup>
  2043. </Col>
  2044. </Row>
  2045. <hr className='ml-0 mr-0' />
  2046. <Heading
  2047. url='/workspaces/current/models/model-types/text-embedding'
  2048. method='GET'
  2049. title='获取嵌入模型列表'
  2050. name='#model_type_list'
  2051. />
  2052. <Row>
  2053. <Col>
  2054. ### Query
  2055. <Properties>
  2056. </Properties>
  2057. </Col>
  2058. <Col sticky>
  2059. <CodeGroup
  2060. title="Request"
  2061. tag="GET"
  2062. label="/workspaces/current/models/model-types/text-embedding"
  2063. targetCode={`curl --location --request GET '${props.apiBaseUrl}/workspaces/current/models/model-types/text-embedding' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  2064. >
  2065. ```bash {{ title: 'cURL' }}
  2066. curl --location --request GET '${props.apiBaseUrl}/workspaces/current/models/model-types/text-embedding' \
  2067. --header 'Authorization: Bearer {api_key}' \
  2068. --header 'Content-Type: application/json'
  2069. ```
  2070. </CodeGroup>
  2071. <CodeGroup title="Response">
  2072. ```json {{ title: 'Response' }}
  2073. {
  2074. "data": [
  2075. {
  2076. "provider": "zhipuai",
  2077. "label": {
  2078. "zh_Hans": "智谱 AI",
  2079. "en_US": "ZHIPU AI"
  2080. },
  2081. "icon_small": {
  2082. "zh_Hans": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_small/zh_Hans",
  2083. "en_US": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_small/en_US"
  2084. },
  2085. "icon_large": {
  2086. "zh_Hans": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_large/zh_Hans",
  2087. "en_US": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_large/en_US"
  2088. },
  2089. "status": "active",
  2090. "models": [
  2091. {
  2092. "model": "embedding-3",
  2093. "label": {
  2094. "zh_Hans": "embedding-3",
  2095. "en_US": "embedding-3"
  2096. },
  2097. "model_type": "text-embedding",
  2098. "features": null,
  2099. "fetch_from": "predefined-model",
  2100. "model_properties": {
  2101. "context_size": 8192
  2102. },
  2103. "deprecated": false,
  2104. "status": "active",
  2105. "load_balancing_enabled": false
  2106. },
  2107. {
  2108. "model": "embedding-2",
  2109. "label": {
  2110. "zh_Hans": "embedding-2",
  2111. "en_US": "embedding-2"
  2112. },
  2113. "model_type": "text-embedding",
  2114. "features": null,
  2115. "fetch_from": "predefined-model",
  2116. "model_properties": {
  2117. "context_size": 8192
  2118. },
  2119. "deprecated": false,
  2120. "status": "active",
  2121. "load_balancing_enabled": false
  2122. },
  2123. {
  2124. "model": "text_embedding",
  2125. "label": {
  2126. "zh_Hans": "text_embedding",
  2127. "en_US": "text_embedding"
  2128. },
  2129. "model_type": "text-embedding",
  2130. "features": null,
  2131. "fetch_from": "predefined-model",
  2132. "model_properties": {
  2133. "context_size": 512
  2134. },
  2135. "deprecated": false,
  2136. "status": "active",
  2137. "load_balancing_enabled": false
  2138. }
  2139. ]
  2140. }
  2141. ]
  2142. }
  2143. ```
  2144. </CodeGroup>
  2145. </Col>
  2146. </Row>
  2147. <hr className='ml-0 mr-0' />
  2148. <Row>
  2149. <Col>
  2150. ### 错误信息
  2151. <Properties>
  2152. <Property name='code' type='string' key='code'>
  2153. 返回的错误代码
  2154. </Property>
  2155. </Properties>
  2156. <Properties>
  2157. <Property name='status' type='number' key='status'>
  2158. 返回的错误状态
  2159. </Property>
  2160. </Properties>
  2161. <Properties>
  2162. <Property name='message' type='string' key='message'>
  2163. 返回的错误信息
  2164. </Property>
  2165. </Properties>
  2166. </Col>
  2167. <Col>
  2168. <CodeGroup title="Example">
  2169. ```json {{ title: 'Response' }}
  2170. {
  2171. "code": "no_file_uploaded",
  2172. "message": "Please upload your file.",
  2173. "status": 400
  2174. }
  2175. ```
  2176. </CodeGroup>
  2177. </Col>
  2178. </Row>
  2179. <table className="max-w-auto border-collapse border border-slate-400" style={{ maxWidth: 'none', width: 'auto' }}>
  2180. <thead style={{ background: '#f9fafc' }}>
  2181. <tr>
  2182. <th className="p-2 border border-slate-300">code</th>
  2183. <th className="p-2 border border-slate-300">status</th>
  2184. <th className="p-2 border border-slate-300">message</th>
  2185. </tr>
  2186. </thead>
  2187. <tbody>
  2188. <tr>
  2189. <td className="p-2 border border-slate-300">no_file_uploaded</td>
  2190. <td className="p-2 border border-slate-300">400</td>
  2191. <td className="p-2 border border-slate-300">Please upload your file.</td>
  2192. </tr>
  2193. <tr>
  2194. <td className="p-2 border border-slate-300">too_many_files</td>
  2195. <td className="p-2 border border-slate-300">400</td>
  2196. <td className="p-2 border border-slate-300">Only one file is allowed.</td>
  2197. </tr>
  2198. <tr>
  2199. <td className="p-2 border border-slate-300">file_too_large</td>
  2200. <td className="p-2 border border-slate-300">413</td>
  2201. <td className="p-2 border border-slate-300">File size exceeded.</td>
  2202. </tr>
  2203. <tr>
  2204. <td className="p-2 border border-slate-300">unsupported_file_type</td>
  2205. <td className="p-2 border border-slate-300">415</td>
  2206. <td className="p-2 border border-slate-300">File type not allowed.</td>
  2207. </tr>
  2208. <tr>
  2209. <td className="p-2 border border-slate-300">high_quality_dataset_only</td>
  2210. <td className="p-2 border border-slate-300">400</td>
  2211. <td className="p-2 border border-slate-300">Current operation only supports 'high-quality' datasets.</td>
  2212. </tr>
  2213. <tr>
  2214. <td className="p-2 border border-slate-300">dataset_not_initialized</td>
  2215. <td className="p-2 border border-slate-300">400</td>
  2216. <td className="p-2 border border-slate-300">The dataset is still being initialized or indexing. Please wait a moment.</td>
  2217. </tr>
  2218. <tr>
  2219. <td className="p-2 border border-slate-300">archived_document_immutable</td>
  2220. <td className="p-2 border border-slate-300">403</td>
  2221. <td className="p-2 border border-slate-300">The archived document is not editable.</td>
  2222. </tr>
  2223. <tr>
  2224. <td className="p-2 border border-slate-300">dataset_name_duplicate</td>
  2225. <td className="p-2 border border-slate-300">409</td>
  2226. <td className="p-2 border border-slate-300">The dataset name already exists. Please modify your dataset name.</td>
  2227. </tr>
  2228. <tr>
  2229. <td className="p-2 border border-slate-300">invalid_action</td>
  2230. <td className="p-2 border border-slate-300">400</td>
  2231. <td className="p-2 border border-slate-300">Invalid action.</td>
  2232. </tr>
  2233. <tr>
  2234. <td className="p-2 border border-slate-300">document_already_finished</td>
  2235. <td className="p-2 border border-slate-300">400</td>
  2236. <td className="p-2 border border-slate-300">The document has been processed. Please refresh the page or go to the document details.</td>
  2237. </tr>
  2238. <tr>
  2239. <td className="p-2 border border-slate-300">document_indexing</td>
  2240. <td className="p-2 border border-slate-300">400</td>
  2241. <td className="p-2 border border-slate-300">The document is being processed and cannot be edited.</td>
  2242. </tr>
  2243. <tr>
  2244. <td className="p-2 border border-slate-300">invalid_metadata</td>
  2245. <td className="p-2 border border-slate-300">400</td>
  2246. <td className="p-2 border border-slate-300">The metadata content is incorrect. Please check and verify.</td>
  2247. </tr>
  2248. </tbody>
  2249. </table>
  2250. <div className="pb-4" />