Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320
  1. {/**
  2. * @typedef Props
  3. * @property {string} apiBaseUrl
  4. */}
  5. import { CodeGroup } from '@/app/components/develop/code.tsx'
  6. import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstruction, Paragraph } from '@/app/components/develop/md.tsx'
  7. # 知识库 API
  8. <div>
  9. ### 鉴权
  10. Dify Service API 使用 `API-Key` 进行鉴权。
  11. 建议开发者把 `API-Key` 放在后端存储,而非分享或者放在客户端存储,以免 `API-Key` 泄露,导致财产损失。
  12. 所有 API 请求都应在 **`Authorization`** HTTP Header 中包含您的 `API-Key`,如下所示:
  13. <CodeGroup title="Code">
  14. ```javascript
  15. Authorization: Bearer {API_KEY}
  16. ```
  17. </CodeGroup>
  18. </div>
  19. <hr className='ml-0 mr-0' />
  20. <Heading
  21. url='/datasets/{dataset_id}/document/create-by-text'
  22. method='POST'
  23. title='通过文本创建文档'
  24. name='#create-by-text'
  25. />
  26. <Row>
  27. <Col>
  28. 此接口基于已存在知识库,在此知识库的基础上通过文本创建新的文档
  29. ### Path
  30. <Properties>
  31. <Property name='dataset_id' type='string' key='dataset_id'>
  32. 知识库 ID
  33. </Property>
  34. </Properties>
  35. ### Request Body
  36. <Properties>
  37. <Property name='name' type='string' key='name'>
  38. 文档名称
  39. </Property>
  40. <Property name='text' type='string' key='text'>
  41. 文档内容
  42. </Property>
  43. <Property name='indexing_technique' type='string' key='indexing_technique'>
  44. 索引方式
  45. - <code>high_quality</code> 高质量:使用
  46. embedding 模型进行嵌入,构建为向量数据库索引
  47. - <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
  48. </Property>
  49. <Property name='doc_form' type='string' key='doc_form'>
  50. 索引内容的形式
  51. - <code>text_model</code> text 文档直接 embedding,经济模式默认为该模式
  52. - <code>hierarchical_model</code> parent-child 模式
  53. - <code>qa_model</code> Q&A 模式:为分片文档生成 Q&A 对,然后对问题进行 embedding
  54. </Property>
  55. <Property name='doc_language' type='string' key='doc_language'>
  56. 在 Q&A 模式下,指定文档的语言,例如:<code>English</code>、<code>Chinese</code>
  57. </Property>
  58. <Property name='process_rule' type='object' key='process_rule'>
  59. 处理规则
  60. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  61. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  62. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  63. - <code>id</code> (string) 预处理规则的唯一标识符
  64. - 枚举:
  65. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  66. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  67. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  68. - <code>segmentation</code> (object) 分段规则
  69. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 <code>\n</code>
  70. - <code>max_tokens</code> 最大长度(token)默认为 1000
  71. - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
  72. - <code>subchunk_segmentation</code> (object) 子分段规则
  73. - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
  74. - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
  75. - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填)
  76. </Property>
  77. <PropertyInstruction>当知识库未设置任何参数的时候,首次上传需要提供以下参数,未提供则使用默认选项:</PropertyInstruction>
  78. <Property name='retrieval_model' type='object' key='retrieval_model'>
  79. 检索模式
  80. - <code>search_method</code> (string) 检索方法
  81. - <code>hybrid_search</code> 混合检索
  82. - <code>semantic_search</code> 语义检索
  83. - <code>full_text_search</code> 全文检索
  84. - <code>reranking_enable</code> (bool) 是否开启 rerank
  85. - <code>reranking_model</code> (object) Rerank 模型配置
  86. - <code>reranking_provider_name</code> (string) Rerank 模型的提供商
  87. - <code>reranking_model_name</code> (string) Rerank 模型的名称
  88. - <code>top_k</code> (int) 召回条数
  89. - <code>score_threshold_enabled</code> (bool) 是否开启召回分数限制
  90. - <code>score_threshold</code> (float) 召回分数限制
  91. </Property>
  92. <Property name='embedding_model' type='string' key='embedding_model'>
  93. Embedding 模型名称
  94. </Property>
  95. <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
  96. Embedding 模型供应商
  97. </Property>
  98. </Properties>
  99. </Col>
  100. <Col sticky>
  101. <CodeGroup
  102. title="Request"
  103. tag="POST"
  104. label="/datasets/{dataset_id}/document/create-by-text"
  105. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
  106. >
  107. ```bash {{ title: 'cURL' }}
  108. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \
  109. --header 'Authorization: Bearer {api_key}' \
  110. --header 'Content-Type: application/json' \
  111. --data-raw '{
  112. "name": "text",
  113. "text": "text",
  114. "indexing_technique": "high_quality",
  115. "process_rule": {
  116. "mode": "automatic"
  117. }
  118. }'
  119. ```
  120. </CodeGroup>
  121. <CodeGroup title="Response">
  122. ```json {{ title: 'Response' }}
  123. {
  124. "document": {
  125. "id": "",
  126. "position": 1,
  127. "data_source_type": "upload_file",
  128. "data_source_info": {
  129. "upload_file_id": ""
  130. },
  131. "dataset_process_rule_id": "",
  132. "name": "text.txt",
  133. "created_from": "api",
  134. "created_by": "",
  135. "created_at": 1695690280,
  136. "tokens": 0,
  137. "indexing_status": "waiting",
  138. "error": null,
  139. "enabled": true,
  140. "disabled_at": null,
  141. "disabled_by": null,
  142. "archived": false,
  143. "display_status": "queuing",
  144. "word_count": 0,
  145. "hit_count": 0,
  146. "doc_form": "text_model"
  147. },
  148. "batch": ""
  149. }
  150. ```
  151. </CodeGroup>
  152. </Col>
  153. </Row>
  154. <hr className='ml-0 mr-0' />
  155. <Heading
  156. url='/datasets/{dataset_id}/document/create-by-file'
  157. method='POST'
  158. title='通过文件创建文档'
  159. name='#create-by-file'
  160. />
  161. <Row>
  162. <Col>
  163. 此接口基于已存在知识库,在此知识库的基础上通过文件创建新的文档
  164. ### Path
  165. <Properties>
  166. <Property name='dataset_id' type='string' key='dataset_id'>
  167. 知识库 ID
  168. </Property>
  169. </Properties>
  170. ### Request Body
  171. <Properties>
  172. <Property name='data' type='multipart/form-data json string' key='data'>
  173. - <code>original_document_id</code> 源文档 ID(选填)
  174. - 用于重新上传文档或修改文档清洗、分段配置,缺失的信息从源文档复制
  175. - 源文档不可为归档的文档
  176. - 当传入 <code>original_document_id</code> 时,代表文档进行更新操作,<code>process_rule</code> 为可填项目,不填默认使用源文档的分段方式
  177. - 未传入 <code>original_document_id</code> 时,代表文档进行新增操作,<code>process_rule</code> 为必填
  178. - <code>indexing_technique</code> 索引方式
  179. - <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
  180. - <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
  181. - <code>doc_form</code> 索引内容的形式
  182. - <code>text_model</code> text 文档直接 embedding,经济模式默认为该模式
  183. - <code>hierarchical_model</code> parent-child 模式
  184. - <code>qa_model</code> Q&A 模式:为分片文档生成 Q&A 对,然后对问题进行 embedding
  185. - <code>doc_language</code> 在 Q&A 模式下,指定文档的语言,例如:<code>English</code>、<code>Chinese</code>
  186. - <code>process_rule</code> 处理规则
  187. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  188. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  189. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  190. - <code>id</code> (string) 预处理规则的唯一标识符
  191. - 枚举:
  192. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  193. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  194. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  195. - <code>segmentation</code> (object) 分段规则
  196. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 <code>\n</code>
  197. - <code>max_tokens</code> 最大长度(token)默认为 1000
  198. - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
  199. - <code>subchunk_segmentation</code> (object) 子分段规则
  200. - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
  201. - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
  202. - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填)
  203. </Property>
  204. <Property name='file' type='multipart/form-data' key='file'>
  205. 需要上传的文件。
  206. </Property>
  207. <PropertyInstruction>当知识库未设置任何参数的时候,首次上传需要提供以下参数,未提供则使用默认选项:</PropertyInstruction>
  208. <Property name='retrieval_model' type='object' key='retrieval_model'>
  209. 检索模式
  210. - <code>search_method</code> (string) 检索方法
  211. - <code>hybrid_search</code> 混合检索
  212. - <code>semantic_search</code> 语义检索
  213. - <code>full_text_search</code> 全文检索
  214. - <code>reranking_enable</code> (bool) 是否开启 rerank
  215. - <code>reranking_model</code> (object) Rerank 模型配置
  216. - <code>reranking_provider_name</code> (string) Rerank 模型的提供商
  217. - <code>reranking_model_name</code> (string) Rerank 模型的名称
  218. - <code>top_k</code> (int) 召回条数
  219. - <code>score_threshold_enabled</code> (bool) 是否开启召回分数限制
  220. - <code>score_threshold</code> (float) 召回分数限制
  221. </Property>
  222. <Property name='embedding_model' type='string' key='embedding_model'>
  223. Embedding 模型名称
  224. </Property>
  225. <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
  226. Embedding 模型供应商
  227. </Property>
  228. </Properties>
  229. </Col>
  230. <Col sticky>
  231. <CodeGroup
  232. title="Request"
  233. tag="POST"
  234. label="/datasets/{dataset_id}/document/create-by-file"
  235. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{\\"name\\":\\"Dify\\",\\"indexing_technique\\":\\"high_quality\\",\\"process_rule\\":{\\"rules\\":{\\"pre_processing_rules\\":[{\\"id\\":\\"remove_extra_spaces\\",\\"enabled\\":true},{\\"id\\":\\"remove_urls_emails\\",\\"enabled\\":true}],\\"segmentation\\":{\\"separator\\":\\"###\\",\\"max_tokens\\":500}},\\"mode\\":\\"custom\\"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
  236. >
  237. ```bash {{ title: 'cURL' }}
  238. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \
  239. --header 'Authorization: Bearer {api_key}' \
  240. --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
  241. --form 'file=@"/path/to/file"'
  242. ```
  243. </CodeGroup>
  244. <CodeGroup title="Response">
  245. ```json {{ title: 'Response' }}
  246. {
  247. "document": {
  248. "id": "",
  249. "position": 1,
  250. "data_source_type": "upload_file",
  251. "data_source_info": {
  252. "upload_file_id": ""
  253. },
  254. "dataset_process_rule_id": "",
  255. "name": "Dify.txt",
  256. "created_from": "api",
  257. "created_by": "",
  258. "created_at": 1695308667,
  259. "tokens": 0,
  260. "indexing_status": "waiting",
  261. "error": null,
  262. "enabled": true,
  263. "disabled_at": null,
  264. "disabled_by": null,
  265. "archived": false,
  266. "display_status": "queuing",
  267. "word_count": 0,
  268. "hit_count": 0,
  269. "doc_form": "text_model"
  270. },
  271. "batch": ""
  272. }
  273. ```
  274. </CodeGroup>
  275. </Col>
  276. </Row>
  277. <hr className='ml-0 mr-0' />
  278. <Heading
  279. url='/datasets'
  280. method='POST'
  281. title='创建空知识库'
  282. name='#create_empty_dataset'
  283. />
  284. <Row>
  285. <Col>
  286. ### Request Body
  287. <Properties>
  288. <Property name='name' type='string' key='name'>
  289. 知识库名称(必填)
  290. </Property>
  291. <Property name='description' type='string' key='description'>
  292. 知识库描述(选填)
  293. </Property>
  294. <Property name='indexing_technique' type='string' key='indexing_technique'>
  295. 索引模式(选填,建议填写)
  296. - <code>high_quality</code> 高质量
  297. - <code>economy</code> 经济
  298. </Property>
  299. <Property name='permission' type='string' key='permission'>
  300. 权限(选填,默认 only_me)
  301. - <code>only_me</code> 仅自己
  302. - <code>all_team_members</code> 所有团队成员
  303. - <code>partial_members</code> 部分团队成员
  304. </Property>
  305. <Property name='provider' type='string' key='provider'>
  306. Provider(选填,默认 vendor)
  307. - <code>vendor</code> 上传文件
  308. - <code>external</code> 外部知识库
  309. </Property>
  310. <Property name='external_knowledge_api_id' type='string' key='external_knowledge_api_id'>
  311. 外部知识库 API_ID(选填)
  312. </Property>
  313. <Property name='external_knowledge_id' type='string' key='external_knowledge_id'>
  314. 外部知识库 ID(选填)
  315. </Property>
  316. </Properties>
  317. </Col>
  318. <Col sticky>
  319. <CodeGroup
  320. title="Request"
  321. tag="POST"
  322. label="/datasets"
  323. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name", "permission": "only_me"}'`}
  324. >
  325. ```bash {{ title: 'cURL' }}
  326. curl --location --request POST '${props.apiBaseUrl}/datasets' \
  327. --header 'Authorization: Bearer {api_key}' \
  328. --header 'Content-Type: application/json' \
  329. --data-raw '{
  330. "name": "name",
  331. "permission": "only_me"
  332. }'
  333. ```
  334. </CodeGroup>
  335. <CodeGroup title="Response">
  336. ```json {{ title: 'Response' }}
  337. {
  338. "id": "",
  339. "name": "name",
  340. "description": null,
  341. "provider": "vendor",
  342. "permission": "only_me",
  343. "data_source_type": null,
  344. "indexing_technique": null,
  345. "app_count": 0,
  346. "document_count": 0,
  347. "word_count": 0,
  348. "created_by": "",
  349. "created_at": 1695636173,
  350. "updated_by": "",
  351. "updated_at": 1695636173,
  352. "embedding_model": null,
  353. "embedding_model_provider": null,
  354. "embedding_available": null
  355. }
  356. ```
  357. </CodeGroup>
  358. </Col>
  359. </Row>
  360. <hr className='ml-0 mr-0' />
  361. <Heading
  362. url='/datasets'
  363. method='GET'
  364. title='知识库列表'
  365. name='#dataset_list'
  366. />
  367. <Row>
  368. <Col>
  369. ### Query
  370. <Properties>
  371. <Property name='keyword' type='string' key='keyword'>
  372. 搜索关键词,可选
  373. </Property>
  374. <Property name='tag_ids' type='array[string]' key='tag_ids'>
  375. 标签 ID 列表,可选
  376. </Property>
  377. <Property name='page' type='integer' key='page'>
  378. 页码,可选,默认为 1
  379. </Property>
  380. <Property name='limit' type='integer' key='limit'>
  381. 返回条数,可选,默认 20,范围 1-100
  382. </Property>
  383. <Property name='include_all' type='boolean' key='include_all'>
  384. 是否包含所有数据集(仅对所有者生效),可选,默认为 false
  385. </Property>
  386. </Properties>
  387. </Col>
  388. <Col sticky>
  389. <CodeGroup
  390. title="Request"
  391. tag="GET"
  392. label="/datasets"
  393. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
  394. >
  395. ```bash {{ title: 'cURL' }}
  396. curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \
  397. --header 'Authorization: Bearer {api_key}'
  398. ```
  399. </CodeGroup>
  400. <CodeGroup title="Response">
  401. ```json {{ title: 'Response' }}
  402. {
  403. "data": [
  404. {
  405. "id": "",
  406. "name": "知识库名称",
  407. "description": "描述信息",
  408. "permission": "only_me",
  409. "data_source_type": "upload_file",
  410. "indexing_technique": "",
  411. "app_count": 2,
  412. "document_count": 10,
  413. "word_count": 1200,
  414. "created_by": "",
  415. "created_at": "",
  416. "updated_by": "",
  417. "updated_at": ""
  418. },
  419. ...
  420. ],
  421. "has_more": true,
  422. "limit": 20,
  423. "total": 50,
  424. "page": 1
  425. }
  426. ```
  427. </CodeGroup>
  428. </Col>
  429. </Row>
  430. <hr className='ml-0 mr-0' />
  431. <Heading
  432. url='/datasets/{dataset_id}'
  433. method='GET'
  434. title='查看知识库详情'
  435. name='#view_dataset'
  436. />
  437. <Row>
  438. <Col>
  439. ### Path
  440. <Properties>
  441. <Property name='dataset_id' type='string' key='dataset_id'>
  442. 知识库 ID
  443. </Property>
  444. </Properties>
  445. </Col>
  446. <Col sticky>
  447. <CodeGroup
  448. title="Request"
  449. tag="GET"
  450. label="/datasets/{dataset_id}"
  451. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  452. >
  453. ```bash {{ title: 'cURL' }}
  454. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}' \
  455. --header 'Authorization: Bearer {api_key}'
  456. ```
  457. </CodeGroup>
  458. <CodeGroup title="Response">
  459. ```json {{ title: 'Response' }}
  460. {
  461. "id": "eaedb485-95ac-4ffd-ab1e-18da6d676a2f",
  462. "name": "Test Knowledge Base",
  463. "description": "",
  464. "provider": "vendor",
  465. "permission": "only_me",
  466. "data_source_type": null,
  467. "indexing_technique": null,
  468. "app_count": 0,
  469. "document_count": 0,
  470. "word_count": 0,
  471. "created_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6",
  472. "created_at": 1735620612,
  473. "updated_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6",
  474. "updated_at": 1735620612,
  475. "embedding_model": null,
  476. "embedding_model_provider": null,
  477. "embedding_available": true,
  478. "retrieval_model_dict": {
  479. "search_method": "semantic_search",
  480. "reranking_enable": false,
  481. "reranking_mode": null,
  482. "reranking_model": {
  483. "reranking_provider_name": "",
  484. "reranking_model_name": ""
  485. },
  486. "weights": null,
  487. "top_k": 2,
  488. "score_threshold_enabled": false,
  489. "score_threshold": null
  490. },
  491. "tags": [],
  492. "doc_form": null,
  493. "external_knowledge_info": {
  494. "external_knowledge_id": null,
  495. "external_knowledge_api_id": null,
  496. "external_knowledge_api_name": null,
  497. "external_knowledge_api_endpoint": null
  498. },
  499. "external_retrieval_model": {
  500. "top_k": 2,
  501. "score_threshold": 0.0,
  502. "score_threshold_enabled": null
  503. }
  504. }
  505. ```
  506. </CodeGroup>
  507. </Col>
  508. </Row>
  509. <hr className='ml-0 mr-0' />
  510. <Heading
  511. url='/datasets/{dataset_id}'
  512. method='POST'
  513. title='修改知识库详情'
  514. name='#update_dataset'
  515. />
  516. <Row>
  517. <Col>
  518. ### Query
  519. <Properties>
  520. <Property name='dataset_id' type='string' key='dataset_id'>
  521. 知识库 ID
  522. </Property>
  523. <Property name='indexing_technique' type='string' key='indexing_technique'>
  524. 索引模式(选填,建议填写)
  525. - <code>high_quality</code> 高质量
  526. - <code>economy</code> 经济
  527. </Property>
  528. <Property name='permission' type='string' key='permission'>
  529. 权限(选填,默认 only_me)
  530. - <code>only_me</code> 仅自己
  531. - <code>all_team_members</code> 所有团队成员
  532. - <code>partial_members</code> 部分团队成员
  533. </Property>
  534. <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
  535. 嵌入模型提供商(选填),必须先在系统内设定好接入的模型,对应的是 provider 字段
  536. </Property>
  537. <Property name='embedding_model' type='string' key='embedding_model'>
  538. 嵌入模型(选填)
  539. </Property>
  540. <Property name='retrieval_model' type='string' key='retrieval_model'>
  541. 检索模型(选填)
  542. </Property>
  543. <Property name='partial_member_list' type='array' key='partial_member_list'>
  544. 部分团队成员 ID 列表(选填)
  545. </Property>
  546. </Properties>
  547. </Col>
  548. <Col sticky>
  549. <CodeGroup
  550. title="Request"
  551. tag="POST"
  552. label="/datasets/{dataset_id}"
  553. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me", "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}' `}
  554. >
  555. ```bash {{ title: 'cURL' }}
  556. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}' \
  557. --header 'Authorization: Bearer {api_key}' \
  558. --header 'Content-Type: application/json' \
  559. --data-raw '{"name": "Test Knowledge Base", "indexing_technique": "high_quality", "permission": "only_me",
  560. "embedding_model_provider": "zhipuai", "embedding_model": "embedding-3", "retrieval_model": "", "partial_member_list": []}'
  561. ```
  562. </CodeGroup>
  563. <CodeGroup title="Response">
  564. ```json {{ title: 'Response' }}
  565. {
  566. "id": "eaedb485-95ac-4ffd-ab1e-18da6d676a2f",
  567. "name": "Test Knowledge Base",
  568. "description": "",
  569. "provider": "vendor",
  570. "permission": "only_me",
  571. "data_source_type": null,
  572. "indexing_technique": "high_quality",
  573. "app_count": 0,
  574. "document_count": 0,
  575. "word_count": 0,
  576. "created_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6",
  577. "created_at": 1735620612,
  578. "updated_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6",
  579. "updated_at": 1735622679,
  580. "embedding_model": "embedding-3",
  581. "embedding_model_provider": "zhipuai",
  582. "embedding_available": null,
  583. "retrieval_model_dict": {
  584. "search_method": "semantic_search",
  585. "reranking_enable": false,
  586. "reranking_mode": null,
  587. "reranking_model": {
  588. "reranking_provider_name": "",
  589. "reranking_model_name": ""
  590. },
  591. "weights": null,
  592. "top_k": 2,
  593. "score_threshold_enabled": false,
  594. "score_threshold": null
  595. },
  596. "tags": [],
  597. "doc_form": null,
  598. "external_knowledge_info": {
  599. "external_knowledge_id": null,
  600. "external_knowledge_api_id": null,
  601. "external_knowledge_api_name": null,
  602. "external_knowledge_api_endpoint": null
  603. },
  604. "external_retrieval_model": {
  605. "top_k": 2,
  606. "score_threshold": 0.0,
  607. "score_threshold_enabled": null
  608. },
  609. "partial_member_list": []
  610. }
  611. ```
  612. </CodeGroup>
  613. </Col>
  614. </Row>
  615. <hr className='ml-0 mr-0' />
  616. <Heading
  617. url='/datasets/{dataset_id}'
  618. method='DELETE'
  619. title='删除知识库'
  620. name='#delete_dataset'
  621. />
  622. <Row>
  623. <Col>
  624. ### Path
  625. <Properties>
  626. <Property name='dataset_id' type='string' key='dataset_id'>
  627. 知识库 ID
  628. </Property>
  629. </Properties>
  630. </Col>
  631. <Col sticky>
  632. <CodeGroup
  633. title="Request"
  634. tag="DELETE"
  635. label="/datasets/{dataset_id}"
  636. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  637. >
  638. ```bash {{ title: 'cURL' }}
  639. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}' \
  640. --header 'Authorization: Bearer {api_key}'
  641. ```
  642. </CodeGroup>
  643. <CodeGroup title="Response">
  644. ```text {{ title: 'Response' }}
  645. 204 No Content
  646. ```
  647. </CodeGroup>
  648. </Col>
  649. </Row>
  650. <hr className='ml-0 mr-0' />
  651. <Heading
  652. url='/datasets/{dataset_id}/documents/{document_id}/update-by-text'
  653. method='POST'
  654. title='通过文本更新文档'
  655. name='#update-by-text'
  656. />
  657. <Row>
  658. <Col>
  659. 此接口基于已存在知识库,在此知识库的基础上通过文本更新文档
  660. ### Path
  661. <Properties>
  662. <Property name='dataset_id' type='string' key='dataset_id'>
  663. 知识库 ID
  664. </Property>
  665. <Property name='document_id' type='string' key='document_id'>
  666. 文档 ID
  667. </Property>
  668. </Properties>
  669. ### Request Body
  670. <Properties>
  671. <Property name='name' type='string' key='name'>
  672. 文档名称(选填)
  673. </Property>
  674. <Property name='text' type='string' key='text'>
  675. 文档内容(选填)
  676. </Property>
  677. <Property name='process_rule' type='object' key='process_rule'>
  678. 处理规则(选填)
  679. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  680. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  681. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  682. - <code>id</code> (string) 预处理规则的唯一标识符
  683. - 枚举:
  684. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  685. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  686. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  687. - <code>segmentation</code> (object) 分段规则
  688. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
  689. - <code>max_tokens</code> 最大长度(token)默认为 1000
  690. - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
  691. - <code>subchunk_segmentation</code> (object) 子分段规则
  692. - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
  693. - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
  694. - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填)
  695. </Property>
  696. </Properties>
  697. </Col>
  698. <Col sticky>
  699. <CodeGroup
  700. title="Request"
  701. tag="POST"
  702. label="/datasets/{dataset_id}/documents/{document_id}/update-by-text"
  703. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
  704. >
  705. ```bash {{ title: 'cURL' }}
  706. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \
  707. --header 'Authorization: Bearer {api_key}' \
  708. --header 'Content-Type: application/json' \
  709. --data-raw '{
  710. "name": "name",
  711. "text": "text"
  712. }'
  713. ```
  714. </CodeGroup>
  715. <CodeGroup title="Response">
  716. ```json {{ title: 'Response' }}
  717. {
  718. "document": {
  719. "id": "",
  720. "position": 1,
  721. "data_source_type": "upload_file",
  722. "data_source_info": {
  723. "upload_file_id": ""
  724. },
  725. "dataset_process_rule_id": "",
  726. "name": "name.txt",
  727. "created_from": "api",
  728. "created_by": "",
  729. "created_at": 1695308667,
  730. "tokens": 0,
  731. "indexing_status": "waiting",
  732. "error": null,
  733. "enabled": true,
  734. "disabled_at": null,
  735. "disabled_by": null,
  736. "archived": false,
  737. "display_status": "queuing",
  738. "word_count": 0,
  739. "hit_count": 0,
  740. "doc_form": "text_model"
  741. },
  742. "batch": ""
  743. }
  744. ```
  745. </CodeGroup>
  746. </Col>
  747. </Row>
  748. <hr className='ml-0 mr-0' />
  749. <Heading
  750. url='/datasets/{dataset_id}/documents/{document_id}/update-by-file'
  751. method='POST'
  752. title='通过文件更新文档'
  753. name='#update-by-file'
  754. />
  755. <Row>
  756. <Col>
  757. 此接口基于已存在知识库,在此知识库的基础上通过文件更新文档的操作。
  758. ### Path
  759. <Properties>
  760. <Property name='dataset_id' type='string' key='dataset_id'>
  761. 知识库 ID
  762. </Property>
  763. <Property name='document_id' type='string' key='document_id'>
  764. 文档 ID
  765. </Property>
  766. </Properties>
  767. ### Request Body
  768. <Properties>
  769. <Property name='name' type='string' key='name'>
  770. 文档名称(选填)
  771. </Property>
  772. <Property name='file' type='multipart/form-data' key='file'>
  773. 需要上传的文件
  774. </Property>
  775. <Property name='process_rule' type='object' key='process_rule'>
  776. 处理规则(选填)
  777. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  778. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  779. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  780. - <code>id</code> (string) 预处理规则的唯一标识符
  781. - 枚举:
  782. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  783. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  784. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  785. - <code>segmentation</code> (object) 分段规则
  786. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
  787. - <code>max_tokens</code> 最大长度(token)默认为 1000
  788. - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
  789. - <code>subchunk_segmentation</code> (object) 子分段规则
  790. - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
  791. - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
  792. - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填)
  793. </Property>
  794. </Properties>
  795. </Col>
  796. <Col sticky>
  797. <CodeGroup
  798. title="Request"
  799. tag="POST"
  800. label="/datasets/{dataset_id}/documents/{document_id}/update-by-file"
  801. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{\\"name\\":\\"Dify\\",\\"indexing_technique\\":\\"high_quality\\",\\"process_rule\\":{\\"rules\\":{\\"pre_processing_rules\\":[{\\"id\\":\\"remove_extra_spaces\\",\\"enabled\\":true},{\\"id\\":\\"remove_urls_emails\\",\\"enabled\\":true}],\\"segmentation\\":{\\"separator\\":\\"###\\",\\"max_tokens\\":500}},\\"mode\\":\\"custom\\"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
  802. >
  803. ```bash {{ title: 'cURL' }}
  804. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \
  805. --header 'Authorization: Bearer {api_key}' \
  806. --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
  807. --form 'file=@"/path/to/file"'
  808. ```
  809. </CodeGroup>
  810. <CodeGroup title="Response">
  811. ```json {{ title: 'Response' }}
  812. {
  813. "document": {
  814. "id": "",
  815. "position": 1,
  816. "data_source_type": "upload_file",
  817. "data_source_info": {
  818. "upload_file_id": ""
  819. },
  820. "dataset_process_rule_id": "",
  821. "name": "Dify.txt",
  822. "created_from": "api",
  823. "created_by": "",
  824. "created_at": 1695308667,
  825. "tokens": 0,
  826. "indexing_status": "waiting",
  827. "error": null,
  828. "enabled": true,
  829. "disabled_at": null,
  830. "disabled_by": null,
  831. "archived": false,
  832. "display_status": "queuing",
  833. "word_count": 0,
  834. "hit_count": 0,
  835. "doc_form": "text_model"
  836. },
  837. "batch": "20230921150427533684"
  838. }
  839. ```
  840. </CodeGroup>
  841. </Col>
  842. </Row>
  843. <hr className='ml-0 mr-0' />
  844. <Heading
  845. url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
  846. method='GET'
  847. title='获取文档嵌入状态(进度)'
  848. name='#indexing_status'
  849. />
  850. <Row>
  851. <Col>
  852. ### Path
  853. <Properties>
  854. <Property name='dataset_id' type='string' key='dataset_id'>
  855. 知识库 ID
  856. </Property>
  857. <Property name='batch' type='string' key='batch'>
  858. 上传文档的批次号
  859. </Property>
  860. </Properties>
  861. </Col>
  862. <Col sticky>
  863. <CodeGroup
  864. title="Request"
  865. tag="GET"
  866. label="/datasets/{dataset_id}/documents/{batch}/indexing-status"
  867. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
  868. >
  869. ```bash {{ title: 'cURL' }}
  870. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \
  871. --header 'Authorization: Bearer {api_key}'
  872. ```
  873. </CodeGroup>
  874. <CodeGroup title="Response">
  875. ```json {{ title: 'Response' }}
  876. {
  877. "data":[{
  878. "id": "",
  879. "indexing_status": "indexing",
  880. "processing_started_at": 1681623462.0,
  881. "parsing_completed_at": 1681623462.0,
  882. "cleaning_completed_at": 1681623462.0,
  883. "splitting_completed_at": 1681623462.0,
  884. "completed_at": null,
  885. "paused_at": null,
  886. "error": null,
  887. "stopped_at": null,
  888. "completed_segments": 24,
  889. "total_segments": 100
  890. }]
  891. }
  892. ```
  893. </CodeGroup>
  894. </Col>
  895. </Row>
  896. <hr className='ml-0 mr-0' />
  897. <Heading
  898. url='/datasets/{dataset_id}/documents/{document_id}'
  899. method='DELETE'
  900. title='删除文档'
  901. name='#delete_document'
  902. />
  903. <Row>
  904. <Col>
  905. ### Path
  906. <Properties>
  907. <Property name='dataset_id' type='string' key='dataset_id'>
  908. 知识库 ID
  909. </Property>
  910. <Property name='document_id' type='string' key='document_id'>
  911. 文档 ID
  912. </Property>
  913. </Properties>
  914. </Col>
  915. <Col sticky>
  916. <CodeGroup
  917. title="Request"
  918. tag="DELETE"
  919. label="/datasets/{dataset_id}/documents/{document_id}"
  920. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  921. >
  922. ```bash {{ title: 'cURL' }}
  923. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \
  924. --header 'Authorization: Bearer {api_key}'
  925. ```
  926. </CodeGroup>
  927. <CodeGroup title="Response">
  928. ```json {{ title: 'Response' }}
  929. {
  930. "result": "success"
  931. }
  932. ```
  933. </CodeGroup>
  934. </Col>
  935. </Row>
  936. <hr className='ml-0 mr-0' />
  937. <Heading
  938. url='/datasets/{dataset_id}/documents'
  939. method='GET'
  940. title='知识库文档列表'
  941. name='#dataset_document_list'
  942. />
  943. <Row>
  944. <Col>
  945. ### Path
  946. <Properties>
  947. <Property name='dataset_id' type='string' key='dataset_id'>
  948. 知识库 ID
  949. </Property>
  950. </Properties>
  951. ### Query
  952. <Properties>
  953. <Property name='keyword' type='string' key='keyword'>
  954. 搜索关键词,可选,目前仅搜索文档名称
  955. </Property>
  956. <Property name='page' type='string' key='page'>
  957. 页码,可选
  958. </Property>
  959. <Property name='limit' type='string' key='limit'>
  960. 返回条数,可选,默认 20,范围 1-100
  961. </Property>
  962. </Properties>
  963. </Col>
  964. <Col sticky>
  965. <CodeGroup
  966. title="Request"
  967. tag="GET"
  968. label="/datasets/{dataset_id}/documents"
  969. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`}
  970. >
  971. ```bash {{ title: 'cURL' }}
  972. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \
  973. --header 'Authorization: Bearer {api_key}'
  974. ```
  975. </CodeGroup>
  976. <CodeGroup title="Response">
  977. ```json {{ title: 'Response' }}
  978. {
  979. "data": [
  980. {
  981. "id": "",
  982. "position": 1,
  983. "data_source_type": "upload_file",
  984. "data_source_info": null,
  985. "dataset_process_rule_id": null,
  986. "name": "dify",
  987. "created_from": "",
  988. "created_by": "",
  989. "created_at": 1681623639,
  990. "tokens": 0,
  991. "indexing_status": "waiting",
  992. "error": null,
  993. "enabled": true,
  994. "disabled_at": null,
  995. "disabled_by": null,
  996. "archived": false
  997. }
  998. ],
  999. "has_more": false,
  1000. "limit": 20,
  1001. "total": 9,
  1002. "page": 1
  1003. }
  1004. ```
  1005. </CodeGroup>
  1006. </Col>
  1007. </Row>
  1008. <hr className='ml-0 mr-0' />
  1009. <Heading
  1010. url='/datasets/{dataset_id}/documents/{document_id}/segments'
  1011. method='POST'
  1012. title='新增分段'
  1013. name='#create_new_segment'
  1014. />
  1015. <Row>
  1016. <Col>
  1017. ### Path
  1018. <Properties>
  1019. <Property name='dataset_id' type='string' key='dataset_id'>
  1020. 知识库 ID
  1021. </Property>
  1022. <Property name='document_id' type='string' key='document_id'>
  1023. 文档 ID
  1024. </Property>
  1025. </Properties>
  1026. ### Request Body
  1027. <Properties>
  1028. <Property name='segments' type='object list' key='segments'>
  1029. - <code>content</code> (text) 文本内容/问题内容,必填
  1030. - <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为 Q&A 模式则传值
  1031. - <code>keywords</code> (list) 关键字,非必填
  1032. </Property>
  1033. </Properties>
  1034. </Col>
  1035. <Col sticky>
  1036. <CodeGroup
  1037. title="Request"
  1038. tag="POST"
  1039. label="/datasets/{dataset_id}/documents/{document_id}/segments"
  1040. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`}
  1041. >
  1042. ```bash {{ title: 'cURL' }}
  1043. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
  1044. --header 'Authorization: Bearer {api_key}' \
  1045. --header 'Content-Type: application/json' \
  1046. --data-raw '{
  1047. "segments": [
  1048. {
  1049. "content": "1",
  1050. "answer": "1",
  1051. "keywords": ["a"]
  1052. }
  1053. ]
  1054. }'
  1055. ```
  1056. </CodeGroup>
  1057. <CodeGroup title="Response">
  1058. ```json {{ title: 'Response' }}
  1059. {
  1060. "data": [{
  1061. "id": "",
  1062. "position": 1,
  1063. "document_id": "",
  1064. "content": "1",
  1065. "answer": "1",
  1066. "word_count": 25,
  1067. "tokens": 0,
  1068. "keywords": [
  1069. "a"
  1070. ],
  1071. "index_node_id": "",
  1072. "index_node_hash": "",
  1073. "hit_count": 0,
  1074. "enabled": true,
  1075. "disabled_at": null,
  1076. "disabled_by": null,
  1077. "status": "completed",
  1078. "created_by": "",
  1079. "created_at": 1695312007,
  1080. "indexing_at": 1695312007,
  1081. "completed_at": 1695312007,
  1082. "error": null,
  1083. "stopped_at": null
  1084. }],
  1085. "doc_form": "text_model"
  1086. }
  1087. ```
  1088. </CodeGroup>
  1089. </Col>
  1090. </Row>
  1091. <hr className='ml-0 mr-0' />
  1092. <Heading
  1093. url='/datasets/{dataset_id}/documents/{document_id}/segments'
  1094. method='GET'
  1095. title='查询文档分段'
  1096. name='#get_segment'
  1097. />
  1098. <Row>
  1099. <Col>
  1100. ### Path
  1101. <Properties>
  1102. <Property name='dataset_id' type='string' key='dataset_id'>
  1103. 知识库 ID
  1104. </Property>
  1105. <Property name='document_id' type='string' key='document_id'>
  1106. 文档 ID
  1107. </Property>
  1108. </Properties>
  1109. ### Query
  1110. <Properties>
  1111. <Property name='keyword' type='string' key='keyword'>
  1112. 搜索关键词,可选
  1113. </Property>
  1114. <Property name='status' type='string' key='status'>
  1115. 搜索状态,completed
  1116. </Property>
  1117. <Property name='page' type='string' key='page'>
  1118. 页码,可选
  1119. </Property>
  1120. <Property name='limit' type='string' key='limit'>
  1121. 返回条数,可选,默认 20,范围 1-100
  1122. </Property>
  1123. </Properties>
  1124. </Col>
  1125. <Col sticky>
  1126. <CodeGroup
  1127. title="Request"
  1128. tag="GET"
  1129. label="/datasets/{dataset_id}/documents/{document_id}/segments"
  1130. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  1131. >
  1132. ```bash {{ title: 'cURL' }}
  1133. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
  1134. --header 'Authorization: Bearer {api_key}' \
  1135. --header 'Content-Type: application/json'
  1136. ```
  1137. </CodeGroup>
  1138. <CodeGroup title="Response">
  1139. ```json {{ title: 'Response' }}
  1140. {
  1141. "data": [{
  1142. "id": "",
  1143. "position": 1,
  1144. "document_id": "",
  1145. "content": "1",
  1146. "answer": "1",
  1147. "word_count": 25,
  1148. "tokens": 0,
  1149. "keywords": [
  1150. "a"
  1151. ],
  1152. "index_node_id": "",
  1153. "index_node_hash": "",
  1154. "hit_count": 0,
  1155. "enabled": true,
  1156. "disabled_at": null,
  1157. "disabled_by": null,
  1158. "status": "completed",
  1159. "created_by": "",
  1160. "created_at": 1695312007,
  1161. "indexing_at": 1695312007,
  1162. "completed_at": 1695312007,
  1163. "error": null,
  1164. "stopped_at": null
  1165. }],
  1166. "doc_form": "text_model",
  1167. "has_more": false,
  1168. "limit": 20,
  1169. "total": 9,
  1170. "page": 1
  1171. }
  1172. ```
  1173. </CodeGroup>
  1174. </Col>
  1175. </Row>
  1176. <hr className='ml-0 mr-0' />
  1177. <Heading
  1178. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
  1179. method='DELETE'
  1180. title='删除文档分段'
  1181. name='#delete_segment'
  1182. />
  1183. <Row>
  1184. <Col>
  1185. ### Path
  1186. <Properties>
  1187. <Property name='dataset_id' type='string' key='dataset_id'>
  1188. 知识库 ID
  1189. </Property>
  1190. <Property name='document_id' type='string' key='document_id'>
  1191. 文档 ID
  1192. </Property>
  1193. <Property name='segment_id' type='string' key='segment_id'>
  1194. 文档分段ID
  1195. </Property>
  1196. </Properties>
  1197. </Col>
  1198. <Col sticky>
  1199. <CodeGroup
  1200. title="Request"
  1201. tag="DELETE"
  1202. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
  1203. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  1204. >
  1205. ```bash {{ title: 'cURL' }}
  1206. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
  1207. --header 'Authorization: Bearer {api_key}' \
  1208. --header 'Content-Type: application/json'
  1209. ```
  1210. </CodeGroup>
  1211. <CodeGroup title="Response">
  1212. ```json {{ title: 'Response' }}
  1213. {
  1214. "result": "success"
  1215. }
  1216. ```
  1217. </CodeGroup>
  1218. </Col>
  1219. </Row>
  1220. <hr className='ml-0 mr-0' />
  1221. <Heading
  1222. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
  1223. method='POST'
  1224. title='更新文档分段'
  1225. name='#update_segment'
  1226. />
  1227. <Row>
  1228. <Col>
  1229. ### Path
  1230. <Properties>
  1231. <Property name='dataset_id' type='string' key='dataset_id'>
  1232. 知识库 ID
  1233. </Property>
  1234. <Property name='document_id' type='string' key='document_id'>
  1235. 文档 ID
  1236. </Property>
  1237. <Property name='segment_id' type='string' key='segment_id'>
  1238. 文档分段ID
  1239. </Property>
  1240. </Properties>
  1241. ### Request Body
  1242. <Properties>
  1243. <Property name='segment' type='object' key='segment'>
  1244. - <code>content</code> (text) 文本内容/问题内容,必填
  1245. - <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为 Q&A 模式则传值
  1246. - <code>keywords</code> (list) 关键字,非必填
  1247. - <code>enabled</code> (bool) false/true,非必填
  1248. - <code>regenerate_child_chunks</code> (bool) 是否重新生成子分段,非必填
  1249. </Property>
  1250. </Properties>
  1251. </Col>
  1252. <Col sticky>
  1253. <CodeGroup
  1254. title="Request"
  1255. tag="POST"
  1256. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
  1257. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\"segment\": {\"content\": \"1\",\"answer\": \"1\", \"keywords\": [\"a\"], \"enabled\": false}}'`}
  1258. >
  1259. ```bash {{ title: 'cURL' }}
  1260. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
  1261. --header 'Authorization: Bearer {api_key}' \
  1262. --header 'Content-Type: application/json' \
  1263. --data-raw '{
  1264. "segment": {
  1265. "content": "1",
  1266. "answer": "1",
  1267. "keywords": ["a"],
  1268. "enabled": false
  1269. }
  1270. }'
  1271. ```
  1272. </CodeGroup>
  1273. <CodeGroup title="Response">
  1274. ```json {{ title: 'Response' }}
  1275. {
  1276. "data": {
  1277. "id": "",
  1278. "position": 1,
  1279. "document_id": "",
  1280. "content": "1",
  1281. "answer": "1",
  1282. "word_count": 25,
  1283. "tokens": 0,
  1284. "keywords": [
  1285. "a"
  1286. ],
  1287. "index_node_id": "",
  1288. "index_node_hash": "",
  1289. "hit_count": 0,
  1290. "enabled": true,
  1291. "disabled_at": null,
  1292. "disabled_by": null,
  1293. "status": "completed",
  1294. "created_by": "",
  1295. "created_at": 1695312007,
  1296. "indexing_at": 1695312007,
  1297. "completed_at": 1695312007,
  1298. "error": null,
  1299. "stopped_at": null
  1300. },
  1301. "doc_form": "text_model"
  1302. }
  1303. ```
  1304. </CodeGroup>
  1305. </Col>
  1306. </Row>
  1307. <hr className='ml-0 mr-0' />
  1308. <Heading
  1309. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks'
  1310. method='POST'
  1311. title='新增文档子分段'
  1312. name='#create_child_chunk'
  1313. />
  1314. <Row>
  1315. <Col>
  1316. ### Path
  1317. <Properties>
  1318. <Property name='dataset_id' type='string' key='dataset_id'>
  1319. 知识库 ID
  1320. </Property>
  1321. <Property name='document_id' type='string' key='document_id'>
  1322. 文档 ID
  1323. </Property>
  1324. <Property name='segment_id' type='string' key='segment_id'>
  1325. 分段 ID
  1326. </Property>
  1327. </Properties>
  1328. ### Request Body
  1329. <Properties>
  1330. <Property name='content' type='string' key='content'>
  1331. 子分段内容
  1332. </Property>
  1333. </Properties>
  1334. </Col>
  1335. <Col sticky>
  1336. <CodeGroup
  1337. title="Request"
  1338. tag="POST"
  1339. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks"
  1340. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "子分段内容"}'`}
  1341. >
  1342. ```bash {{ title: 'cURL' }}
  1343. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \
  1344. --header 'Authorization: Bearer {api_key}' \
  1345. --header 'Content-Type: application/json' \
  1346. --data-raw '{
  1347. "content": "子分段内容"
  1348. }'
  1349. ```
  1350. </CodeGroup>
  1351. <CodeGroup title="Response">
  1352. ```json {{ title: 'Response' }}
  1353. {
  1354. "data": {
  1355. "id": "",
  1356. "segment_id": "",
  1357. "content": "子分段内容",
  1358. "word_count": 25,
  1359. "tokens": 0,
  1360. "index_node_id": "",
  1361. "index_node_hash": "",
  1362. "status": "completed",
  1363. "created_by": "",
  1364. "created_at": 1695312007,
  1365. "indexing_at": 1695312007,
  1366. "completed_at": 1695312007,
  1367. "error": null,
  1368. "stopped_at": null
  1369. }
  1370. }
  1371. ```
  1372. </CodeGroup>
  1373. </Col>
  1374. </Row>
  1375. <hr className='ml-0 mr-0' />
  1376. <Heading
  1377. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks'
  1378. method='GET'
  1379. title='查询文档子分段'
  1380. name='#get_child_chunks'
  1381. />
  1382. <Row>
  1383. <Col>
  1384. ### Path
  1385. <Properties>
  1386. <Property name='dataset_id' type='string' key='dataset_id'>
  1387. 知识库 ID
  1388. </Property>
  1389. <Property name='document_id' type='string' key='document_id'>
  1390. 文档 ID
  1391. </Property>
  1392. <Property name='segment_id' type='string' key='segment_id'>
  1393. 分段 ID
  1394. </Property>
  1395. </Properties>
  1396. ### Query
  1397. <Properties>
  1398. <Property name='keyword' type='string' key='keyword'>
  1399. 搜索关键词(选填)
  1400. </Property>
  1401. <Property name='page' type='integer' key='page'>
  1402. 页码(选填,默认1)
  1403. </Property>
  1404. <Property name='limit' type='integer' key='limit'>
  1405. 每页数量(选填,默认20,最大100)
  1406. </Property>
  1407. </Properties>
  1408. </Col>
  1409. <Col sticky>
  1410. <CodeGroup
  1411. title="Request"
  1412. tag="GET"
  1413. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks"
  1414. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
  1415. >
  1416. ```bash {{ title: 'cURL' }}
  1417. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \
  1418. --header 'Authorization: Bearer {api_key}'
  1419. ```
  1420. </CodeGroup>
  1421. <CodeGroup title="Response">
  1422. ```json {{ title: 'Response' }}
  1423. {
  1424. "data": [{
  1425. "id": "",
  1426. "segment_id": "",
  1427. "content": "子分段内容",
  1428. "word_count": 25,
  1429. "tokens": 0,
  1430. "index_node_id": "",
  1431. "index_node_hash": "",
  1432. "status": "completed",
  1433. "created_by": "",
  1434. "created_at": 1695312007,
  1435. "indexing_at": 1695312007,
  1436. "completed_at": 1695312007,
  1437. "error": null,
  1438. "stopped_at": null
  1439. }],
  1440. "total": 1,
  1441. "total_pages": 1,
  1442. "page": 1,
  1443. "limit": 20
  1444. }
  1445. ```
  1446. </CodeGroup>
  1447. </Col>
  1448. </Row>
  1449. <hr className='ml-0 mr-0' />
  1450. <Heading
  1451. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}'
  1452. method='DELETE'
  1453. title='删除文档子分段'
  1454. name='#delete_child_chunk'
  1455. />
  1456. <Row>
  1457. <Col>
  1458. ### Path
  1459. <Properties>
  1460. <Property name='dataset_id' type='string' key='dataset_id'>
  1461. 知识库 ID
  1462. </Property>
  1463. <Property name='document_id' type='string' key='document_id'>
  1464. 文档 ID
  1465. </Property>
  1466. <Property name='segment_id' type='string' key='segment_id'>
  1467. 分段 ID
  1468. </Property>
  1469. <Property name='child_chunk_id' type='string' key='child_chunk_id'>
  1470. 子分段 ID
  1471. </Property>
  1472. </Properties>
  1473. </Col>
  1474. <Col sticky>
  1475. <CodeGroup
  1476. title="Request"
  1477. tag="DELETE"
  1478. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}"
  1479. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  1480. >
  1481. ```bash {{ title: 'cURL' }}
  1482. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \
  1483. --header 'Authorization: Bearer {api_key}'
  1484. ```
  1485. </CodeGroup>
  1486. <CodeGroup title="Response">
  1487. ```json {{ title: 'Response' }}
  1488. {
  1489. "result": "success"
  1490. }
  1491. ```
  1492. </CodeGroup>
  1493. </Col>
  1494. </Row>
  1495. <hr className='ml-0 mr-0' />
  1496. <Row>
  1497. <Col>
  1498. ### 错误信息
  1499. <Properties>
  1500. <Property name='code' type='string' key='code'>
  1501. 返回的错误代码
  1502. </Property>
  1503. </Properties>
  1504. <Properties>
  1505. <Property name='status' type='number' key='status'>
  1506. 返回的错误状态
  1507. </Property>
  1508. </Properties>
  1509. <Properties>
  1510. <Property name='message' type='string' key='message'>
  1511. 返回的错误信息
  1512. </Property>
  1513. </Properties>
  1514. </Col>
  1515. <Col>
  1516. <CodeGroup title="Example">
  1517. ```json {{ title: 'Response' }}
  1518. {
  1519. "code": "no_file_uploaded",
  1520. "message": "Please upload your file.",
  1521. "status": 400
  1522. }
  1523. ```
  1524. </CodeGroup>
  1525. </Col>
  1526. </Row>
  1527. <hr className='ml-0 mr-0' />
  1528. <Heading
  1529. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}'
  1530. method='PATCH'
  1531. title='更新文档子分段'
  1532. name='#update_child_chunk'
  1533. />
  1534. <Row>
  1535. <Col>
  1536. ### Path
  1537. <Properties>
  1538. <Property name='dataset_id' type='string' key='dataset_id'>
  1539. 知识库 ID
  1540. </Property>
  1541. <Property name='document_id' type='string' key='document_id'>
  1542. 文档 ID
  1543. </Property>
  1544. <Property name='segment_id' type='string' key='segment_id'>
  1545. 分段 ID
  1546. </Property>
  1547. <Property name='child_chunk_id' type='string' key='child_chunk_id'>
  1548. 子分段 ID
  1549. </Property>
  1550. </Properties>
  1551. ### Request Body
  1552. <Properties>
  1553. <Property name='content' type='string' key='content'>
  1554. 子分段内容
  1555. </Property>
  1556. </Properties>
  1557. </Col>
  1558. <Col sticky>
  1559. <CodeGroup
  1560. title="Request"
  1561. tag="PATCH"
  1562. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}"
  1563. targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "更新的子分段内容"}'`}
  1564. >
  1565. ```bash {{ title: 'cURL' }}
  1566. curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \
  1567. --header 'Authorization: Bearer {api_key}' \
  1568. --header 'Content-Type: application/json' \
  1569. --data-raw '{
  1570. "content": "更新的子分段内容"
  1571. }'
  1572. ```
  1573. </CodeGroup>
  1574. <CodeGroup title="Response">
  1575. ```json {{ title: 'Response' }}
  1576. {
  1577. "data": {
  1578. "id": "",
  1579. "segment_id": "",
  1580. "content": "更新的子分段内容",
  1581. "word_count": 25,
  1582. "tokens": 0,
  1583. "index_node_id": "",
  1584. "index_node_hash": "",
  1585. "status": "completed",
  1586. "created_by": "",
  1587. "created_at": 1695312007,
  1588. "indexing_at": 1695312007,
  1589. "completed_at": 1695312007,
  1590. "error": null,
  1591. "stopped_at": null
  1592. }
  1593. }
  1594. ```
  1595. </CodeGroup>
  1596. </Col>
  1597. </Row>
  1598. <hr className='ml-0 mr-0' />
  1599. <Heading
  1600. url='/datasets/{dataset_id}/documents/{document_id}/upload-file'
  1601. method='GET'
  1602. title='获取上传文件'
  1603. name='#get_upload_file'
  1604. />
  1605. <Row>
  1606. <Col>
  1607. ### Path
  1608. <Properties>
  1609. <Property name='dataset_id' type='string' key='dataset_id'>
  1610. 知识库 ID
  1611. </Property>
  1612. <Property name='document_id' type='string' key='document_id'>
  1613. 文档 ID
  1614. </Property>
  1615. </Properties>
  1616. </Col>
  1617. <Col sticky>
  1618. <CodeGroup
  1619. title="Request"
  1620. tag="GET"
  1621. label="/datasets/{dataset_id}/documents/{document_id}/upload-file"
  1622. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  1623. >
  1624. ```bash {{ title: 'cURL' }}
  1625. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \
  1626. --header 'Authorization: Bearer {api_key}' \
  1627. --header 'Content-Type: application/json'
  1628. ```
  1629. </CodeGroup>
  1630. <CodeGroup title="Response">
  1631. ```json {{ title: 'Response' }}
  1632. {
  1633. "id": "file_id",
  1634. "name": "file_name",
  1635. "size": 1024,
  1636. "extension": "txt",
  1637. "url": "preview_url",
  1638. "download_url": "download_url",
  1639. "mime_type": "text/plain",
  1640. "created_by": "user_id",
  1641. "created_at": 1728734540
  1642. }
  1643. ```
  1644. </CodeGroup>
  1645. </Col>
  1646. </Row>
  1647. <hr className='ml-0 mr-0' />
  1648. <Heading
  1649. url='/datasets/{dataset_id}/retrieve'
  1650. method='POST'
  1651. title='检索知识库'
  1652. name='#dataset_retrieval'
  1653. />
  1654. <Row>
  1655. <Col>
  1656. ### Path
  1657. <Properties>
  1658. <Property name='dataset_id' type='string' key='dataset_id'>
  1659. 知识库 ID
  1660. </Property>
  1661. </Properties>
  1662. ### Request Body
  1663. <Properties>
  1664. <Property name='query' type='string' key='query'>
  1665. 检索关键词
  1666. </Property>
  1667. <Property name='retrieval_model' type='object' key='retrieval_model'>
  1668. 检索参数(选填,如不填,按照默认方式召回)
  1669. - <code>search_method</code> (text) 检索方法:以下四个关键字之一,必填
  1670. - <code>keyword_search</code> 关键字检索
  1671. - <code>semantic_search</code> 语义检索
  1672. - <code>full_text_search</code> 全文检索
  1673. - <code>hybrid_search</code> 混合检索
  1674. - <code>reranking_enable</code> (bool) 是否启用 Reranking,非必填,如果检索模式为 semantic_search 模式或者 hybrid_search 则传值
  1675. - <code>reranking_model</code> (object) Rerank 模型配置,非必填,如果启用了 reranking 则传值
  1676. - <code>reranking_provider_name</code> (string) Rerank 模型提供商
  1677. - <code>reranking_model_name</code> (string) Rerank 模型名称
  1678. - <code>weights</code> (float) 混合检索模式下语义检索的权重设置
  1679. - <code>top_k</code> (integer) 返回结果数量,非必填
  1680. - <code>score_threshold_enabled</code> (bool) 是否开启 score 阈值
  1681. - <code>score_threshold</code> (float) Score 阈值
  1682. </Property>
  1683. <Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
  1684. 未启用字段
  1685. </Property>
  1686. </Properties>
  1687. </Col>
  1688. <Col sticky>
  1689. <CodeGroup
  1690. title="Request"
  1691. tag="POST"
  1692. label="/datasets/{dataset_id}/retrieve"
  1693. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{
  1694. "query": "test",
  1695. "retrieval_model": {
  1696. "search_method": "keyword_search",
  1697. "reranking_enable": false,
  1698. "reranking_mode": null,
  1699. "reranking_model": {
  1700. "reranking_provider_name": "",
  1701. "reranking_model_name": ""
  1702. },
  1703. "weights": null,
  1704. "top_k": 1,
  1705. "score_threshold_enabled": false,
  1706. "score_threshold": null
  1707. }
  1708. }'`}
  1709. >
  1710. ```bash {{ title: 'cURL' }}
  1711. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \
  1712. --header 'Authorization: Bearer {api_key}' \
  1713. --header 'Content-Type: application/json' \
  1714. --data-raw '{
  1715. "query": "test",
  1716. "retrieval_model": {
  1717. "search_method": "keyword_search",
  1718. "reranking_enable": false,
  1719. "reranking_mode": null,
  1720. "reranking_model": {
  1721. "reranking_provider_name": "",
  1722. "reranking_model_name": ""
  1723. },
  1724. "weights": null,
  1725. "top_k": 2,
  1726. "score_threshold_enabled": false,
  1727. "score_threshold": null
  1728. }
  1729. }'
  1730. ```
  1731. </CodeGroup>
  1732. <CodeGroup title="Response">
  1733. ```json {{ title: 'Response' }}
  1734. {
  1735. "query": {
  1736. "content": "test"
  1737. },
  1738. "records": [
  1739. {
  1740. "segment": {
  1741. "id": "7fa6f24f-8679-48b3-bc9d-bdf28d73f218",
  1742. "position": 1,
  1743. "document_id": "a8c6c36f-9f5d-4d7a-8472-f5d7b75d71d2",
  1744. "content": "Operation guide",
  1745. "answer": null,
  1746. "word_count": 847,
  1747. "tokens": 280,
  1748. "keywords": [
  1749. "install",
  1750. "java",
  1751. "base",
  1752. "scripts",
  1753. "jdk",
  1754. "manual",
  1755. "internal",
  1756. "opens",
  1757. "add",
  1758. "vmoptions"
  1759. ],
  1760. "index_node_id": "39dd8443-d960-45a8-bb46-7275ad7fbc8e",
  1761. "index_node_hash": "0189157697b3c6a418ccf8264a09699f25858975578f3467c76d6bfc94df1d73",
  1762. "hit_count": 0,
  1763. "enabled": true,
  1764. "disabled_at": null,
  1765. "disabled_by": null,
  1766. "status": "completed",
  1767. "created_by": "dbcb1ab5-90c8-41a7-8b78-73b235eb6f6f",
  1768. "created_at": 1728734540,
  1769. "indexing_at": 1728734552,
  1770. "completed_at": 1728734584,
  1771. "error": null,
  1772. "stopped_at": null,
  1773. "document": {
  1774. "id": "a8c6c36f-9f5d-4d7a-8472-f5d7b75d71d2",
  1775. "data_source_type": "upload_file",
  1776. "name": "readme.txt",
  1777. "name": "readme.txt"
  1778. },
  1779. "score": 3.730463140527718e-05,
  1780. "tsne_position": null
  1781. }
  1782. ]
  1783. }
  1784. ```
  1785. </CodeGroup>
  1786. </Col>
  1787. </Row>
  1788. <hr className='ml-0 mr-0' />
  1789. <Heading
  1790. url='/datasets/{dataset_id}/metadata'
  1791. method='POST'
  1792. title='新增元数据'
  1793. name='#create_metadata'
  1794. />
  1795. <Row>
  1796. <Col>
  1797. ### Params
  1798. <Properties>
  1799. <Property name='dataset_id' type='string' key='dataset_id'>
  1800. 知识库 ID
  1801. </Property>
  1802. </Properties>
  1803. ### Request Body
  1804. <Properties>
  1805. <Property name='segment' type='object' key='segment'>
  1806. - <code>type</code> (string) 元数据类型,必填
  1807. - <code>name</code> (string) 元数据名称,必填
  1808. </Property>
  1809. </Properties>
  1810. </Col>
  1811. <Col sticky>
  1812. <CodeGroup
  1813. title="Request"
  1814. tag="POST"
  1815. label="/datasets/{dataset_id}/metadata"
  1816. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/metadata' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"type": "string", "name": "test"}'`}
  1817. >
  1818. ```bash {{ title: 'cURL' }}
  1819. ```
  1820. </CodeGroup>
  1821. <CodeGroup title="Response">
  1822. ```json {{ title: 'Response' }}
  1823. {
  1824. "id": "abc",
  1825. "type": "string",
  1826. "name": "test"
  1827. }
  1828. ```
  1829. </CodeGroup>
  1830. </Col>
  1831. </Row>
  1832. <hr className='ml-0 mr-0' />
  1833. <Heading
  1834. url='/datasets/{dataset_id}/metadata/{metadata_id}'
  1835. method='PATCH'
  1836. title='更新元数据'
  1837. name='#update_metadata'
  1838. />
  1839. <Row>
  1840. <Col>
  1841. ### Params
  1842. <Properties>
  1843. <Property name='dataset_id' type='string' key='dataset_id'>
  1844. 知识库 ID
  1845. </Property>
  1846. <Property name='metadata_id' type='string' key='metadata_id'>
  1847. 元数据 ID
  1848. </Property>
  1849. </Properties>
  1850. ### Request Body
  1851. <Properties>
  1852. <Property name='segment' type='object' key='segment'>
  1853. - <code>name</code> (string) 元数据名称,必填
  1854. </Property>
  1855. </Properties>
  1856. </Col>
  1857. <Col sticky>
  1858. <CodeGroup
  1859. title="Request"
  1860. tag="PATCH"
  1861. label="/datasets/{dataset_id}/metadata/{metadata_id}"
  1862. targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/metadata/{metadata_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "test"}'`}
  1863. >
  1864. ```bash {{ title: 'cURL' }}
  1865. ```
  1866. </CodeGroup>
  1867. <CodeGroup title="Response">
  1868. ```json {{ title: 'Response' }}
  1869. {
  1870. "id": "abc",
  1871. "type": "string",
  1872. "name": "test"
  1873. }
  1874. ```
  1875. </CodeGroup>
  1876. </Col>
  1877. </Row>
  1878. <hr className='ml-0 mr-0' />
  1879. <Heading
  1880. url='/datasets/{dataset_id}/metadata/{metadata_id}'
  1881. method='DELETE'
  1882. title='删除元数据'
  1883. name='#delete_metadata'
  1884. />
  1885. <Row>
  1886. <Col>
  1887. ### Params
  1888. <Properties>
  1889. <Property name='dataset_id' type='string' key='dataset_id'>
  1890. 知识库 ID
  1891. </Property>
  1892. <Property name='metadata_id' type='string' key='metadata_id'>
  1893. 元数据 ID
  1894. </Property>
  1895. </Properties>
  1896. </Col>
  1897. <Col sticky>
  1898. <CodeGroup
  1899. title="Request"
  1900. tag="DELETE"
  1901. label="/datasets/{dataset_id}/metadata/{metadata_id}"
  1902. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/metadata/{metadata_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  1903. >
  1904. ```bash {{ title: 'cURL' }}
  1905. ```
  1906. </CodeGroup>
  1907. </Col>
  1908. </Row>
  1909. <hr className='ml-0 mr-0' />
  1910. <Heading
  1911. url='/datasets/{dataset_id}/metadata/built-in/{action}'
  1912. method='POST'
  1913. title='启用/禁用内置元数据'
  1914. name='#toggle_metadata'
  1915. />
  1916. <Row>
  1917. <Col>
  1918. ### Params
  1919. <Properties>
  1920. <Property name='dataset_id' type='string' key='dataset_id'>
  1921. 知识库 ID
  1922. </Property>
  1923. <Property name='action' type='string' key='action'>
  1924. disable/enable
  1925. </Property>
  1926. </Properties>
  1927. </Col>
  1928. <Col sticky>
  1929. <CodeGroup
  1930. title="Request"
  1931. tag="POST"
  1932. label="/datasets/{dataset_id}/metadata/built-in/{action}"
  1933. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/metadata/built-in/{action}' \\\n--header 'Authorization: Bearer {api_key}'`}
  1934. >
  1935. ```bash {{ title: 'cURL' }}
  1936. ```
  1937. </CodeGroup>
  1938. </Col>
  1939. </Row>
  1940. <hr className='ml-0 mr-0' />
  1941. <Heading
  1942. url='/datasets/{dataset_id}/documents/metadata'
  1943. method='POST'
  1944. title='更新文档元数据'
  1945. name='#update_documents_metadata'
  1946. />
  1947. <Row>
  1948. <Col>
  1949. ### Params
  1950. <Properties>
  1951. <Property name='dataset_id' type='string' key='dataset_id'>
  1952. 知识库 ID
  1953. </Property>
  1954. </Properties>
  1955. ### Request Body
  1956. <Properties>
  1957. <Property name='operation_data' type='object list' key='segments'>
  1958. - <code>document_id</code> (string) 文档 ID
  1959. - <code>metadata_list</code> (list) 元数据列表
  1960. - <code>id</code> (string) 元数据 ID
  1961. - <code>value</code> (string) 元数据值
  1962. - <code>name</code> (string) 元数据名称
  1963. </Property>
  1964. </Properties>
  1965. </Col>
  1966. <Col sticky>
  1967. <CodeGroup
  1968. title="Request"
  1969. tag="POST"
  1970. label="/datasets/{dataset_id}/documents/metadata"
  1971. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/metadata' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"operation_data": [{"document_id": "document_id", "metadata_list": [{"id": "id", "value": "value", "name": "name"}]}]}'`}
  1972. >
  1973. ```bash {{ title: 'cURL' }}
  1974. ```
  1975. </CodeGroup>
  1976. </Col>
  1977. </Row>
  1978. <hr className='ml-0 mr-0' />
  1979. <Heading
  1980. url='/datasets/{dataset_id}/metadata'
  1981. method='GET'
  1982. title='查询知识库元数据列表'
  1983. name='#dataset_metadata_list'
  1984. />
  1985. <Row>
  1986. <Col>
  1987. ### Params
  1988. <Properties>
  1989. <Property name='dataset_id' type='string' key='dataset_id'>
  1990. 知识库 ID
  1991. </Property>
  1992. </Properties>
  1993. </Col>
  1994. <Col sticky>
  1995. <CodeGroup
  1996. title="Request"
  1997. tag="GET"
  1998. label="/datasets/{dataset_id}/metadata"
  1999. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/metadata' \\\n--header 'Authorization: Bearer {api_key}'`}
  2000. >
  2001. ```bash {{ title: 'cURL' }}
  2002. ```
  2003. </CodeGroup>
  2004. <CodeGroup title="Response">
  2005. ```json {{ title: 'Response' }}
  2006. {
  2007. "doc_metadata": [
  2008. {
  2009. "id": "",
  2010. "name": "name",
  2011. "type": "string",
  2012. "use_count": 0
  2013. },
  2014. ...
  2015. ],
  2016. "built_in_field_enabled": true
  2017. }
  2018. ```
  2019. </CodeGroup>
  2020. </Col>
  2021. </Row>
  2022. <hr className='ml-0 mr-0' />
  2023. <Heading
  2024. url='/workspaces/current/models/model-types/text-embedding'
  2025. method='GET'
  2026. title='获取嵌入模型列表'
  2027. name='#model_type_list'
  2028. />
  2029. <Row>
  2030. <Col>
  2031. ### Query
  2032. <Properties>
  2033. </Properties>
  2034. </Col>
  2035. <Col sticky>
  2036. <CodeGroup
  2037. title="Request"
  2038. tag="GET"
  2039. label="/workspaces/current/models/model-types/text-embedding"
  2040. targetCode={`curl --location --request GET '${props.apiBaseUrl}/workspaces/current/models/model-types/text-embedding' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  2041. >
  2042. ```bash {{ title: 'cURL' }}
  2043. curl --location --request GET '${props.apiBaseUrl}/workspaces/current/models/model-types/text-embedding' \
  2044. --header 'Authorization: Bearer {api_key}' \
  2045. --header 'Content-Type: application/json'
  2046. ```
  2047. </CodeGroup>
  2048. <CodeGroup title="Response">
  2049. ```json {{ title: 'Response' }}
  2050. {
  2051. "data": [
  2052. {
  2053. "provider": "zhipuai",
  2054. "label": {
  2055. "zh_Hans": "智谱 AI",
  2056. "en_US": "ZHIPU AI"
  2057. },
  2058. "icon_small": {
  2059. "zh_Hans": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_small/zh_Hans",
  2060. "en_US": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_small/en_US"
  2061. },
  2062. "icon_large": {
  2063. "zh_Hans": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_large/zh_Hans",
  2064. "en_US": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_large/en_US"
  2065. },
  2066. "status": "active",
  2067. "models": [
  2068. {
  2069. "model": "embedding-3",
  2070. "label": {
  2071. "zh_Hans": "embedding-3",
  2072. "en_US": "embedding-3"
  2073. },
  2074. "model_type": "text-embedding",
  2075. "features": null,
  2076. "fetch_from": "predefined-model",
  2077. "model_properties": {
  2078. "context_size": 8192
  2079. },
  2080. "deprecated": false,
  2081. "status": "active",
  2082. "load_balancing_enabled": false
  2083. },
  2084. {
  2085. "model": "embedding-2",
  2086. "label": {
  2087. "zh_Hans": "embedding-2",
  2088. "en_US": "embedding-2"
  2089. },
  2090. "model_type": "text-embedding",
  2091. "features": null,
  2092. "fetch_from": "predefined-model",
  2093. "model_properties": {
  2094. "context_size": 8192
  2095. },
  2096. "deprecated": false,
  2097. "status": "active",
  2098. "load_balancing_enabled": false
  2099. },
  2100. {
  2101. "model": "text_embedding",
  2102. "label": {
  2103. "zh_Hans": "text_embedding",
  2104. "en_US": "text_embedding"
  2105. },
  2106. "model_type": "text-embedding",
  2107. "features": null,
  2108. "fetch_from": "predefined-model",
  2109. "model_properties": {
  2110. "context_size": 512
  2111. },
  2112. "deprecated": false,
  2113. "status": "active",
  2114. "load_balancing_enabled": false
  2115. }
  2116. ]
  2117. }
  2118. ]
  2119. }
  2120. ```
  2121. </CodeGroup>
  2122. </Col>
  2123. </Row>
  2124. <hr className='ml-0 mr-0' />
  2125. <Row>
  2126. <Col>
  2127. ### 错误信息
  2128. <Properties>
  2129. <Property name='code' type='string' key='code'>
  2130. 返回的错误代码
  2131. </Property>
  2132. </Properties>
  2133. <Properties>
  2134. <Property name='status' type='number' key='status'>
  2135. 返回的错误状态
  2136. </Property>
  2137. </Properties>
  2138. <Properties>
  2139. <Property name='message' type='string' key='message'>
  2140. 返回的错误信息
  2141. </Property>
  2142. </Properties>
  2143. </Col>
  2144. <Col>
  2145. <CodeGroup title="Example">
  2146. ```json {{ title: 'Response' }}
  2147. {
  2148. "code": "no_file_uploaded",
  2149. "message": "Please upload your file.",
  2150. "status": 400
  2151. }
  2152. ```
  2153. </CodeGroup>
  2154. </Col>
  2155. </Row>
  2156. <table className="max-w-auto border-collapse border border-slate-400" style={{ maxWidth: 'none', width: 'auto' }}>
  2157. <thead style={{ background: '#f9fafc' }}>
  2158. <tr>
  2159. <th className="p-2 border border-slate-300">code</th>
  2160. <th className="p-2 border border-slate-300">status</th>
  2161. <th className="p-2 border border-slate-300">message</th>
  2162. </tr>
  2163. </thead>
  2164. <tbody>
  2165. <tr>
  2166. <td className="p-2 border border-slate-300">no_file_uploaded</td>
  2167. <td className="p-2 border border-slate-300">400</td>
  2168. <td className="p-2 border border-slate-300">Please upload your file.</td>
  2169. </tr>
  2170. <tr>
  2171. <td className="p-2 border border-slate-300">too_many_files</td>
  2172. <td className="p-2 border border-slate-300">400</td>
  2173. <td className="p-2 border border-slate-300">Only one file is allowed.</td>
  2174. </tr>
  2175. <tr>
  2176. <td className="p-2 border border-slate-300">file_too_large</td>
  2177. <td className="p-2 border border-slate-300">413</td>
  2178. <td className="p-2 border border-slate-300">File size exceeded.</td>
  2179. </tr>
  2180. <tr>
  2181. <td className="p-2 border border-slate-300">unsupported_file_type</td>
  2182. <td className="p-2 border border-slate-300">415</td>
  2183. <td className="p-2 border border-slate-300">File type not allowed.</td>
  2184. </tr>
  2185. <tr>
  2186. <td className="p-2 border border-slate-300">high_quality_dataset_only</td>
  2187. <td className="p-2 border border-slate-300">400</td>
  2188. <td className="p-2 border border-slate-300">Current operation only supports 'high-quality' datasets.</td>
  2189. </tr>
  2190. <tr>
  2191. <td className="p-2 border border-slate-300">dataset_not_initialized</td>
  2192. <td className="p-2 border border-slate-300">400</td>
  2193. <td className="p-2 border border-slate-300">The dataset is still being initialized or indexing. Please wait a moment.</td>
  2194. </tr>
  2195. <tr>
  2196. <td className="p-2 border border-slate-300">archived_document_immutable</td>
  2197. <td className="p-2 border border-slate-300">403</td>
  2198. <td className="p-2 border border-slate-300">The archived document is not editable.</td>
  2199. </tr>
  2200. <tr>
  2201. <td className="p-2 border border-slate-300">dataset_name_duplicate</td>
  2202. <td className="p-2 border border-slate-300">409</td>
  2203. <td className="p-2 border border-slate-300">The dataset name already exists. Please modify your dataset name.</td>
  2204. </tr>
  2205. <tr>
  2206. <td className="p-2 border border-slate-300">invalid_action</td>
  2207. <td className="p-2 border border-slate-300">400</td>
  2208. <td className="p-2 border border-slate-300">Invalid action.</td>
  2209. </tr>
  2210. <tr>
  2211. <td className="p-2 border border-slate-300">document_already_finished</td>
  2212. <td className="p-2 border border-slate-300">400</td>
  2213. <td className="p-2 border border-slate-300">The document has been processed. Please refresh the page or go to the document details.</td>
  2214. </tr>
  2215. <tr>
  2216. <td className="p-2 border border-slate-300">document_indexing</td>
  2217. <td className="p-2 border border-slate-300">400</td>
  2218. <td className="p-2 border border-slate-300">The document is being processed and cannot be edited.</td>
  2219. </tr>
  2220. <tr>
  2221. <td className="p-2 border border-slate-300">invalid_metadata</td>
  2222. <td className="p-2 border border-slate-300">400</td>
  2223. <td className="p-2 border border-slate-300">The metadata content is incorrect. Please check and verify.</td>
  2224. </tr>
  2225. </tbody>
  2226. </table>
  2227. <div className="pb-4" />