You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

http_api.md 54KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826
  1. # HTTP API Reference
  2. ## Create dataset
  3. **POST** `/api/v1/dataset`
  4. Creates a dataset.
  5. ### Request
  6. - Method: POST
  7. - URL: `http://{address}/api/v1/dataset`
  8. - Headers:
  9. - `content-Type: application/json`
  10. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  11. - Body:
  12. - `"id"`: `string`
  13. - `"name"`: `string`
  14. - `"avatar"`: `string`
  15. - `"tenant_id"`: `string`
  16. - `"description"`: `string`
  17. - `"language"`: `string`
  18. - `"embedding_model"`: `string`
  19. - `"permission"`: `string`
  20. - `"document_count"`: `integer`
  21. - `"chunk_count"`: `integer`
  22. - `"parse_method"`: `string`
  23. - `"parser_config"`: `Dataset.ParserConfig`
  24. #### Request example
  25. ```bash
  26. # "id": id must not be provided.
  27. # "name": name is required and can't be duplicated.
  28. # "tenant_id": tenant_id must not be provided.
  29. # "embedding_model": embedding_model must not be provided.
  30. # "naive" means general.
  31. curl --request POST \
  32. --url http://{address}/api/v1/dataset \
  33. --header 'Content-Type: application/json' \
  34. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  35. --data '{
  36. "name": "test",
  37. "chunk_count": 0,
  38. "document_count": 0,
  39. "parse_method": "naive"
  40. }'
  41. ```
  42. #### Request parameters
  43. - `"id"`: (*Body parameter*)
  44. The ID of the created dataset used to uniquely identify different datasets.
  45. - If creating a dataset, `id` must not be provided.
  46. - `"name"`: (*Body parameter*)
  47. The name of the dataset, which must adhere to the following requirements:
  48. - Required when creating a dataset and must be unique.
  49. - If updating a dataset, `name` must still be unique.
  50. - `"avatar"`: (*Body parameter*)
  51. Base64 encoding of the avatar.
  52. - `"tenant_id"`: (*Body parameter*)
  53. The ID of the tenant associated with the dataset, used to link it with specific users.
  54. - If creating a dataset, `tenant_id` must not be provided.
  55. - If updating a dataset, `tenant_id` cannot be changed.
  56. - `"description"`: (*Body parameter*)
  57. The description of the dataset.
  58. - `"language"`: (*Body parameter*)
  59. The language setting for the dataset.
  60. - `"embedding_model"`: (*Body parameter*)
  61. Embedding model used in the dataset to generate vector embeddings.
  62. - If creating a dataset, `embedding_model` must not be provided.
  63. - If updating a dataset, `embedding_model` cannot be changed.
  64. - `"permission"`: (*Body parameter*)
  65. Specifies who can manipulate the dataset.
  66. - `"document_count"`: (*Body parameter*)
  67. Document count of the dataset.
  68. - If updating a dataset, `document_count` cannot be changed.
  69. - `"chunk_count"`: (*Body parameter*)
  70. Chunk count of the dataset.
  71. - If updating a dataset, `chunk_count` cannot be changed.
  72. - `"parse_method"`: (*Body parameter*)
  73. Parsing method of the dataset.
  74. - If updating `parse_method`, `chunk_count` must be 0.
  75. - `"parser_config"`: (*Body parameter*)
  76. The configuration settings for the dataset parser.
  77. ### Response
  78. The successful response includes a JSON object like the following:
  79. ```json
  80. {
  81. "code": 0,
  82. "data": {
  83. "avatar": null,
  84. "chunk_count": 0,
  85. "create_date": "Thu, 10 Oct 2024 05:57:37 GMT",
  86. "create_time": 1728539857641,
  87. "created_by": "69736c5e723611efb51b0242ac120007",
  88. "description": null,
  89. "document_count": 0,
  90. "embedding_model": "BAAI/bge-large-zh-v1.5",
  91. "id": "8d73076886cc11ef8c270242ac120006",
  92. "language": "English",
  93. "name": "test_1",
  94. "parse_method": "naive",
  95. "parser_config": {
  96. "pages": [
  97. [
  98. 1,
  99. 1000000
  100. ]
  101. ]
  102. },
  103. "permission": "me",
  104. "similarity_threshold": 0.2,
  105. "status": "1",
  106. "tenant_id": "69736c5e723611efb51b0242ac120007",
  107. "token_num": 0,
  108. "update_date": "Thu, 10 Oct 2024 05:57:37 GMT",
  109. "update_time": 1728539857641,
  110. "vector_similarity_weight": 0.3
  111. }
  112. }
  113. ```
  114. - `"error_code"`: `integer`
  115. `0`: The operation succeeds.
  116. The error response includes a JSON object like the following:
  117. ```json
  118. {
  119. "code": 102,
  120. "message": "Duplicated knowledgebase name in creating dataset."
  121. }
  122. ```
  123. ## Delete datasets
  124. **DELETE** `/api/v1/dataset`
  125. Deletes datasets by ids.
  126. ### Request
  127. - Method: DELETE
  128. - URL: `http://{address}/api/v1/dataset`
  129. - Headers:
  130. - `content-Type: application/json`
  131. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  132. - Body:
  133. - `"ids"`: `List[string]`
  134. #### Request example
  135. ```bash
  136. # "ids": the IDs of the datasets to delete.
  137. curl --request DELETE \
  138. --url http://{address}/api/v1/dataset \
  139. --header 'Content-Type: application/json' \
  140. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  141. --data '{
  142. "ids": ["test_1", "test_2"]
  143. }'
  144. ```
  145. #### Request parameters
  146. - `"ids"`: (*Body parameter*)
  147. Dataset IDs to delete.
  148. ### Response
  149. The successful response includes a JSON object like the following:
  150. ```json
  151. {
  152. "code": 0
  153. }
  154. ```
  155. - `"error_code"`: `integer`
  156. `0`: The operation succeeds.
  157. The error response includes a JSON object like the following:
  158. ```json
  159. {
  160. "code": 102,
  161. "message": "You don't own the dataset."
  162. }
  163. ```
  164. ## Update dataset
  165. **PUT** `/api/v1/dataset/{dataset_id}`
  166. Updates a dataset by its id.
  167. ### Request
  168. - Method: PUT
  169. - URL: `http://{address}/api/v1/dataset/{dataset_id}`
  170. - Headers:
  171. - `content-Type: application/json`
  172. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  173. - Body: (Refer to the "Create Dataset" for the complete structure of the request body.)
  174. #### Request example
  175. ```bash
  176. # "id": id is required.
  177. # "name": If you update name, it can't be duplicated.
  178. # "tenant_id": If you update tenant_id, it can't be changed
  179. # "embedding_model": If you update embedding_model, it can't be changed.
  180. # "chunk_count": If you update chunk_count, it can't be changed.
  181. # "document_count": If you update document_count, it can't be changed.
  182. # "parse_method": If you update parse_method, chunk_count must be 0.
  183. # "naive" means general.
  184. curl --request PUT \
  185. --url http://{address}/api/v1/dataset/{dataset_id} \
  186. --header 'Content-Type: application/json' \
  187. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  188. --data '{
  189. "name": "test",
  190. "tenant_id": "4fb0cd625f9311efba4a0242ac120006",
  191. "embedding_model": "BAAI/bge-zh-v1.5",
  192. "chunk_count": 0,
  193. "document_count": 0,
  194. "parse_method": "naive"
  195. }'
  196. ```
  197. #### Request parameters
  198. (Refer to the "Create Dataset" for the complete structure of the request parameters.)
  199. ### Response
  200. The successful response includes a JSON object like the following:
  201. ```json
  202. {
  203. "code": 0
  204. }
  205. ```
  206. - `"error_code"`: `integer`
  207. `0`: The operation succeeds.
  208. The error response includes a JSON object like the following:
  209. ```json
  210. {
  211. "code": 102,
  212. "message": "Can't change tenant_id."
  213. }
  214. ```
  215. ## List datasets
  216. **GET** `/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
  217. List all datasets
  218. ### Request
  219. - Method: GET
  220. - URL: `http://{address}/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
  221. - Headers:
  222. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  223. #### Request example
  224. ```bash
  225. # If no page parameter is passed, the default is 1
  226. # If no page_size parameter is passed, the default is 1024
  227. # If no orderby parameter is passed, the default is "create_time"
  228. # If no desc parameter is passed, the default is True
  229. curl --request GET \
  230. --url http://{address}/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id} \
  231. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  232. ```
  233. #### Request parameters
  234. - `page`: (*Path parameter*)
  235. The current page number to retrieve from the paginated data. This parameter determines which set of records will be fetched.
  236. - `page_size`: (*Path parameter*)
  237. The number of records to retrieve per page. This controls how many records will be included in each page.
  238. - `orderby`: (*Path parameter*)
  239. The field by which the records should be sorted. This specifies the attribute or column used to order the results.
  240. - `desc`: (*Path parameter*)
  241. A boolean flag indicating whether the sorting should be in descending order.
  242. - `name`: (*Path parameter*)
  243. Dataset name
  244. - `"id"`: (*Path parameter*)
  245. The ID of the dataset to be retrieved.
  246. - `"name"`: (*Path parameter*)
  247. The name of the dataset to be retrieved.
  248. ### Response
  249. The successful response includes a JSON object like the following:
  250. ```json
  251. {
  252. "code": 0,
  253. "data": [
  254. {
  255. "avatar": "",
  256. "chunk_count": 59,
  257. "create_date": "Sat, 14 Sep 2024 01:12:37 GMT",
  258. "create_time": 1726276357324,
  259. "created_by": "69736c5e723611efb51b0242ac120007",
  260. "description": null,
  261. "document_count": 1,
  262. "embedding_model": "BAAI/bge-large-zh-v1.5",
  263. "id": "6e211ee0723611efa10a0242ac120007",
  264. "language": "English",
  265. "name": "mysql",
  266. "parse_method": "knowledge_graph",
  267. "parser_config": {
  268. "chunk_token_num": 8192,
  269. "delimiter": "\\n!?;。;!?",
  270. "entity_types": [
  271. "organization",
  272. "person",
  273. "location",
  274. "event",
  275. "time"
  276. ]
  277. },
  278. "permission": "me",
  279. "similarity_threshold": 0.2,
  280. "status": "1",
  281. "tenant_id": "69736c5e723611efb51b0242ac120007",
  282. "token_num": 12744,
  283. "update_date": "Thu, 10 Oct 2024 04:07:23 GMT",
  284. "update_time": 1728533243536,
  285. "vector_similarity_weight": 0.3
  286. }
  287. ]
  288. }
  289. ```
  290. The error response includes a JSON object like the following:
  291. ```json
  292. {
  293. "code": 102,
  294. "message": "The dataset doesn't exist"
  295. }
  296. ```
  297. ## Upload files to a dataset
  298. **POST** `/api/v1/dataset/{dataset_id}/document`
  299. Uploads files to a dataset.
  300. ### Request
  301. - Method: POST
  302. - URL: `/api/v1/dataset/{dataset_id}/document`
  303. - Headers:
  304. - 'Content-Type: multipart/form-data'
  305. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  306. - Form:
  307. - 'file=@{FILE_PATH}'
  308. #### Request example
  309. ```shell
  310. curl --request POST \
  311. --url http://{address}/api/v1/dataset/{dataset_id}/document \
  312. --header 'Content-Type: multipart/form-data' \
  313. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  314. --form 'file=@test.txt'
  315. ```
  316. #### Request parameters
  317. - `"dataset_id"`: (*Path parameter*)
  318. The dataset id
  319. - `"file"`: (*Body parameter*)
  320. The file to upload
  321. ### Response
  322. The successful response includes a JSON object like the following:
  323. ```shell
  324. {
  325. "code": 0
  326. }
  327. ```
  328. - `"error_code"`: `integer`
  329. `0`: The operation succeeds.
  330. The error response includes a JSON object like the following:
  331. ```shell
  332. {
  333. "code": 3016,
  334. "message": "Can't connect database"
  335. }
  336. ```
  337. ## Download a file from a dataset
  338. **GET** `/api/v1/dataset/{dataset_id}/document/{document_id}`
  339. Downloads files from a dataset.
  340. ### Request
  341. - Method: GET
  342. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}`
  343. - Headers:
  344. - `content-Type: application/json`
  345. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  346. - Output:
  347. - '{FILE_NAME}'
  348. #### Request example
  349. ```shell
  350. curl --request GET \
  351. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id} \
  352. --header 'Content-Type: application/json' \
  353. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  354. --output '{FILE_NAME}'
  355. ```
  356. #### Request parameters
  357. - `"dataset_id"`: (*PATH parameter*)
  358. The dataset id
  359. - `"document_id"`: (*PATH parameter*)
  360. The document id of the file.
  361. ### Response
  362. The successful response includes a JSON object like the following:
  363. ```shell
  364. {
  365. "code": 0
  366. }
  367. ```
  368. - `"error_code"`: `integer`
  369. `0`: The operation succeeds.
  370. The error response includes a JSON object like the following:
  371. ```shell
  372. {
  373. "code": 3016,
  374. "message": "Can't connect database"
  375. }
  376. ```
  377. ## List files of a dataset
  378. **GET** `/api/v1/dataset/{dataset_id}/info?keywords={keyword}&page={page}&page_size={limit}&orderby={orderby}&desc={desc}&name={name}`
  379. Lists the files in a dataset.
  380. ### Request
  381. - Method: GET
  382. - URL: `/api/v1/dataset/{dataset_id}/info?keywords={keyword}&page={page}&page_size={limit}&orderby={orderby}&desc={desc}&name={name}`
  383. - Headers:
  384. - `content-Type: application/json`
  385. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  386. #### Request example
  387. ```shell
  388. curl --request GET \
  389. --url http://{address}/api/v1/dataset/{dataset_id}/info?keywords=rag&page=0&page_size=10&orderby=create_time&desc=yes \
  390. --header 'Content-Type: application/json' \
  391. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  392. ```
  393. #### Request parameters
  394. - `"dataset_id"`: (*PATH parameter*)
  395. The dataset id
  396. - `keywords`: (*Filter parameter*)
  397. The keywords to match in the search.
  398. - `page`: (*Filter parameter*)
  399. The current page number to retrieve from the paginated data. This parameter determines which set of records will be fetched.
  400. - `page_size`: (*Filter parameter*)
  401. The number of records to retrieve per page. This controls how many records will be included in each page.
  402. - `orderby`: (*Filter parameter*)
  403. The field by which the records should be sorted. This specifies the attribute or column used to order the results.
  404. - `desc`: (*Filter parameter*)
  405. A boolean flag indicating whether the sorting should be in descending order.
  406. - `name`: (*Filter parameter*)
  407. File name.
  408. ### Response
  409. The successful response includes a JSON object like the following:
  410. ```shell
  411. {
  412. "code": 0,
  413. "data": {
  414. "docs": [
  415. {
  416. "chunk_count": 0,
  417. "create_date": "Wed, 18 Sep 2024 08:20:49 GMT",
  418. "create_time": 1726647649379,
  419. "created_by": "134408906b6811efbcd20242ac120005",
  420. "id": "e970a94a759611efae5b0242ac120004",
  421. "knowledgebase_id": "e95f574e759611efbc850242ac120004",
  422. "location": "Test Document222.txt",
  423. "name": "Test Document222.txt",
  424. "parser_config": {
  425. "chunk_token_count": 128,
  426. "delimiter": "\n!?。;!?",
  427. "layout_recognize": true,
  428. "task_page_size": 12
  429. },
  430. "parser_method": "naive",
  431. "process_begin_at": null,
  432. "process_duation": 0.0,
  433. "progress": 0.0,
  434. "progress_msg": "",
  435. "run": "0",
  436. "size": 46,
  437. "source_type": "local",
  438. "status": "1",
  439. "thumbnail": null,
  440. "token_count": 0,
  441. "type": "doc",
  442. "update_date": "Wed, 18 Sep 2024 08:20:49 GMT",
  443. "update_time": 1726647649379
  444. },
  445. {
  446. "chunk_count": 0,
  447. "create_date": "Wed, 18 Sep 2024 08:20:49 GMT",
  448. "create_time": 1726647649340,
  449. "created_by": "134408906b6811efbcd20242ac120005",
  450. "id": "e96aad9c759611ef9ab60242ac120004",
  451. "knowledgebase_id": "e95f574e759611efbc850242ac120004",
  452. "location": "Test Document111.txt",
  453. "name": "Test Document111.txt",
  454. "parser_config": {
  455. "chunk_token_count": 128,
  456. "delimiter": "\n!?。;!?",
  457. "layout_recognize": true,
  458. "task_page_size": 12
  459. },
  460. "parser_method": "naive",
  461. "process_begin_at": null,
  462. "process_duation": 0.0,
  463. "progress": 0.0,
  464. "progress_msg": "",
  465. "run": "0",
  466. "size": 46,
  467. "source_type": "local",
  468. "status": "1",
  469. "thumbnail": null,
  470. "token_count": 0,
  471. "type": "doc",
  472. "update_date": "Wed, 18 Sep 2024 08:20:49 GMT",
  473. "update_time": 1726647649340
  474. }
  475. ],
  476. "total": 2
  477. }
  478. }
  479. ```
  480. - `"error_code"`: `integer`
  481. `0`: The operation succeeds.
  482. The error response includes a JSON object like the following:
  483. ```shell
  484. {
  485. "code": 3016,
  486. "message": "Can't connect database"
  487. }
  488. ```
  489. ## Update a file information in dataset
  490. **PUT** `/api/v1/dataset/{dataset_id}/info/{document_id}`
  491. Update a file in a dataset
  492. ### Request
  493. - Method: PUT
  494. - URL: `/api/v1/dataset/{dataset_id}/info/{document_id}`
  495. - Headers:
  496. - `content-Type: application/json`
  497. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  498. #### Request example
  499. ```shell
  500. curl --request PUT \
  501. --url http://{address}/api/v1/dataset/{dataset_id}/info/{document_id} \
  502. --header 'Content-Type: application/json' \
  503. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  504. --data '{
  505. "document_id": "f6b170ac758811efa0660242ac120004",
  506. "document_name": "manual.txt",
  507. "thumbnail": null,
  508. "knowledgebase_id": "779333c0758611ef910f0242ac120004",
  509. "parser_method": "manual",
  510. "parser_config": {"chunk_token_count": 128, "delimiter": "\n!?。;!?", "layout_recognize": true, "task_page_size": 12},
  511. "source_type": "local", "type": "doc",
  512. "created_by": "134408906b6811efbcd20242ac120005",
  513. "size": 0, "token_count": 0, "chunk_count": 0,
  514. "progress": 0.0,
  515. "progress_msg": "",
  516. "process_begin_at": null,
  517. "process_duration": 0.0
  518. }'
  519. ```
  520. #### Request parameters
  521. - `"document_id"`: (*Body parameter*)
  522. - `"document_name"`: (*Body parameter*)
  523. ### Response
  524. The successful response includes a JSON object like the following:
  525. ```shell
  526. {
  527. "code": 0
  528. }
  529. ```
  530. The error response includes a JSON object like the following:
  531. ```shell
  532. {
  533. "code": 3016,
  534. "message": "Can't connect database"
  535. }
  536. ```
  537. ## Parse files in dataset
  538. **POST** `/api/v1/dataset/{dataset_id}/chunk`
  539. Parse files into chunks in a dataset
  540. ### Request
  541. - Method: POST
  542. - URL: `/api/v1/dataset/{dataset_id}/chunk`
  543. - Headers:
  544. - `content-Type: application/json`
  545. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  546. #### Request example
  547. ```shell
  548. curl --request POST \
  549. --url http://{address}/api/v1/dataset/{dataset_id}/chunk \
  550. --header 'Content-Type: application/json' \
  551. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  552. --data '{
  553. "documents": ["f6b170ac758811efa0660242ac120004", "97ad64b6759811ef9fc30242ac120004"]
  554. }'
  555. ```
  556. #### Request parameters
  557. - `"dataset_id"`: (*Path parameter*)
  558. - `"documents"`: (*Body parameter*)
  559. - Documents to parse
  560. ### Response
  561. The successful response includes a JSON object like the following:
  562. ```shell
  563. {
  564. "code": 0
  565. }
  566. ```
  567. The error response includes a JSON object like the following:
  568. ```shell
  569. {
  570. "code": 3016,
  571. "message": "Can't connect database"
  572. }
  573. ```
  574. ## Stop file parsing
  575. **DELETE** `/api/v1/dataset/{dataset_id}/chunk`
  576. Stop file parsing
  577. ### Request
  578. - Method: DELETE
  579. - URL: `/api/v1/dataset/{dataset_id}/chunk`
  580. - Headers:
  581. - `content-Type: application/json`
  582. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  583. #### Request example
  584. ```shell
  585. curl --request DELETE \
  586. --url http://{address}/api/v1/dataset/{dataset_id}/chunk \
  587. --header 'Content-Type: application/json' \
  588. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  589. --data '{
  590. "documents": ["f6b170ac758811efa0660242ac120004", "97ad64b6759811ef9fc30242ac120004"]
  591. }'
  592. ```
  593. #### Request parameters
  594. - `"dataset_id"`: (*Path parameter*)
  595. - `"documents"`: (*Body parameter*)
  596. - Documents to stop parsing
  597. ### Response
  598. The successful response includes a JSON object like the following:
  599. ```shell
  600. {
  601. "code": 0
  602. }
  603. ```
  604. The error response includes a JSON object like the following:
  605. ```shell
  606. {
  607. "code": 3016,
  608. "message": "Can't connect database"
  609. }
  610. ```
  611. ## Get document chunk list
  612. **GET** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  613. Get document chunk list
  614. ### Request
  615. - Method: GET
  616. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  617. - Headers:
  618. - `content-Type: application/json`
  619. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  620. #### Request example
  621. ```shell
  622. curl --request GET \
  623. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
  624. --header 'Content-Type: application/json' \
  625. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  626. ```
  627. #### Request parameters
  628. - `"dataset_id"`: (*Path parameter*)
  629. - `"document_id"`: (*Path parameter*)
  630. ### Response
  631. The successful response includes a JSON object like the following:
  632. ```shell
  633. {
  634. "code": 0,
  635. "data": {
  636. "chunks": [
  637. {
  638. "available_int": 1,
  639. "content": "<em>advantag</em>of ragflow increas accuraci and relev:by incorpor retriev inform , ragflow can gener respons that are more accur",
  640. "document_keyword": "ragflow_test.txt",
  641. "document_id": "77df9ef4759a11ef8bdd0242ac120004",
  642. "id": "4ab8c77cfac1a829c8d5ed022a0808c0",
  643. "image_id": "",
  644. "important_keywords": [],
  645. "positions": [
  646. ""
  647. ]
  648. }
  649. ],
  650. "doc": {
  651. "chunk_count": 5,
  652. "create_date": "Wed, 18 Sep 2024 08:46:16 GMT",
  653. "create_time": 1726649176833,
  654. "created_by": "134408906b6811efbcd20242ac120005",
  655. "id": "77df9ef4759a11ef8bdd0242ac120004",
  656. "knowledgebase_id": "77d9d24e759a11ef880c0242ac120004",
  657. "location": "ragflow_test.txt",
  658. "name": "ragflow_test.txt",
  659. "parser_config": {
  660. "chunk_token_count": 128,
  661. "delimiter": "\n!?。;!?",
  662. "layout_recognize": true,
  663. "task_page_size": 12
  664. },
  665. "parser_method": "naive",
  666. "process_begin_at": "Wed, 18 Sep 2024 08:46:16 GMT",
  667. "process_duation": 7.3213,
  668. "progress": 1.0,
  669. "progress_msg": "\nTask has been received.\nStart to parse.\nFinish parsing.\nFinished slicing files(5). Start to embedding the content.\nFinished embedding(6.16)! Start to build index!\nDone!",
  670. "run": "3",
  671. "size": 4209,
  672. "source_type": "local",
  673. "status": "1",
  674. "thumbnail": null,
  675. "token_count": 746,
  676. "type": "doc",
  677. "update_date": "Wed, 18 Sep 2024 08:46:23 GMT",
  678. "update_time": 1726649183321
  679. },
  680. "total": 1
  681. }
  682. }
  683. ```
  684. The error response includes a JSON object like the following:
  685. ```shell
  686. {
  687. "code": 3016,
  688. "message": "Can't connect database"
  689. }
  690. ```
  691. ## Delete document chunks
  692. **DELETE** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  693. Delete document chunks
  694. ### Request
  695. - Method: DELETE
  696. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  697. - Headers:
  698. - `content-Type: application/json`
  699. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  700. #### Request example
  701. ```shell
  702. curl --request DELETE \
  703. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
  704. --header 'Content-Type: application/json' \
  705. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  706. --data '{
  707. "chunks": ["f6b170ac758811efa0660242ac120004", "97ad64b6759811ef9fc30242ac120004"]
  708. }'
  709. ```
  710. ## Update document chunk
  711. **PUT** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  712. Update document chunk
  713. ### Request
  714. - Method: PUT
  715. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  716. - Headers:
  717. - `content-Type: application/json`
  718. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  719. #### Request example
  720. ```shell
  721. curl --request PUT \
  722. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
  723. --header 'Content-Type: application/json' \
  724. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  725. --data '{
  726. "chunk_id": "d87fb0b7212c15c18d0831677552d7de",
  727. "knowledgebase_id": null,
  728. "name": "",
  729. "content": "ragflow123",
  730. "important_keywords": [],
  731. "document_id": "e6bbba92759511efaa900242ac120004",
  732. "status": "1"
  733. }'
  734. ```
  735. ## Insert document chunks
  736. **POST** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  737. Insert document chunks
  738. ### Request
  739. - Method: POST
  740. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  741. - Headers:
  742. - `Content-Type: application/json`
  743. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  744. #### Request example
  745. ```shell
  746. curl --request POST \
  747. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
  748. --header 'Content-Type: application/json' \
  749. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  750. --data '{
  751. "document_id": "97ad64b6759811ef9fc30242ac120004",
  752. "content": ["ragflow content", "ragflow content"]
  753. }'
  754. ```
  755. ## Dataset retrieval test
  756. **GET** `/api/v1/dataset/{dataset_id}/retrieval`
  757. Retrieval test of a dataset
  758. ### Request
  759. - Method: GET
  760. - URL: `/api/v1/dataset/{dataset_id}/retrieval`
  761. - Headers:
  762. - `Content-Type: application/json`
  763. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  764. #### Request example
  765. ```shell
  766. curl --request GET \
  767. --url http://{address}/api/v1/dataset/{dataset_id}/retrieval \
  768. --header 'Content-Type: application/json' \
  769. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  770. --data '{
  771. "query_text": "This is a cat."
  772. }'
  773. ```
  774. ## Create chat
  775. **POST** `/api/v1/chat`
  776. Create a chat
  777. ### Request
  778. - Method: POST
  779. - URL: `http://{address}/api/v1/chat`
  780. - Headers:
  781. - `Content-Type: application/json`
  782. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  783. - Body:
  784. - `"name"`: `string`
  785. - `"avatar"`: `string`
  786. - `"knowledgebases"`: `List[DataSet]`
  787. - `"id"`: `string`
  788. - `"llm"`: `LLM`
  789. - `"prompt"`: `Prompt`
  790. #### Request example
  791. ```shell
  792. curl --request POST \
  793. --url http://{address}/api/v1/chat \
  794. --header 'Content-Type: application/json' \
  795. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  796. --data-binary '{
  797. "knowledgebases": [
  798. {
  799. "avatar": null,
  800. "chunk_count": 0,
  801. "description": null,
  802. "document_count": 0,
  803. "embedding_model": "",
  804. "id": "0b2cbc8c877f11ef89070242ac120005",
  805. "language": "English",
  806. "name": "Test_assistant",
  807. "parse_method": "naive",
  808. "parser_config": {
  809. "pages": [
  810. [
  811. 1,
  812. 1000000
  813. ]
  814. ]
  815. },
  816. "permission": "me",
  817. "tenant_id": "4fb0cd625f9311efba4a0242ac120006"
  818. }
  819. ],
  820. "name":"new_chat_1"
  821. }'
  822. ```
  823. #### Request parameters
  824. - `"name"`: (*Body parameter*)
  825. The name of the created chat.
  826. - `"assistant"`
  827. - `"avatar"`: (*Body parameter*)
  828. The icon of the created chat.
  829. - `"path"`
  830. - `"knowledgebases"`: (*Body parameter*)
  831. Select knowledgebases associated.
  832. - `["kb1"]`
  833. - `"id"`: (*Body parameter*)
  834. The id of the created chat.
  835. - `""`
  836. - `"llm"`: (*Body parameter*)
  837. The LLM of the created chat.
  838. - If the value is `None`, a dictionary with default values will be generated.
  839. - `"prompt"`: (*Body parameter*)
  840. The prompt of the created chat.
  841. - If the value is `None`, a dictionary with default values will be generated.
  842. ---
  843. ##### Chat.LLM parameters:
  844. - `"model_name"`: (*Body parameter*)
  845. Large language chat model.
  846. - If it is `None`, it will return the user's default model.
  847. - `"temperature"`: (*Body parameter*)
  848. Controls the randomness of predictions by the model. A lower temperature makes the model more confident, while a higher temperature makes it more creative and diverse.
  849. - `0.1`
  850. - `"top_p"`: (*Body parameter*)
  851. Also known as "nucleus sampling," it focuses on the most likely words, cutting off the less probable ones.
  852. - `0.3`
  853. - `"presence_penalty"`: (*Body parameter*)
  854. Discourages the model from repeating the same information by penalizing repeated content.
  855. - `0.4`
  856. - `"frequency_penalty"`: (*Body parameter*)
  857. Reduces the model’s tendency to repeat words frequently.
  858. - `0.7`
  859. - `"max_tokens"`: (*Body parameter*)
  860. Sets the maximum length of the model’s output, measured in tokens (words or pieces of words).
  861. - `512`
  862. ---
  863. ##### Chat.Prompt parameters:
  864. - `"similarity_threshold"`: (*Body parameter*)
  865. Filters out chunks with similarity below this threshold.
  866. - `0.2`
  867. - `"keywords_similarity_weight"`: (*Body parameter*)
  868. The weight given to keyword similarity when it is combined with vector cosine similarity into a single score; the two weights sum to 1.0.
  869. - `0.7`
  870. - `"top_n"`: (*Body parameter*)
  871. Only the top N chunks above the similarity threshold will be fed to LLMs.
  872. - `8`
  873. - `"variables"`: (*Body parameter*)
  874. Variables help with different chat strategies by filling in the 'System' part of the prompt.
  875. - `[{"key": "knowledge", "optional": True}]`
  876. - `"rerank_model"`: (*Body parameter*)
  877. If empty, it uses vector cosine similarity; otherwise, it uses rerank score.
  878. - `""`
  879. - `"empty_response"`: (*Body parameter*)
  880. If nothing is retrieved, this will be used as the response. Leave blank if LLM should provide its own opinion.
  881. - `None`
  882. - `"opener"`: (*Body parameter*)
  883. The welcome message for clients.
  884. - `"Hi! I'm your assistant, what can I do for you?"`
  885. - `"show_quote"`: (*Body parameter*)
  886. Indicates whether the source of the original text should be displayed.
  887. - `True`
  888. - `"prompt"`: (*Body parameter*)
  889. Instructions for LLM to follow when answering questions, such as character design or answer length.
  890. - `"You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence 'The answer you are looking for is not found in the knowledge base!' Answers need to consider chat history. Here is the knowledge base: {knowledge} The above is the knowledge base."`
  891. ### Response
  892. Success:
  893. ```json
  894. {
  895. "code": 0,
  896. "data": {
  897. "avatar": "",
  898. "create_date": "Fri, 11 Oct 2024 03:23:24 GMT",
  899. "create_time": 1728617004635,
  900. "description": "A helpful Assistant",
  901. "do_refer": "1",
  902. "id": "2ca4b22e878011ef88fe0242ac120005",
  903. "knowledgebases": [
  904. {
  905. "avatar": null,
  906. "chunk_count": 0,
  907. "description": null,
  908. "document_count": 0,
  909. "embedding_model": "",
  910. "id": "0b2cbc8c877f11ef89070242ac120005",
  911. "language": "English",
  912. "name": "Test_assistant",
  913. "parse_method": "naive",
  914. "parser_config": {
  915. "pages": [
  916. [
  917. 1,
  918. 1000000
  919. ]
  920. ]
  921. },
  922. "permission": "me",
  923. "tenant_id": "4fb0cd625f9311efba4a0242ac120006"
  924. }
  925. ],
  926. "language": "English",
  927. "llm": {
  928. "frequency_penalty": 0.7,
  929. "max_tokens": 512,
  930. "model_name": "deepseek-chat___OpenAI-API@OpenAI-API-Compatible",
  931. "presence_penalty": 0.4,
  932. "temperature": 0.1,
  933. "top_p": 0.3
  934. },
  935. "name": "new_chat_1",
  936. "prompt": {
  937. "empty_response": "Sorry! 知识库中未找到相关内容!",
  938. "keywords_similarity_weight": 0.3,
  939. "opener": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
  940. "prompt": "你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\n 以下是知识库:\n {knowledge}\n 以上是知识库。",
  941. "rerank_model": "",
  942. "similarity_threshold": 0.2,
  943. "top_n": 6,
  944. "variables": [
  945. {
  946. "key": "knowledge",
  947. "optional": false
  948. }
  949. ]
  950. },
  951. "prompt_type": "simple",
  952. "status": "1",
  953. "tenant_id": "69736c5e723611efb51b0242ac120007",
  954. "top_k": 1024,
  955. "update_date": "Fri, 11 Oct 2024 03:23:24 GMT",
  956. "update_time": 1728617004635
  957. }
  958. }
  959. ```
  960. Error:
  961. ```json
  962. {
  963. "code": 102,
  964. "message": "Duplicated chat name in creating dataset."
  965. }
  966. ```
  967. ## Update chat
  968. **PUT** `/api/v1/chat/{chat_id}`
  969. Update a chat
  970. ### Request
  971. - Method: PUT
  972. - URL: `http://{address}/api/v1/chat/{chat_id}`
  973. - Headers:
  974. - `Content-Type: application/json`
  975. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  976. - Body: (Refer to the "Create chat" for the complete structure of the request body.)
  977. #### Request example
  978. ```bash
  979. curl --request PUT \
  980. --url http://{address}/api/v1/chat/{chat_id} \
  981. --header 'Content-Type: application/json' \
  982. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  983. --data '{
  984. "name":"Test"
  985. }'
  986. ```
  987. #### Parameters
  988. (Refer to the "Create chat" for the complete structure of the request parameters.)
  989. ### Response
  990. Success
  991. ```json
  992. {
  993. "code": 0
  994. }
  995. ```
  996. Error
  997. ```json
  998. {
  999. "code": 102,
  1000. "message": "Duplicated chat name in updating dataset."
  1001. }
  1002. ```
  1003. ## Delete chats
  1004. **DELETE** `/api/v1/chat`
  1005. Delete chats
  1006. ### Request
  1007. - Method: DELETE
  1008. - URL: `http://{address}/api/v1/chat`
  1009. - Headers:
  1010. - `Content-Type: application/json`
  1011. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1012. - Body:
  1013. - `ids`: List[string]
  1014. #### Request example
  1015. ```bash
  1016. # Provide the IDs of the chats to delete in the "ids" array.
  1017. curl --request DELETE \
  1018. --url http://{address}/api/v1/chat \
  1019. --header 'Content-Type: application/json' \
  1020. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  1021. --data '{
  1022. "ids": ["test_1", "test_2"]
  1023. }'
  1025. ```
  1026. #### Request parameters:
  1027. - `"ids"`: (*Body parameter*)
  1028. IDs of the chats to be deleted.
  1029. - `None`
  1030. ### Response
  1031. Success
  1032. ```json
  1033. {
  1034. "code": 0
  1035. }
  1036. ```
  1037. Error
  1038. ```json
  1039. {
  1040. "code": 102,
  1041. "message": "ids are required"
  1042. }
  1043. ```
  1044. ## List chats
  1045. **GET** `/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={chat_name}&id={chat_id}`
  1046. List chats based on filter criteria.
  1047. ### Request
  1048. - Method: GET
  1049. - URL: `http://{address}/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={chat_name}&id={chat_id}`
  1050. - Headers:
  1051. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1052. #### Request example
  1053. ```bash
  1054. curl --request GET \
  1055. --url 'http://{address}/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={chat_name}&id={chat_id}' \
  1056. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  1057. ```
  1058. #### Request parameters
  1059. - `"page"`: (*Query parameter*)
  1060. The current page number to retrieve from the paginated data. This parameter determines which set of records will be fetched.
  1061. - `1`
  1062. - `"page_size"`: (*Query parameter*)
  1063. The number of records to retrieve per page. This controls how many records will be included in each page.
  1064. - `1024`
  1065. - `"orderby"`: (*Query parameter*)
  1066. The field by which the records should be sorted. This specifies the attribute or column used to order the results.
  1067. - `"create_time"`
  1068. - `"desc"`: (*Query parameter*)
  1069. A boolean flag indicating whether the sorting should be in descending order.
  1070. - `True`
  1071. - `"id"`: (*Query parameter*)
  1072. The ID of the chat to be retrieved.
  1073. - `None`
  1074. - `"name"`: (*Query parameter*)
  1075. The name of the chat to be retrieved.
  1076. - `None`
  1077. ### Response
  1078. Success
  1079. ```json
  1080. {
  1081. "code": 0,
  1082. "data": [
  1083. {
  1084. "avatar": "",
  1085. "create_date": "Fri, 11 Oct 2024 03:23:24 GMT",
  1086. "create_time": 1728617004635,
  1087. "description": "A helpful Assistant",
  1088. "do_refer": "1",
  1089. "id": "2ca4b22e878011ef88fe0242ac120005",
  1090. "knowledgebases": [
  1091. {
  1092. "avatar": "",
  1093. "chunk_num": 0,
  1094. "create_date": "Fri, 11 Oct 2024 03:15:18 GMT",
  1095. "create_time": 1728616518986,
  1096. "created_by": "69736c5e723611efb51b0242ac120007",
  1097. "description": "",
  1098. "doc_num": 0,
  1099. "embd_id": "BAAI/bge-large-zh-v1.5",
  1100. "id": "0b2cbc8c877f11ef89070242ac120005",
  1101. "language": "English",
  1102. "name": "test_delete_chat",
  1103. "parser_config": {
  1104. "chunk_token_count": 128,
  1105. "delimiter": "\n!?。;!?",
  1106. "layout_recognize": true,
  1107. "task_page_size": 12
  1108. },
  1109. "parser_id": "naive",
  1110. "permission": "me",
  1111. "similarity_threshold": 0.2,
  1112. "status": "1",
  1113. "tenant_id": "69736c5e723611efb51b0242ac120007",
  1114. "token_num": 0,
  1115. "update_date": "Fri, 11 Oct 2024 04:01:31 GMT",
  1116. "update_time": 1728619291228,
  1117. "vector_similarity_weight": 0.3
  1118. }
  1119. ],
  1120. "language": "English",
  1121. "llm": {
  1122. "frequency_penalty": 0.7,
  1123. "max_tokens": 512,
  1124. "model_name": "deepseek-chat___OpenAI-API@OpenAI-API-Compatible",
  1125. "presence_penalty": 0.4,
  1126. "temperature": 0.1,
  1127. "top_p": 0.3
  1128. },
  1129. "name": "Test",
  1130. "prompt": {
  1131. "empty_response": "Sorry! 知识库中未找到相关内容!",
  1132. "keywords_similarity_weight": 0.3,
  1133. "opener": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
  1134. "prompt": "你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\n 以下是知识库:\n {knowledge}\n 以上是知识库。",
  1135. "rerank_model": "",
  1136. "similarity_threshold": 0.2,
  1137. "top_n": 6,
  1138. "variables": [
  1139. {
  1140. "key": "knowledge",
  1141. "optional": false
  1142. }
  1143. ]
  1144. },
  1145. "prompt_type": "simple",
  1146. "status": "1",
  1147. "tenant_id": "69736c5e723611efb51b0242ac120007",
  1148. "top_k": 1024,
  1149. "update_date": "Fri, 11 Oct 2024 03:47:58 GMT",
  1150. "update_time": 1728618478392
  1151. }
  1152. ]
  1153. }
  1154. ```
  1155. Error
  1156. ```json
  1157. {
  1158. "code": 102,
  1159. "message": "The chat doesn't exist"
  1160. }
  1161. ```
  1162. ## Create a chat session
  1163. **POST** `/api/v1/chat/{chat_id}/session`
  1164. Create a chat session
  1165. ### Request
  1166. - Method: POST
  1167. - URL: `http://{address}/api/v1/chat/{chat_id}/session`
  1168. - Headers:
  1169. - `Content-Type: application/json`
  1170. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1171. - Body:
  1172. - name: `string`
  1173. #### Request example
  1174. ```bash
  1175. curl --request POST \
  1176. --url http://{address}/api/v1/chat/{chat_id}/session \
  1177. --header 'Content-Type: application/json' \
  1178. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  1179. --data '{
  1180. "name": "new session"
  1181. }'
  1182. ```
  1183. #### Request parameters
  1184. - `"id"`: (*Body parameter*)
  1185. The ID of the created session used to identify different sessions.
  1186. - `None`
  1187. - `id` cannot be provided when creating.
  1188. - `"name"`: (*Body parameter*)
  1189. The name of the created session.
  1190. - `"New session"`
  1191. - `"messages"`: (*Body parameter*)
  1192. The messages of the created session.
  1193. - `[{"role": "assistant", "content": "Hi! I am your assistant, can I help you?"}]`
  1194. - `messages` cannot be provided when creating.
  1195. - `"chat_id"`: (*Path parameter*)
  1196. The ID of the associated chat.
  1197. - `""`
  1198. - `chat_id` cannot be changed.
  1199. ### Response
  1200. Success
  1201. ```json
  1202. {
  1203. "code": 0,
  1204. "data": {
  1205. "chat_id": "2ca4b22e878011ef88fe0242ac120005",
  1206. "create_date": "Fri, 11 Oct 2024 08:46:14 GMT",
  1207. "create_time": 1728636374571,
  1208. "id": "4606b4ec87ad11efbc4f0242ac120006",
  1209. "messages": [
  1210. {
  1211. "content": "Hi! I am your assistant,can I help you?",
  1212. "role": "assistant"
  1213. }
  1214. ],
  1215. "name": "new session",
  1216. "update_date": "Fri, 11 Oct 2024 08:46:14 GMT",
  1217. "update_time": 1728636374571
  1218. }
  1219. }
  1220. ```
  1221. Error
  1222. ```json
  1223. {
  1224. "code": 102,
  1225. "message": "Name can not be empty."
  1226. }
  1227. ```
  1228. ## List the sessions of a chat
  1229. **GET** `/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}`
  1230. List all sessions under the chat based on the filtering criteria.
  1231. ### Request
  1232. - Method: GET
  1233. - URL: `http://{address}/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}`
  1234. - Headers:
  1235. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1236. #### Request example
  1237. ```bash
  1238. curl --request GET \
  1239. --url 'http://{address}/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}' \
  1240. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  1241. ```
  1242. #### Request Parameters
  1243. - `"page"`: (*Query parameter*)
  1244. The current page number to retrieve from the paginated data. This parameter determines which set of records will be fetched.
  1245. - `1`
  1246. - `"page_size"`: (*Query parameter*)
  1247. The number of records to retrieve per page. This controls how many records will be included in each page.
  1248. - `1024`
  1249. - `"orderby"`: (*Query parameter*)
  1250. The field by which the records should be sorted. This specifies the attribute or column used to order the results.
  1251. - `"create_time"`
  1252. - `"desc"`: (*Query parameter*)
  1253. A boolean flag indicating whether the sorting should be in descending order.
  1254. - `True`
  1255. - `"id"`: (*Query parameter*)
  1256. The ID of the session to be retrieved.
  1257. - `None`
  1258. - `"name"`: (*Query parameter*)
  1259. The name of the session to be retrieved.
  1260. - `None`
  1261. ### Response
  1262. Success
  1263. ```json
  1264. {
  1265. "code": 0,
  1266. "data": [
  1267. {
  1268. "chat": "2ca4b22e878011ef88fe0242ac120005",
  1269. "create_date": "Fri, 11 Oct 2024 08:46:43 GMT",
  1270. "create_time": 1728636403974,
  1271. "id": "578d541e87ad11ef96b90242ac120006",
  1272. "messages": [
  1273. {
  1274. "content": "Hi! I am your assistant,can I help you?",
  1275. "role": "assistant"
  1276. }
  1277. ],
  1278. "name": "new session",
  1279. "update_date": "Fri, 11 Oct 2024 08:46:43 GMT",
  1280. "update_time": 1728636403974
  1281. }
  1282. ]
  1283. }
  1284. ```
  1285. Error
  1286. ```json
  1287. {
  1288. "code": 102,
  1289. "message": "The session doesn't exist"
  1290. }
  1291. ```
  1292. ## Delete chat sessions
  1293. **DELETE** `/api/v1/chat/{chat_id}/session`
  1294. Delete chat sessions
  1295. ### Request
  1296. - Method: DELETE
  1297. - URL: `http://{address}/api/v1/chat/{chat_id}/session`
  1298. - Headers:
  1299. - `Content-Type: application/json`
  1300. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1301. - Body:
  1302. - `ids`: List[string]
  1303. #### Request example
  1304. ```bash
  1305. # Provide the IDs of the sessions to delete in the "ids" array.
  1306. curl --request DELETE \
  1307. --url http://{address}/api/v1/chat/{chat_id}/session \
  1308. --header 'Content-Type: application/json' \
  1309. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  1310. --data '{
  1311. "ids": ["test_1", "test_2"]
  1312. }'
  1313. ```
  1314. #### Request Parameters
  1315. - `ids`: (*Body Parameter*)
  1316. IDs of the sessions to be deleted.
  1317. - `None`
  1318. ### Response
  1319. Success
  1320. ```json
  1321. {
  1322. "code": 0
  1323. }
  1324. ```
  1325. Error
  1326. ```json
  1327. {
  1328. "code": 102,
  1329. "message": "The chat doesn't own the session"
  1330. }
  1331. ```
  1332. ## Update a chat session
  1333. **PUT** `/api/v1/chat/{chat_id}/session/{session_id}`
  1334. Update a chat session
  1335. ### Request
  1336. - Method: PUT
  1337. - URL: `http://{address}/api/v1/chat/{chat_id}/session/{session_id}`
  1338. - Headers:
  1339. - `Content-Type: application/json`
  1340. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1341. - Body:
  1342. - `name`: string
  1343. #### Request example
  1344. ```bash
  1345. curl --request PUT \
  1346. --url http://{address}/api/v1/chat/{chat_id}/session/{session_id} \
  1347. --header 'Content-Type: application/json' \
  1348. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  1349. --data '{
  1350. "name": "Updated session"
  1351. }'
  1352. ```
  1353. #### Request Parameter
  1354. - `name`: (*Body Parameter*)
  1355. The new name of the session.
  1356. - `None`
  1357. ### Response
  1358. Success
  1359. ```json
  1360. {
  1361. "code": 0
  1362. }
  1363. ```
  1364. Error
  1365. ```json
  1366. {
  1367. "code": 102,
  1368. "message": "Name can not be empty."
  1369. }
  1370. ```
  1371. ## Chat with a chat session
  1372. **POST** `/api/v1/chat/{chat_id}/session/{session_id}/completion`
  1373. Chat with a chat session
  1374. ### Request
  1375. - Method: POST
  1376. - URL: `http://{address}/api/v1/chat/{chat_id}/session/{session_id}/completion`
  1377. - Headers:
  1378. - `Content-Type: application/json`
  1379. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1380. - Body:
  1381. - `question`: string
  1382. - `stream`: bool
  1383. #### Request example
  1384. ```bash
  1385. curl --request POST \
  1386. --url http://{address}/api/v1/chat/{chat_id}/session/{session_id}/completion \
  1387. --header 'Content-Type: application/json' \
  1388. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  1389. --data-binary '{
  1390. "question": "你好!",
  1391. "stream": true
  1392. }'
  1393. ```
  1394. #### Request Parameters
  1395. - `question`:(*Body Parameter*)
  1396. The question you want to ask.
  1397. - question is required.
  1398. `None`
  1399. - `stream`: (*Body Parameter*)
  1400. Whether to stream the generated text back incrementally instead of returning it in a single response.
  1401. `False`
  1402. ### Response
  1403. Success
  1404. ```text
  1405. data: {
  1406. "code": 0,
  1407. "data": {
  1408. "answer": "您好!有什么具体的问题或者需要的帮助",
  1409. "reference": {},
  1410. "audio_binary": null,
  1411. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1412. }
  1413. }
  1414. data: {
  1415. "code": 0,
  1416. "data": {
  1417. "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助",
  1418. "reference": {},
  1419. "audio_binary": null,
  1420. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1421. }
  1422. }
  1423. data: {
  1424. "code": 0,
  1425. "data": {
  1426. "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助您的。如果您有任何疑问或是需要获取",
  1427. "reference": {},
  1428. "audio_binary": null,
  1429. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1430. }
  1431. }
  1432. data: {
  1433. "code": 0,
  1434. "data": {
  1435. "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助您的。如果您有任何疑问或是需要获取某些信息,请随时提出。",
  1436. "reference": {},
  1437. "audio_binary": null,
  1438. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1439. }
  1440. }
  1441. data: {
  1442. "code": 0,
  1443. "data": {
  1444. "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗 ##0$$?我在这里是为了帮助您的。如果您有任何疑问或是需要获取某些信息,请随时提出。",
  1445. "reference": {
  1446. "total": 19,
  1447. "chunks": [
  1448. {
  1449. "chunk_id": "9d87f9d70a0d8a7565694a81fd4c5d5f",
  1450. "content_ltks": "当所有知识库内容都与问题无关时 ,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n以下是知识库:\r\n{knowledg}\r\n以上是知识库\r\n\"\"\"\r\n 1\r\n 2\r\n 3\r\n 4\r\n 5\r\n 6\r\n总结\r\n通过上面的介绍,可以对开源的 ragflow有了一个大致的了解,与前面的有道qanyth整体流程还是比较类似的。 ",
  1451. "content_with_weight": "当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n 以下是知识库:\r\n {knowledge}\r\n 以上是知识库\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n总结\r\n通过上面的介绍,可以对开源的 RagFlow 有了一个大致的了解,与前面的 有道 QAnything 整体流程还是比较类似的。",
  1452. "doc_id": "5c5999ec7be811ef9cab0242ac120005",
  1453. "docnm_kwd": "1.txt",
  1454. "kb_id": "c7ee74067a2c11efb21c0242ac120006",
  1455. "important_kwd": [],
  1456. "img_id": "",
  1457. "similarity": 0.38337178633282265,
  1458. "vector_similarity": 0.3321336754679629,
  1459. "term_similarity": 0.4053309767034769,
  1460. "positions": [
  1461. ""
  1462. ]
  1463. },
  1464. {
  1465. "chunk_id": "895d34de762e674b43e8613c6fb54c6d",
  1466. "content_ltks": "\r\n\r\n实际内容可能会超过大模型的输入token数量,因此在调用大模型前会调用api/db/servic/dialog_service.py文件中 messag_fit_in ()根据大模型可用的 token数量进行过滤。这部分与有道的 qanyth的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt ,即可作为大模型的输入了 ,默认的英文prompt如下所示:\r\n\r\n\"\"\"\r\nyou are an intellig assistant. pleas summar the content of the knowledg base to answer the question. pleas list thedata in the knowledg base and answer in detail. when all knowledg base content is irrelev to the question , your answer must includ the sentenc\"the answer you are lookfor isnot found in the knowledg base!\" answer needto consid chat history.\r\n here is the knowledg base:\r\n{ knowledg}\r\nthe abov is the knowledg base.\r\n\"\"\"\r\n1\r\n 2\r\n 3\r\n 4\r\n 5\r\n 6\r\n对应的中文prompt如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。 ",
  1467. "content_with_weight": "\r\n\r\n实际内容可能会超过大模型的输入 token 数量,因此在调用大模型前会调用 api/db/services/dialog_service.py 文件中 message_fit_in() 根据大模型可用的 token 数量进行过滤。这部分与有道的 QAnything 的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt,即可作为大模型的输入了,默认的英文 prompt 如下所示:\r\n\r\n\"\"\"\r\nYou are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\r\n Here is the knowledge base:\r\n {knowledge}\r\n The above is the knowledge base.\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n对应的中文 prompt 如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。",
  1468. "doc_id": "5c5999ec7be811ef9cab0242ac120005",
  1469. "docnm_kwd": "1.txt",
  1470. "kb_id": "c7ee74067a2c11efb21c0242ac120006",
  1471. "important_kwd": [],
  1472. "img_id": "",
  1473. "similarity": 0.2788204323926715,
  1474. "vector_similarity": 0.35489427679953667,
  1475. "term_similarity": 0.2462173562183008,
  1476. "positions": [
  1477. ""
  1478. ]
  1479. }
  1480. ],
  1481. "doc_aggs": [
  1482. {
  1483. "doc_name": "1.txt",
  1484. "doc_id": "5c5999ec7be811ef9cab0242ac120005",
  1485. "count": 2
  1486. }
  1487. ]
  1488. },
  1489. "prompt": "你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\n 以下是知识库:\n 当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n 以下是知识库:\r\n {knowledge}\r\n 以上是知识库\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n总结\r\n通过上面的介绍,可以对开源的 RagFlow 有了一个大致的了解,与前面的 有道 QAnything 整体流程还是比较类似的。\n\n------\n\n\r\n\r\n实际内容可能会超过大模型的输入 token 数量,因此在调用大模型前会调用 api/db/services/dialog_service.py 文件中 message_fit_in() 根据大模型可用的 token 数量进行过滤。这部分与有道的 QAnything 的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt,即可作为大模型的输入了,默认的英文 prompt 如下所示:\r\n\r\n\"\"\"\r\nYou are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\r\n Here is the knowledge base:\r\n {knowledge}\r\n The above is the knowledge base.\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n对应的中文 prompt 如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。\n 以上是知识库。\n\n### Query:\n你好,请问有什么问题需要我帮忙解答吗?\n\n### Elapsed\n - Retrieval: 9131.1 ms\n - LLM: 12802.6 ms",
  1490. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1491. }
  1492. }
  1493. data:{
  1494. "code": 0,
  1495. "data": true
  1496. }
  1497. ```
  1498. Error
  1499. ```json
  1500. {
  1501. "code": 102,
  1502. "message": "Please input your question."
  1503. }
  1504. ```