You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

http_api.md 54KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853
  1. # DRAFT! HTTP API Reference
  2. **THE API REFERENCES BELOW ARE STILL UNDER DEVELOPMENT.**
  3. ## Create dataset
  4. **POST** `/api/v1/dataset`
  5. Creates a dataset.
  6. ### Request
  7. - Method: POST
  8. - URL: `http://{address}/api/v1/dataset`
  9. - Headers:
  10. - `content-Type: application/json`
  11. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  12. - Body:
  13. - `"id"`: `string`
  14. - `"name"`: `string`
  15. - `"avatar"`: `string`
  16. - `"tenant_id"`: `string`
  17. - `"description"`: `string`
  18. - `"language"`: `string`
  19. - `"embedding_model"`: `string`
  20. - `"permission"`: `string`
  21. - `"document_count"`: `integer`
  22. - `"chunk_count"`: `integer`
  23. - `"parse_method"`: `string`
  24. - `"parser_config"`: `Dataset.ParserConfig`
  25. #### Request example
  26. ```bash
  27. # "id": id must not be provided.
  28. # "name": name is required and can't be duplicated.
  29. # "tenant_id": tenant_id must not be provided.
  30. # "embedding_model": embedding_model must not be provided.
  31. # "naive" means general.
  32. curl --request POST \
  33. --url http://{address}/api/v1/dataset \
  34. --header 'Content-Type: application/json' \
  35. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  36. --data '{
  37. "name": "test",
  38. "chunk_count": 0,
  39. "document_count": 0,
  40. "parse_method": "naive"
  41. }'
  42. ```
  43. #### Request parameters
  44. - `"id"`: (*Body parameter*)
  45. The ID of the created dataset used to uniquely identify different datasets.
  46. - If creating a dataset, `id` must not be provided.
  47. - `"name"`: (*Body parameter*)
  48. The name of the dataset, which must adhere to the following requirements:
  49. - Required when creating a dataset and must be unique.
  50. - If updating a dataset, `name` must still be unique.
  51. - `"avatar"`: (*Body parameter*)
  52. Base64 encoding of the avatar.
  53. - `"tenant_id"`: (*Body parameter*)
  54. The ID of the tenant associated with the dataset, used to link it with specific users.
  55. - If creating a dataset, `tenant_id` must not be provided.
  56. - If updating a dataset, `tenant_id` cannot be changed.
  57. - `"description"`: (*Body parameter*)
  58. The description of the dataset.
  59. - `"language"`: (*Body parameter*)
  60. The language setting for the dataset.
  61. - `"embedding_model"`: (*Body parameter*)
  62. Embedding model used in the dataset to generate vector embeddings.
  63. - If creating a dataset, `embedding_model` must not be provided.
  64. - If updating a dataset, `embedding_model` cannot be changed.
  65. - `"permission"`: (*Body parameter*)
  66. Specifies who can manipulate the dataset.
  67. - `"document_count"`: (*Body parameter*)
  68. Document count of the dataset.
  69. - If updating a dataset, `document_count` cannot be changed.
  70. - `"chunk_count"`: (*Body parameter*)
  71. Chunk count of the dataset.
  72. - If updating a dataset, `chunk_count` cannot be changed.
  73. - `"parse_method"`: (*Body parameter*)
  74. Parsing method of the dataset.
  75. - If updating `parse_method`, `chunk_count` must be 0.
  76. - `"parser_config"`: (*Body parameter*)
  77. The configuration settings for the dataset parser.
  78. ### Response
  79. The successful response includes a JSON object like the following:
  80. ```json
  81. {
  82. "code": 0,
  83. "data": {
  84. "avatar": null,
  85. "chunk_count": 0,
  86. "create_date": "Thu, 10 Oct 2024 05:57:37 GMT",
  87. "create_time": 1728539857641,
  88. "created_by": "69736c5e723611efb51b0242ac120007",
  89. "description": null,
  90. "document_count": 0,
  91. "embedding_model": "BAAI/bge-large-zh-v1.5",
  92. "id": "8d73076886cc11ef8c270242ac120006",
  93. "language": "English",
  94. "name": "test_1",
  95. "parse_method": "naive",
  96. "parser_config": {
  97. "pages": [
  98. [
  99. 1,
  100. 1000000
  101. ]
  102. ]
  103. },
  104. "permission": "me",
  105. "similarity_threshold": 0.2,
  106. "status": "1",
  107. "tenant_id": "69736c5e723611efb51b0242ac120007",
  108. "token_num": 0,
  109. "update_date": "Thu, 10 Oct 2024 05:57:37 GMT",
  110. "update_time": 1728539857641,
  111. "vector_similarity_weight": 0.3
  112. }
  113. }
  114. ```
  115. - `"code"`: `integer`
  116. `0`: The operation succeeds.
  117. The error response includes a JSON object like the following:
  118. ```json
  119. {
  120. "code": 102,
  121. "message": "Duplicated knowledgebase name in creating dataset."
  122. }
  123. ```
  124. ## Delete datasets
  125. **DELETE** `/api/v1/dataset`
  126. Deletes datasets by ids.
  127. ### Request
  128. - Method: DELETE
  129. - URL: `http://{address}/api/v1/dataset`
  130. - Headers:
  131. - `content-Type: application/json`
  132. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  133. - Body:
  134. - `"ids"`: `List[string]`
  135. #### Request example
  136. ```bash
  137. # Either id or name must be provided, but not both.
  138. curl --request DELETE \
  139. --url http://{address}/api/v1/dataset \
  140. --header 'Content-Type: application/json' \
  141. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  142. --data '{
  143. "ids": ["test_1", "test_2"]
  144. }'
  145. ```
  146. #### Request parameters
  147. - `"ids"`: (*Body parameter*)
  148. Dataset IDs to delete.
  149. ### Response
  150. The successful response includes a JSON object like the following:
  151. ```json
  152. {
  153. "code": 0
  154. }
  155. ```
  156. - `"code"`: `integer`
  157. `0`: The operation succeeds.
  158. The error response includes a JSON object like the following:
  159. ```json
  160. {
  161. "code": 102,
  162. "message": "You don't own the dataset."
  163. }
  164. ```
  165. ## Update dataset
  166. **PUT** `/api/v1/dataset/{dataset_id}`
  167. Updates a dataset by its id.
  168. ### Request
  169. - Method: PUT
  170. - URL: `http://{address}/api/v1/dataset/{dataset_id}`
  171. - Headers:
  172. - `content-Type: application/json`
  173. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  174. - Body: (Refer to the "Create Dataset" for the complete structure of the request body.)
  175. #### Request example
  176. ```bash
  177. # "id": id is required.
  178. # "name": If you update name, it can't be duplicated.
  179. # "tenant_id": If you update tenant_id, it can't be changed
  180. # "embedding_model": If you update embedding_model, it can't be changed.
  181. # "chunk_count": If you update chunk_count, it can't be changed.
  182. # "document_count": If you update document_count, it can't be changed.
  183. # "parse_method": If you update parse_method, chunk_count must be 0.
  184. # "naive" means general.
  185. curl --request PUT \
  186. --url http://{address}/api/v1/dataset/{dataset_id} \
  187. --header 'Content-Type: application/json' \
  188. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  189. --data '{
  190. "name": "test",
  191. "tenant_id": "4fb0cd625f9311efba4a0242ac120006",
  192. "embedding_model": "BAAI/bge-zh-v1.5",
  193. "chunk_count": 0,
  194. "document_count": 0,
  195. "parse_method": "naive"
  196. }'
  197. ```
  198. #### Request parameters
  199. (Refer to the "Create Dataset" for the complete structure of the request parameters.)
  200. ### Response
  201. The successful response includes a JSON object like the following:
  202. ```json
  203. {
  204. "code": 0
  205. }
  206. ```
  207. - `"code"`: `integer`
  208. `0`: The operation succeeds.
  209. The error response includes a JSON object like the following:
  210. ```json
  211. {
  212. "code": 102,
  213. "message": "Can't change tenant_id."
  214. }
  215. ```
  216. ## List datasets
  217. **GET** `/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
  218. List all datasets
  219. ### Request
  220. - Method: GET
  221. - URL: `http://{address}/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
  222. - Headers:
  223. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  224. #### Request example
  225. ```bash
  226. # If no page parameter is passed, the default is 1
  227. # If no page_size parameter is passed, the default is 1024
  228. # If no orderby parameter is passed, the default is "create_time"
  229. # If no desc parameter is passed, the default is True
  230. curl --request GET \
  231. --url 'http://{address}/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}' \
  232. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  233. ```
  234. #### Request parameters
  235. - `page`: (*Path parameter*)
  236. The current page number to retrieve from the paginated data. This parameter determines which set of records will be fetched.
  237. - `page_size`: (*Path parameter*)
  238. The number of records to retrieve per page. This controls how many records will be included in each page.
  239. - `orderby`: (*Path parameter*)
  240. The field by which the records should be sorted. This specifies the attribute or column used to order the results.
  241. - `desc`: (*Path parameter*)
  242. A boolean flag indicating whether the sorting should be in descending order.
  243. - `name`: (*Path parameter*)
  244. Dataset name
  245. - `"id"`: (*Path parameter*)
  246. The ID of the dataset to be retrieved.
  247. - `"name"`: (*Path parameter*)
  248. The name of the dataset to be retrieved.
  249. ### Response
  250. The successful response includes a JSON object like the following:
  251. ```json
  252. {
  253. "code": 0,
  254. "data": [
  255. {
  256. "avatar": "",
  257. "chunk_count": 59,
  258. "create_date": "Sat, 14 Sep 2024 01:12:37 GMT",
  259. "create_time": 1726276357324,
  260. "created_by": "69736c5e723611efb51b0242ac120007",
  261. "description": null,
  262. "document_count": 1,
  263. "embedding_model": "BAAI/bge-large-zh-v1.5",
  264. "id": "6e211ee0723611efa10a0242ac120007",
  265. "language": "English",
  266. "name": "mysql",
  267. "parse_method": "knowledge_graph",
  268. "parser_config": {
  269. "chunk_token_num": 8192,
  270. "delimiter": "\\n!?;。;!?",
  271. "entity_types": [
  272. "organization",
  273. "person",
  274. "location",
  275. "event",
  276. "time"
  277. ]
  278. },
  279. "permission": "me",
  280. "similarity_threshold": 0.2,
  281. "status": "1",
  282. "tenant_id": "69736c5e723611efb51b0242ac120007",
  283. "token_num": 12744,
  284. "update_date": "Thu, 10 Oct 2024 04:07:23 GMT",
  285. "update_time": 1728533243536,
  286. "vector_similarity_weight": 0.3
  287. }
  288. ]
  289. }
  290. ```
  291. The error response includes a JSON object like the following:
  292. ```json
  293. {
  294. "code": 102,
  295. "message": "The dataset doesn't exist"
  296. }
  297. ```
  298. ## Upload files to a dataset
  299. **POST** `/api/v1/dataset/{dataset_id}/document`
  300. Uploads files to a dataset.
  301. ### Request
  302. - Method: POST
  303. - URL: `/api/v1/dataset/{dataset_id}/document`
  304. - Headers:
  305. - 'Content-Type: multipart/form-data'
  306. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  307. - Form:
  308. - 'file=@{FILE_PATH}'
  309. #### Request example
  310. ```bash
  311. curl --request POST \
  312. --url http://{address}/api/v1/dataset/{dataset_id}/document \
  313. --header 'Content-Type: multipart/form-data' \
  314. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  315. --form 'file=@./test.txt'
  316. ```
  317. #### Request parameters
  318. - `"dataset_id"`: (*Path parameter*)
  319. The dataset id
  320. - `"file"`: (*Body parameter*)
  321. The file to upload
  322. ### Response
  323. The successful response includes a JSON object like the following:
  324. ```json
  325. {
  326. "code": 0
  327. }
  328. ```
  329. - `"code"`: `integer`
  330. `0`: The operation succeeds.
  331. The error response includes a JSON object like the following:
  332. ```json
  333. {
  334. "code": 101,
  335. "message": "No file part!"
  336. }
  337. ```
  338. ## Download a file from a dataset
  339. **GET** `/api/v1/dataset/{dataset_id}/document/{document_id}`
  340. Downloads files from a dataset.
  341. ### Request
  342. - Method: GET
  343. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}`
  344. - Headers:
  345. - `content-Type: application/json`
  346. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  347. - Output:
  348. - '{FILE_NAME}'
  349. #### Request example
  350. ```bash
  351. curl --request GET \
  352. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id} \
  353. --header 'Content-Type: application/json' \
  354. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  355. --output '{FILE_NAME}'
  356. ```
  357. #### Request parameters
  358. - `"dataset_id"`: (*PATH parameter*)
  359. The dataset id
  360. - `"document_id"`: (*PATH parameter*)
  361. The document id of the file.
  362. ### Response
  363. The successful response includes a JSON object like the following:
  364. ```text
  365. test_2.
  366. ```
  367. - `"error_code"`: `integer`
  368. `0`: The operation succeeds.
  369. The error response includes a JSON object like the following:
  370. ```json
  371. {
  372. "code": 102,
  373. "message": "You do not own the dataset 7898da028a0511efbf750242ac1220005."
  374. }
  375. ```
  376. ## List files of a dataset
  377. **GET** `/api/v1/dataset/{dataset_id}/info?offset={offset}&limit={limit}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}`
  378. Lists the files in a dataset.
  379. ### Request
  380. - Method: GET
  381. - URL: `/api/v1/dataset/{dataset_id}/info?offset={offset}&limit={limit}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}`
  382. - Headers:
  383. - `content-Type: application/json`
  384. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  385. #### Request example
  386. ```bash
  387. curl --request GET \
  388. --url 'http://{address}/api/v1/dataset/{dataset_id}/info?offset={offset}&limit={limit}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}' \
  389. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  390. ```
  391. #### Request parameters
  392. - `"dataset_id"`: (*PATH parameter*)
  393. The dataset id
  394. - `offset`: (*Filter parameter*)
  395. The beginning number of records for paging.
  396. - `keywords`: (*Filter parameter*)
  397. The keywords to match against the search key words.
  398. - `limit`: (*Filter parameter*)
  399. Records number to return.
  400. - `orderby`: (*Filter parameter*)
  401. The field by which the records should be sorted. This specifies the attribute or column used to order the results.
  402. - `desc`: (*Filter parameter*)
  403. A boolean flag indicating whether the sorting should be in descending order.
  404. - `id`: (*Filter parameter*)
  405. The ID of the document to retrieve.
  406. ### Response
  407. The successful response includes a JSON object like the following:
  408. ```json
  409. {
  410. "code": 0,
  411. "data": {
  412. "docs": [
  413. {
  414. "chunk_count": 0,
  415. "create_date": "Mon, 14 Oct 2024 09:11:01 GMT",
  416. "create_time": 1728897061948,
  417. "created_by": "69736c5e723611efb51b0242ac120007",
  418. "id": "3bcfbf8a8a0c11ef8aba0242ac120006",
  419. "knowledgebase_id": "7898da028a0511efbf750242ac120005",
  420. "location": "Test_2.txt",
  421. "name": "Test_2.txt",
  422. "parser_config": {
  423. "chunk_token_count": 128,
  424. "delimiter": "\n!?。;!?",
  425. "layout_recognize": true,
  426. "task_page_size": 12
  427. },
  428. "parser_method": "naive",
  429. "process_begin_at": null,
  430. "process_duation": 0.0,
  431. "progress": 0.0,
  432. "progress_msg": "",
  433. "run": "0",
  434. "size": 7,
  435. "source_type": "local",
  436. "status": "1",
  437. "thumbnail": null,
  438. "token_count": 0,
  439. "type": "doc",
  440. "update_date": "Mon, 14 Oct 2024 09:11:01 GMT",
  441. "update_time": 1728897061948
  442. }
  443. ],
  444. "total": 1
  445. }
  446. }
  447. ```
  448. - `"code"`: `integer`
  449. `0`: The operation succeeds.
  450. The error response includes a JSON object like the following:
  451. ```json
  452. {
  453. "code": 102,
  454. "message": "You don't own the dataset 7898da028a0511efbf750242ac1220005. "
  455. }
  456. ```
  457. ## Update file information in a dataset
  458. **PUT** `/api/v1/dataset/{dataset_id}/info/{document_id}`
  459. Update a file in a dataset
  460. ### Request
  461. - Method: PUT
  462. - URL: `http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}`
  463. - Headers:
  464. - `content-Type: application/json`
  465. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  466. #### Request example
  467. ```bash
  468. curl --request PUT \
  469. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id} \
  470. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  471. --header 'Content-Type: application/json' \
  472. --data '{
  473. "name": "manual.txt",
  474. "thumbnail": null,
  475. "knowledgebase_id": "779333c0758611ef910f0242ac120004",
  476. "parser_method": "manual",
  477. "parser_config": {"chunk_token_count": 128, "delimiter": "\n!?。;!?", "layout_recognize": true, "task_page_size": 12},
  478. "source_type": "local", "type": "doc",
  479. "created_by": "134408906b6811efbcd20242ac120005",
  480. "size": 0, "token_count": 0, "chunk_count": 0,
  481. "progress": 0.0,
  482. "progress_msg": "",
  483. "process_begin_at": null,
  484. "process_duration": 0.0
  485. }'
  486. ```
  487. #### Request parameters
  488. - `"thumbnail"`: (*Body parameter*)
  489. Thumbnail image of the document.
  490. - `""`
  491. - `"knowledgebase_id"`: (*Body parameter*)
  492. Knowledge base ID related to the document.
  493. - `""`
  494. - `"parser_method"`: (*Body parameter*)
  495. Method used to parse the document.
  496. - `""`
  497. - `"parser_config"`: (*Body parameter*)
  498. Configuration object for the parser.
  499. - If the value is `None`, a dictionary with default values will be generated.
  500. - `"source_type"`: (*Body parameter*)
  501. Source type of the document.
  502. - `""`
  503. - `"type"`: (*Body parameter*)
  504. Type or category of the document.
  505. - `""`
  506. - `"created_by"`: (*Body parameter*)
  507. Creator of the document.
  508. - `""`
  509. - `"name"`: (*Body parameter*)
  510. Name or title of the document.
  511. - `""`
  512. - `"size"`: (*Body parameter*)
  513. Size of the document in bytes or some other unit.
  514. - `0`
  515. - `"token_count"`: (*Body parameter*)
  516. Number of tokens in the document.
  517. - `0`
  518. - `"chunk_count"`: (*Body parameter*)
  519. Number of chunks the document is split into.
  520. - `0`
  521. - `"progress"`: (*Body parameter*)
  522. Current processing progress as a percentage.
  523. - `0.0`
  524. - `"progress_msg"`: (*Body parameter*)
  525. Message indicating current progress status.
  526. - `""`
  527. - `"process_begin_at"`: (*Body parameter*)
  528. Start time of the document processing.
  529. - `None`
  530. - `"process_duration"`: (*Body parameter*)
  531. Duration of the processing in seconds or minutes.
  532. - `0.0`
  533. ### Response
  534. The successful response includes a JSON object like the following:
  535. ```json
  536. {
  537. "code": 0
  538. }
  539. ```
  540. The error response includes a JSON object like the following:
  541. ```json
  542. {
  543. "code": 102,
  544. "message": "The dataset not own the document."
  545. }
  546. ```
  547. ## Parse files in dataset
  548. **POST** `/api/v1/dataset/{dataset_id}/chunk`
  549. Parse files into chunks in a dataset
  550. ### Request
  551. - Method: POST
  552. - URL: `/api/v1/dataset/{dataset_id}/chunk`
  553. - Headers:
  554. - `content-Type: application/json`
  555. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  556. #### Request example
  557. ```shell
  558. curl --request POST \
  559. --url http://{address}/api/v1/dataset/{dataset_id}/chunk \
  560. --header 'Content-Type: application/json' \
  561. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  562. --data '{
  563. "documents": ["f6b170ac758811efa0660242ac120004", "97ad64b6759811ef9fc30242ac120004"]
  564. }'
  565. ```
  566. #### Request parameters
  567. - `"dataset_id"`: (*Path parameter*)
  568. - `"documents"`: (*Body parameter*)
  569. - Documents to parse
  570. ### Response
  571. The successful response includes a JSON object like the following:
  572. ```shell
  573. {
  574. "code": 0
  575. }
  576. ```
  577. The error response includes a JSON object like the following:
  578. ```shell
  579. {
  580. "code": 3016,
  581. "message": "Can't connect database"
  582. }
  583. ```
  584. ## Stop file parsing
  585. **DELETE** `/api/v1/dataset/{dataset_id}/chunk`
  586. Stop file parsing
  587. ### Request
  588. - Method: DELETE
  589. - URL: `/api/v1/dataset/{dataset_id}/chunk`
  590. - Headers:
  591. - `content-Type: application/json`
  592. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  593. #### Request example
  594. ```shell
  595. curl --request DELETE \
  596. --url http://{address}/api/v1/dataset/{dataset_id}/chunk \
  597. --header 'Content-Type: application/json' \
  598. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  599. --data '{
  600. "documents": ["f6b170ac758811efa0660242ac120004", "97ad64b6759811ef9fc30242ac120004"]
  601. }'
  602. ```
  603. #### Request parameters
  604. - `"dataset_id"`: (*Path parameter*)
  605. - `"documents"`: (*Body parameter*)
  606. - Documents to stop parsing
  607. ### Response
  608. The successful response includes a JSON object like the following:
  609. ```shell
  610. {
  611. "code": 0
  612. }
  613. ```
  614. The error response includes a JSON object like the following:
  615. ```shell
  616. {
  617. "code": 3016,
  618. "message": "Can't connect database"
  619. }
  620. ```
  621. ## Get document chunk list
  622. **GET** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  623. Get document chunk list
  624. ### Request
  625. - Method: GET
  626. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  627. - Headers:
  628. - `content-Type: application/json`
  629. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  630. #### Request example
  631. ```shell
  632. curl --request GET \
  633. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
  634. --header 'Content-Type: application/json' \
  635. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  636. ```
  637. #### Request parameters
  638. - `"dataset_id"`: (*Path parameter*)
  639. - `"document_id"`: (*Path parameter*)
  640. ### Response
  641. The successful response includes a JSON object like the following:
  642. ```shell
  643. {
  644. "code": 0,
  645. "data": {
  646. "chunks": [
  647. {
  648. "available_int": 1,
  649. "content": "<em>advantag</em>of ragflow increas accuraci and relev:by incorpor retriev inform , ragflow can gener respons that are more accur",
  650. "document_keyword": "ragflow_test.txt",
  651. "document_id": "77df9ef4759a11ef8bdd0242ac120004",
  652. "id": "4ab8c77cfac1a829c8d5ed022a0808c0",
  653. "image_id": "",
  654. "important_keywords": [],
  655. "positions": [
  656. ""
  657. ]
  658. }
  659. ],
  660. "doc": {
  661. "chunk_count": 5,
  662. "create_date": "Wed, 18 Sep 2024 08:46:16 GMT",
  663. "create_time": 1726649176833,
  664. "created_by": "134408906b6811efbcd20242ac120005",
  665. "id": "77df9ef4759a11ef8bdd0242ac120004",
  666. "knowledgebase_id": "77d9d24e759a11ef880c0242ac120004",
  667. "location": "ragflow_test.txt",
  668. "name": "ragflow_test.txt",
  669. "parser_config": {
  670. "chunk_token_count": 128,
  671. "delimiter": "\n!?。;!?",
  672. "layout_recognize": true,
  673. "task_page_size": 12
  674. },
  675. "parser_method": "naive",
  676. "process_begin_at": "Wed, 18 Sep 2024 08:46:16 GMT",
  677. "process_duation": 7.3213,
  678. "progress": 1.0,
  679. "progress_msg": "\nTask has been received.\nStart to parse.\nFinish parsing.\nFinished slicing files(5). Start to embedding the content.\nFinished embedding(6.16)! Start to build index!\nDone!",
  680. "run": "3",
  681. "size": 4209,
  682. "source_type": "local",
  683. "status": "1",
  684. "thumbnail": null,
  685. "token_count": 746,
  686. "type": "doc",
  687. "update_date": "Wed, 18 Sep 2024 08:46:23 GMT",
  688. "update_time": 1726649183321
  689. },
  690. "total": 1
  691. }
  692. }
  693. ```
  694. The error response includes a JSON object like the following:
  695. ```shell
  696. {
  697. "code": 3016,
  698. "message": "Can't connect database"
  699. }
  700. ```
  701. ## Delete document chunks
  702. **DELETE** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  703. Delete document chunks
  704. ### Request
  705. - Method: DELETE
  706. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  707. - Headers:
  708. - `content-Type: application/json`
  709. - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  710. #### Request example
  711. ```shell
  712. curl --request DELETE \
  713. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
  714. --header 'Content-Type: application/json' \
  715. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  716. --data '{
  717. "chunks": ["f6b170ac758811efa0660242ac120004", "97ad64b6759811ef9fc30242ac120004"]
  718. }'
  719. ```
  720. ## Update document chunk
  721. **PUT** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  722. Update document chunk
  723. ### Request
  724. - Method: PUT
  725. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  726. - Headers:
  727. - `Content-Type: application/json`
  728. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  729. #### Request example
  730. ```shell
  731. curl --request PUT \
  732. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
  733. --header 'Content-Type: application/json' \
  734. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  735. --data '{
  736. "chunk_id": "d87fb0b7212c15c18d0831677552d7de",
  737. "knowledgebase_id": null,
  738. "name": "",
  739. "content": "ragflow123",
  740. "important_keywords": [],
  741. "document_id": "e6bbba92759511efaa900242ac120004",
  742. "status": "1"
  743. }'
  744. ```
  745. ## Insert document chunks
  746. **POST** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  747. Insert document chunks
  748. ### Request
  749. - Method: POST
  750. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  751. - Headers:
  752. - `Content-Type: application/json`
  753. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  754. #### Request example
  755. ```shell
  756. curl --request POST \
  757. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
  758. --header 'Content-Type: application/json' \
  759. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  760. --data '{
  761. "document_id": "97ad64b6759811ef9fc30242ac120004",
  762. "content": ["ragflow content", "ragflow content"]
  763. }'
  764. ```
  765. ## Dataset retrieval test
  766. **GET** `/api/v1/dataset/{dataset_id}/retrieval`
  767. Retrieval test of a dataset
  768. ### Request
  769. - Method: GET
  770. - URL: `/api/v1/dataset/{dataset_id}/retrieval`
  771. - Headers:
  772. - `Content-Type: application/json`
  773. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  774. #### Request example
  775. ```shell
  776. curl --request GET \
  777. --url http://{address}/api/v1/dataset/{dataset_id}/retrieval \
  778. --header 'Content-Type: application/json' \
  779. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  780. --data '{
  781. "query_text": "This is a cat."
  782. }'
  783. ```
  784. ## Create chat
  785. **POST** `/api/v1/chat`
  786. Create a chat
  787. ### Request
  788. - Method: POST
  789. - URL: `http://{address}/api/v1/chat`
  790. - Headers:
  791. - `Content-Type: application/json`
  792. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  793. - Body:
  794. - `"name"`: `string`
  795. - `"avatar"`: `string`
  796. - `"knowledgebases"`: `List[DataSet]`
  797. - `"id"`: `string`
  798. - `"llm"`: `LLM`
  799. - `"prompt"`: `Prompt`
  800. #### Request example
  801. ```shell
  802. curl --request POST \
  803. --url http://{address}/api/v1/chat \
  804. --header 'Content-Type: application/json' \
  805. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  806. --data-binary '{
  807. "knowledgebases": [
  808. {
  809. "avatar": null,
  810. "chunk_count": 0,
  811. "description": null,
  812. "document_count": 0,
  813. "embedding_model": "",
  814. "id": "0b2cbc8c877f11ef89070242ac120005",
  815. "language": "English",
  816. "name": "Test_assistant",
  817. "parse_method": "naive",
  818. "parser_config": {
  819. "pages": [
  820. [
  821. 1,
  822. 1000000
  823. ]
  824. ]
  825. },
  826. "permission": "me",
  827. "tenant_id": "4fb0cd625f9311efba4a0242ac120006"
  828. }
  829. ],
  830. "name":"new_chat_1"
  831. }'
  832. ```
  833. #### Request parameters
  834. - `"name"`: (*Body parameter*)
  835. The name of the created chat.
  836. - `"assistant"`
  837. - `"avatar"`: (*Body parameter*)
  838. The icon of the created chat.
  839. - `"path"`
  840. - `"knowledgebases"`: (*Body parameter*)
  841. Select knowledgebases associated.
  842. - `["kb1"]`
  843. - `"id"`: (*Body parameter*)
  844. The id of the created chat.
  845. - `""`
  846. - `"llm"`: (*Body parameter*)
  847. The LLM of the created chat.
  848. - If the value is `None`, a dictionary with default values will be generated.
  849. - `"prompt"`: (*Body parameter*)
  850. The prompt of the created chat.
  851. - If the value is `None`, a dictionary with default values will be generated.
  852. ---
  853. ##### Chat.LLM parameters:
  854. - `"model_name"`: (*Body parameter*)
  855. Large language chat model.
  856. - If it is `None`, it will return the user's default model.
  857. - `"temperature"`: (*Body parameter*)
  858. Controls the randomness of predictions by the model. A lower temperature makes the model more confident, while a higher temperature makes it more creative and diverse.
  859. - `0.1`
  860. - `"top_p"`: (*Body parameter*)
  861. Also known as "nucleus sampling," it focuses on the most likely words, cutting off the less probable ones.
  862. - `0.3`
  863. - `"presence_penalty"`: (*Body parameter*)
  864. Discourages the model from repeating the same information by penalizing repeated content.
  865. - `0.4`
  866. - `"frequency_penalty"`: (*Body parameter*)
  867. Reduces the model’s tendency to repeat words frequently.
  868. - `0.7`
  869. - `"max_tokens"`: (*Body parameter*)
  870. Sets the maximum length of the model’s output, measured in tokens (words or pieces of words).
  871. - `512`
  872. ---
  873. ##### Chat.Prompt parameters:
  874. - `"similarity_threshold"`: (*Body parameter*)
  875. Filters out chunks with similarity below this threshold.
  876. - `0.2`
  877. - `"keywords_similarity_weight"`: (*Body parameter*)
  878. The weight of keyword similarity in the combined score with vector cosine similarity; the two weights sum to 1.0.
  879. - `0.7`
  880. - `"top_n"`: (*Body parameter*)
  881. Only the top N chunks above the similarity threshold will be fed to LLMs.
  882. - `8`
  883. - `"variables"`: (*Body parameter*)
  884. Variables help with different chat strategies by filling in the 'System' part of the prompt.
  885. - `[{"key": "knowledge", "optional": True}]`
  886. - `"rerank_model"`: (*Body parameter*)
  887. If empty, it uses vector cosine similarity; otherwise, it uses rerank score.
  888. - `""`
  889. - `"empty_response"`: (*Body parameter*)
  890. If nothing is retrieved, this will be used as the response. Leave blank if LLM should provide its own opinion.
  891. - `None`
  892. - `"opener"`: (*Body parameter*)
  893. The welcome message for clients.
  894. - `"Hi! I'm your assistant, what can I do for you?"`
  895. - `"show_quote"`: (*Body parameter*)
  896. Indicates whether the source of the original text should be displayed.
  897. - `True`
  898. - `"prompt"`: (*Body parameter*)
  899. Instructions for LLM to follow when answering questions, such as character design or answer length.
  900. - `"You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence 'The answer you are looking for is not found in the knowledge base!' Answers need to consider chat history. Here is the knowledge base: {knowledge} The above is the knowledge base."`
  901. ### Response
  902. Success:
  903. ```json
  904. {
  905. "code": 0,
  906. "data": {
  907. "avatar": "",
  908. "create_date": "Fri, 11 Oct 2024 03:23:24 GMT",
  909. "create_time": 1728617004635,
  910. "description": "A helpful Assistant",
  911. "do_refer": "1",
  912. "id": "2ca4b22e878011ef88fe0242ac120005",
  913. "knowledgebases": [
  914. {
  915. "avatar": null,
  916. "chunk_count": 0,
  917. "description": null,
  918. "document_count": 0,
  919. "embedding_model": "",
  920. "id": "0b2cbc8c877f11ef89070242ac120005",
  921. "language": "English",
  922. "name": "Test_assistant",
  923. "parse_method": "naive",
  924. "parser_config": {
  925. "pages": [
  926. [
  927. 1,
  928. 1000000
  929. ]
  930. ]
  931. },
  932. "permission": "me",
  933. "tenant_id": "4fb0cd625f9311efba4a0242ac120006"
  934. }
  935. ],
  936. "language": "English",
  937. "llm": {
  938. "frequency_penalty": 0.7,
  939. "max_tokens": 512,
  940. "model_name": "deepseek-chat___OpenAI-API@OpenAI-API-Compatible",
  941. "presence_penalty": 0.4,
  942. "temperature": 0.1,
  943. "top_p": 0.3
  944. },
  945. "name": "new_chat_1",
  946. "prompt": {
  947. "empty_response": "Sorry! 知识库中未找到相关内容!",
  948. "keywords_similarity_weight": 0.3,
  949. "opener": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
  950. "prompt": "你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\n 以下是知识库:\n {knowledge}\n 以上是知识库。",
  951. "rerank_model": "",
  952. "similarity_threshold": 0.2,
  953. "top_n": 6,
  954. "variables": [
  955. {
  956. "key": "knowledge",
  957. "optional": false
  958. }
  959. ]
  960. },
  961. "prompt_type": "simple",
  962. "status": "1",
  963. "tenant_id": "69736c5e723611efb51b0242ac120007",
  964. "top_k": 1024,
  965. "update_date": "Fri, 11 Oct 2024 03:23:24 GMT",
  966. "update_time": 1728617004635
  967. }
  968. }
  969. ```
  970. Error:
  971. ```json
  972. {
  973. "code": 102,
  974. "message": "Duplicated chat name in creating dataset."
  975. }
  976. ```
  977. ## Update chat
  978. **PUT** `/api/v1/chat/{chat_id}`
  979. Update a chat
  980. ### Request
  981. - Method: PUT
  982. - URL: `http://{address}/api/v1/chat/{chat_id}`
  983. - Headers:
  984. - `Content-Type: application/json`
  985. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  986. - Body: (Refer to the "Create chat" for the complete structure of the request body.)
  987. #### Request example
  988. ```bash
  989. curl --request PUT \
  990. --url http://{address}/api/v1/chat/{chat_id} \
  991. --header 'Content-Type: application/json' \
  992. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  993. --data '{
  994. "name":"Test"
  995. }'
  996. ```
  997. #### Parameters
  998. (Refer to the "Create chat" for the complete structure of the request parameters.)
  999. ### Response
  1000. Success
  1001. ```json
  1002. {
  1003. "code": 0
  1004. }
  1005. ```
  1006. Error
  1007. ```json
  1008. {
  1009. "code": 102,
  1010. "message": "Duplicated chat name in updating dataset."
  1011. }
  1012. ```
  1013. ## Delete chats
  1014. **DELETE** `/api/v1/chat`
  1015. Delete chats
  1016. ### Request
  1017. - Method: DELETE
  1018. - URL: `http://{address}/api/v1/chat`
  1019. - Headers:
  1020. - `Content-Type: application/json`
  1021. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1022. - Body:
  1023. - `ids`: List[string]
  1024. #### Request example
  1025. ```bash
  1026. # Pass the IDs of the chats to delete in "ids".
  1027. curl --request DELETE \
  1028. --url http://{address}/api/v1/chat \
  1029. --header 'Content-Type: application/json' \
  1030. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  1031. --data '{
  1032. "ids": ["test_1", "test_2"]
  1033. }'
  1035. ```
  1036. #### Request parameters:
  1037. - `"ids"`: (*Body parameter*)
  1038. IDs of the chats to be deleted.
  1039. - `None`
  1040. ### Response
  1041. Success
  1042. ```json
  1043. {
  1044. "code": 0
  1045. }
  1046. ```
  1047. Error
  1048. ```json
  1049. {
  1050. "code": 102,
  1051. "message": "ids are required"
  1052. }
  1053. ```
  1054. ## List chats
  1055. **GET** `/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={chat_name}&id={chat_id}`
  1056. List chats based on filter criteria.
  1057. ### Request
  1058. - Method: GET
  1059. - URL: `http://{address}/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={chat_name}&id={chat_id}`
  1060. - Headers:
  1061. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1062. #### Request example
  1063. ```bash
  1064. curl --request GET \
  1065. --url 'http://{address}/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={chat_name}&id={chat_id}' \
  1066. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  1067. ```
  1068. #### Request parameters
  1069. - `"page"`: (*Query parameter*)
  1070. The current page number to retrieve from the paginated data. This parameter determines which set of records will be fetched.
  1071. - `1`
  1072. - `"page_size"`: (*Query parameter*)
  1073. The number of records to retrieve per page. This controls how many records will be included in each page.
  1074. - `1024`
  1075. - `"orderby"`: (*Query parameter*)
  1076. The field by which the records should be sorted. This specifies the attribute or column used to order the results.
  1077. - `"create_time"`
  1078. - `"desc"`: (*Query parameter*)
  1079. A boolean flag indicating whether the sorting should be in descending order.
  1080. - `True`
  1081. - `"id"`: (*Query parameter*)
  1082. The ID of the chat to be retrieved.
  1083. - `None`
  1084. - `"name"`: (*Query parameter*)
  1085. The name of the chat to be retrieved.
  1086. - `None`
  1087. ### Response
  1088. Success
  1089. ```json
  1090. {
  1091. "code": 0,
  1092. "data": [
  1093. {
  1094. "avatar": "",
  1095. "create_date": "Fri, 11 Oct 2024 03:23:24 GMT",
  1096. "create_time": 1728617004635,
  1097. "description": "A helpful Assistant",
  1098. "do_refer": "1",
  1099. "id": "2ca4b22e878011ef88fe0242ac120005",
  1100. "knowledgebases": [
  1101. {
  1102. "avatar": "",
  1103. "chunk_num": 0,
  1104. "create_date": "Fri, 11 Oct 2024 03:15:18 GMT",
  1105. "create_time": 1728616518986,
  1106. "created_by": "69736c5e723611efb51b0242ac120007",
  1107. "description": "",
  1108. "doc_num": 0,
  1109. "embd_id": "BAAI/bge-large-zh-v1.5",
  1110. "id": "0b2cbc8c877f11ef89070242ac120005",
  1111. "language": "English",
  1112. "name": "test_delete_chat",
  1113. "parser_config": {
  1114. "chunk_token_count": 128,
  1115. "delimiter": "\n!?。;!?",
  1116. "layout_recognize": true,
  1117. "task_page_size": 12
  1118. },
  1119. "parser_id": "naive",
  1120. "permission": "me",
  1121. "similarity_threshold": 0.2,
  1122. "status": "1",
  1123. "tenant_id": "69736c5e723611efb51b0242ac120007",
  1124. "token_num": 0,
  1125. "update_date": "Fri, 11 Oct 2024 04:01:31 GMT",
  1126. "update_time": 1728619291228,
  1127. "vector_similarity_weight": 0.3
  1128. }
  1129. ],
  1130. "language": "English",
  1131. "llm": {
  1132. "frequency_penalty": 0.7,
  1133. "max_tokens": 512,
  1134. "model_name": "deepseek-chat___OpenAI-API@OpenAI-API-Compatible",
  1135. "presence_penalty": 0.4,
  1136. "temperature": 0.1,
  1137. "top_p": 0.3
  1138. },
  1139. "name": "Test",
  1140. "prompt": {
  1141. "empty_response": "Sorry! 知识库中未找到相关内容!",
  1142. "keywords_similarity_weight": 0.3,
  1143. "opener": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
  1144. "prompt": "你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\n 以下是知识库:\n {knowledge}\n 以上是知识库。",
  1145. "rerank_model": "",
  1146. "similarity_threshold": 0.2,
  1147. "top_n": 6,
  1148. "variables": [
  1149. {
  1150. "key": "knowledge",
  1151. "optional": false
  1152. }
  1153. ]
  1154. },
  1155. "prompt_type": "simple",
  1156. "status": "1",
  1157. "tenant_id": "69736c5e723611efb51b0242ac120007",
  1158. "top_k": 1024,
  1159. "update_date": "Fri, 11 Oct 2024 03:47:58 GMT",
  1160. "update_time": 1728618478392
  1161. }
  1162. ]
  1163. }
  1164. ```
  1165. Error
  1166. ```json
  1167. {
  1168. "code": 102,
  1169. "message": "The chat doesn't exist"
  1170. }
  1171. ```
  1172. ## Create a chat session
  1173. **POST** `/api/v1/chat/{chat_id}/session`
  1174. Create a chat session
  1175. ### Request
  1176. - Method: POST
  1177. - URL: `http://{address}/api/v1/chat/{chat_id}/session`
  1178. - Headers:
  1179. - `Content-Type: application/json`
  1180. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1181. - Body:
  1182. - name: `string`
  1183. #### Request example
  1184. ```bash
  1185. curl --request POST \
  1186. --url http://{address}/api/v1/chat/{chat_id}/session \
  1187. --header 'Content-Type: application/json' \
  1188. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  1189. --data '{
  1190. "name": "new session"
  1191. }'
  1192. ```
  1193. #### Request parameters
  1194. - `"id"`: (*Body parameter*)
  1195. The ID of the created session used to identify different sessions.
  1196. - `None`
  1197. - `id` cannot be provided when creating.
  1198. - `"name"`: (*Body parameter*)
  1199. The name of the created session.
  1200. - `"New session"`
  1201. - `"messages"`: (*Body parameter*)
  1202. The messages of the created session.
  1203. - `[{"role": "assistant", "content": "Hi! I am your assistant, can I help you?"}]`
  1204. - `messages` cannot be provided when creating.
  1205. - `"chat_id"`: (*Path parameter*)
  1206. The ID of the associated chat.
  1207. - `""`
  1208. - `chat_id` cannot be changed.
  1209. ### Response
  1210. Success
  1211. ```json
  1212. {
  1213. "code": 0,
  1214. "data": {
  1215. "chat_id": "2ca4b22e878011ef88fe0242ac120005",
  1216. "create_date": "Fri, 11 Oct 2024 08:46:14 GMT",
  1217. "create_time": 1728636374571,
  1218. "id": "4606b4ec87ad11efbc4f0242ac120006",
  1219. "messages": [
  1220. {
  1221. "content": "Hi! I am your assistant,can I help you?",
  1222. "role": "assistant"
  1223. }
  1224. ],
  1225. "name": "new session",
  1226. "update_date": "Fri, 11 Oct 2024 08:46:14 GMT",
  1227. "update_time": 1728636374571
  1228. }
  1229. }
  1230. ```
  1231. Error
  1232. ```json
  1233. {
  1234. "code": 102,
  1235. "message": "Name can not be empty."
  1236. }
  1237. ```
  1238. ## List the sessions of a chat
  1239. **GET** `/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}`
  1240. List all sessions under the chat based on the filtering criteria.
  1241. ### Request
  1242. - Method: GET
  1243. - URL: `http://{address}/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}`
  1244. - Headers:
  1245. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1246. #### Request example
  1247. ```bash
  1248. curl --request GET \
  1249. --url 'http://{address}/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}' \
  1250. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
  1251. ```
  1252. #### Request Parameters
  1253. - `"page"`: (*Query parameter*)
  1254. The current page number to retrieve from the paginated data. This parameter determines which set of records will be fetched.
  1255. - `1`
  1256. - `"page_size"`: (*Query parameter*)
  1257. The number of records to retrieve per page. This controls how many records will be included in each page.
  1258. - `1024`
  1259. - `"orderby"`: (*Query parameter*)
  1260. The field by which the records should be sorted. This specifies the attribute or column used to order the results.
  1261. - `"create_time"`
  1262. - `"desc"`: (*Query parameter*)
  1263. A boolean flag indicating whether the sorting should be in descending order.
  1264. - `True`
  1265. - `"id"`: (*Query parameter*)
  1266. The ID of the session to be retrieved.
  1267. - `None`
  1268. - `"name"`: (*Query parameter*)
  1269. The name of the session to be retrieved.
  1270. - `None`
  1271. ### Response
  1272. Success
  1273. ```json
  1274. {
  1275. "code": 0,
  1276. "data": [
  1277. {
  1278. "chat": "2ca4b22e878011ef88fe0242ac120005",
  1279. "create_date": "Fri, 11 Oct 2024 08:46:43 GMT",
  1280. "create_time": 1728636403974,
  1281. "id": "578d541e87ad11ef96b90242ac120006",
  1282. "messages": [
  1283. {
  1284. "content": "Hi! I am your assistant,can I help you?",
  1285. "role": "assistant"
  1286. }
  1287. ],
  1288. "name": "new session",
  1289. "update_date": "Fri, 11 Oct 2024 08:46:43 GMT",
  1290. "update_time": 1728636403974
  1291. }
  1292. ]
  1293. }
  1294. ```
  1295. Error
  1296. ```json
  1297. {
  1298. "code": 102,
  1299. "message": "The session doesn't exist"
  1300. }
  1301. ```
  1302. ## Delete chat sessions
  1303. **DELETE** `/api/v1/chat/{chat_id}/session`
  1304. Delete chat sessions
  1305. ### Request
  1306. - Method: DELETE
  1307. - URL: `http://{address}/api/v1/chat/{chat_id}/session`
  1308. - Headers:
  1309. - `Content-Type: application/json`
  1310. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1311. - Body:
  1312. - `ids`: List[string]
  1313. #### Request example
  1314. ```bash
  1315. # Pass the IDs of the sessions to delete in "ids".
  1316. curl --request DELETE \
  1317. --url http://{address}/api/v1/chat/{chat_id}/session \
  1318. --header 'Content-Type: application/json' \
  1319. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  1320. --data '{
  1321. "ids": ["test_1", "test_2"]
  1322. }'
  1323. ```
  1324. #### Request Parameters
  1325. - `ids`: (*Body Parameter*)
  1326. IDs of the sessions to be deleted.
  1327. - `None`
  1328. ### Response
  1329. Success
  1330. ```json
  1331. {
  1332. "code": 0
  1333. }
  1334. ```
  1335. Error
  1336. ```json
  1337. {
  1338. "code": 102,
  1339. "message": "The chat doesn't own the session"
  1340. }
  1341. ```
  1342. ## Update a chat session
  1343. **PUT** `/api/v1/chat/{chat_id}/session/{session_id}`
  1344. Update a chat session
  1345. ### Request
  1346. - Method: PUT
  1347. - URL: `http://{address}/api/v1/chat/{chat_id}/session/{session_id}`
  1348. - Headers:
  1349. - `Content-Type: application/json`
  1350. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1351. - Body:
  1352. - `name`: string
  1353. #### Request example
  1354. ```bash
  1355. curl --request PUT \
  1356. --url http://{address}/api/v1/chat/{chat_id}/session/{session_id} \
  1357. --header 'Content-Type: application/json' \
  1358. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  1359. --data '{
  1360. "name": "Updated session"
  1361. }'
  1362. ```
  1363. #### Request Parameter
  1364. - `name`: (*Body Parameter*)
  1365. The new name of the session.
  1366. - `None`
  1367. ### Response
  1368. Success
  1369. ```json
  1370. {
  1371. "code": 0
  1372. }
  1373. ```
  1374. Error
  1375. ```json
  1376. {
  1377. "code": 102,
  1378. "message": "Name can not be empty."
  1379. }
  1380. ```
  1381. ## Chat with a chat session
  1382. **POST** `/api/v1/chat/{chat_id}/session/{session_id}/completion`
  1383. Chat with a chat session
  1384. ### Request
  1385. - Method: POST
  1386. - URL: `http://{address}/api/v1/chat/{chat_id}/session/{session_id}/completion`
  1387. - Headers:
  1388. - `Content-Type: application/json`
  1389. - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
  1390. - Body:
  1391. - `question`: string
  1392. - `stream`: bool
  1393. #### Request example
  1394. ```bash
  1395. curl --request POST \
  1396. --url http://{address}/api/v1/chat/{chat_id}/session/{session_id}/completion \
  1397. --header 'Content-Type: application/json' \
  1398. --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
  1399. --data-binary '{
  1400. "question": "你好!",
  1401. "stream": true
  1402. }'
  1403. ```
  1404. #### Request Parameters
  1405. - `question`: (*Body Parameter*)
  1406. The question you want to ask.
  1407. - `question` is required.
  1408. - `None`
  1409. - `stream`: (*Body Parameter*)
  1410. Whether to stream the answer back incrementally as it is generated.
  1411. - `False`
  1412. ### Response
  1413. Success
  1414. ```json
  1415. data: {
  1416. "code": 0,
  1417. "data": {
  1418. "answer": "您好!有什么具体的问题或者需要的帮助",
  1419. "reference": {},
  1420. "audio_binary": null,
  1421. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1422. }
  1423. }
  1424. data: {
  1425. "code": 0,
  1426. "data": {
  1427. "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助",
  1428. "reference": {},
  1429. "audio_binary": null,
  1430. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1431. }
  1432. }
  1433. data: {
  1434. "code": 0,
  1435. "data": {
  1436. "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助您的。如果您有任何疑问或是需要获取",
  1437. "reference": {},
  1438. "audio_binary": null,
  1439. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1440. }
  1441. }
  1442. data: {
  1443. "code": 0,
  1444. "data": {
  1445. "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助您的。如果您有任何疑问或是需要获取某些信息,请随时提出。",
  1446. "reference": {},
  1447. "audio_binary": null,
  1448. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1449. }
  1450. }
  1451. data: {
  1452. "code": 0,
  1453. "data": {
  1454. "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗 ##0$$?我在这里是为了帮助您的。如果您有任何疑问或是需要获取某些信息,请随时提出。",
  1455. "reference": {
  1456. "total": 19,
  1457. "chunks": [
  1458. {
  1459. "chunk_id": "9d87f9d70a0d8a7565694a81fd4c5d5f",
  1460. "content_ltks": "当所有知识库内容都与问题无关时 ,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n以下是知识库:\r\n{knowledg}\r\n以上是知识库\r\n\"\"\"\r\n 1\r\n 2\r\n 3\r\n 4\r\n 5\r\n 6\r\n总结\r\n通过上面的介绍,可以对开源的 ragflow有了一个大致的了解,与前面的有道qanyth整体流程还是比较类似的。 ",
  1461. "content_with_weight": "当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n 以下是知识库:\r\n {knowledge}\r\n 以上是知识库\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n总结\r\n通过上面的介绍,可以对开源的 RagFlow 有了一个大致的了解,与前面的 有道 QAnything 整体流程还是比较类似的。",
  1462. "doc_id": "5c5999ec7be811ef9cab0242ac120005",
  1463. "docnm_kwd": "1.txt",
  1464. "kb_id": "c7ee74067a2c11efb21c0242ac120006",
  1465. "important_kwd": [],
  1466. "img_id": "",
  1467. "similarity": 0.38337178633282265,
  1468. "vector_similarity": 0.3321336754679629,
  1469. "term_similarity": 0.4053309767034769,
  1470. "positions": [
  1471. ""
  1472. ]
  1473. },
  1474. {
  1475. "chunk_id": "895d34de762e674b43e8613c6fb54c6d",
  1476. "content_ltks": "\r\n\r\n实际内容可能会超过大模型的输入token数量,因此在调用大模型前会调用api/db/servic/dialog_service.py文件中 messag_fit_in ()根据大模型可用的 token数量进行过滤。这部分与有道的 qanyth的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt ,即可作为大模型的输入了 ,默认的英文prompt如下所示:\r\n\r\n\"\"\"\r\nyou are an intellig assistant. pleas summar the content of the knowledg base to answer the question. pleas list thedata in the knowledg base and answer in detail. when all knowledg base content is irrelev to the question , your answer must includ the sentenc\"the answer you are lookfor isnot found in the knowledg base!\" answer needto consid chat history.\r\n here is the knowledg base:\r\n{ knowledg}\r\nthe abov is the knowledg base.\r\n\"\"\"\r\n1\r\n 2\r\n 3\r\n 4\r\n 5\r\n 6\r\n对应的中文prompt如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。 ",
  1477. "content_with_weight": "\r\n\r\n实际内容可能会超过大模型的输入 token 数量,因此在调用大模型前会调用 api/db/services/dialog_service.py 文件中 message_fit_in() 根据大模型可用的 token 数量进行过滤。这部分与有道的 QAnything 的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt,即可作为大模型的输入了,默认的英文 prompt 如下所示:\r\n\r\n\"\"\"\r\nYou are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\r\n Here is the knowledge base:\r\n {knowledge}\r\n The above is the knowledge base.\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n对应的中文 prompt 如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。",
  1478. "doc_id": "5c5999ec7be811ef9cab0242ac120005",
  1479. "docnm_kwd": "1.txt",
  1480. "kb_id": "c7ee74067a2c11efb21c0242ac120006",
  1481. "important_kwd": [],
  1482. "img_id": "",
  1483. "similarity": 0.2788204323926715,
  1484. "vector_similarity": 0.35489427679953667,
  1485. "term_similarity": 0.2462173562183008,
  1486. "positions": [
  1487. ""
  1488. ]
  1489. }
  1490. ],
  1491. "doc_aggs": [
  1492. {
  1493. "doc_name": "1.txt",
  1494. "doc_id": "5c5999ec7be811ef9cab0242ac120005",
  1495. "count": 2
  1496. }
  1497. ]
  1498. },
  1499. "prompt": "你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\n 以下是知识库:\n 当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n 以下是知识库:\r\n {knowledge}\r\n 以上是知识库\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n总结\r\n通过上面的介绍,可以对开源的 RagFlow 有了一个大致的了解,与前面的 有道 QAnything 整体流程还是比较类似的。\n\n------\n\n\r\n\r\n实际内容可能会超过大模型的输入 token 数量,因此在调用大模型前会调用 api/db/services/dialog_service.py 文件中 message_fit_in() 根据大模型可用的 token 数量进行过滤。这部分与有道的 QAnything 的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt,即可作为大模型的输入了,默认的英文 prompt 如下所示:\r\n\r\n\"\"\"\r\nYou are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\r\n Here is the knowledge base:\r\n {knowledge}\r\n The above is the knowledge base.\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n对应的中文 prompt 如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。\n 以上是知识库。\n\n### Query:\n你好,请问有什么问题需要我帮忙解答吗?\n\n### Elapsed\n - Retrieval: 9131.1 ms\n - LLM: 12802.6 ms",
  1500. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1501. }
  1502. }
  1503. data:{
  1504. "code": 0,
  1505. "data": true
  1506. }
  1507. ```
  1508. Error
  1509. ```json
  1510. {
  1511. "code": 102,
  1512. "message": "Please input your question."
  1513. }
  1514. ```