Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

http_api_reference.md 62KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141
  1. # DRAFT! HTTP API Reference
  2. **THE API REFERENCES BELOW ARE STILL UNDER DEVELOPMENT.**
  3. ---
  4. :::tip API GROUPING
  5. Dataset Management
  6. :::
  7. ---
  8. ## Create dataset
  9. **POST** `/api/v1/dataset`
  10. Creates a dataset.
  11. ### Request
  12. - Method: POST
  13. - URL: `/api/v1/dataset`
  14. - Headers:
  15. - `'Content-Type: application/json'`
  16. - `'Authorization: Bearer {YOUR_API_KEY}'`
  17. - Body:
  18. - `"name"`: `string`
  19. - `"avatar"`: `string`
  20. - `"description"`: `string`
  21. - `"language"`: `string`
  22. - `"embedding_model"`: `string`
  23. - `"permission"`: `string`
  24. - `"chunk_method"`: `enum<string>`
  25. - `"parser_config"`: `Dataset.ParserConfig`
  26. #### Request example
  27. ```bash
  28. curl --request POST \
  29. --url http://{address}/api/v1/dataset \
  30. --header 'Content-Type: application/json' \
  31. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  32. --data '
  33. {
  34. "name": "test",
  35. "chunk_method": "naive"
  36. }'
  37. ```
  38. #### Request parameters
  39. - `"name"`: (*Body parameter*), `string`, *Required*
  40. The unique name of the dataset to create. It must adhere to the following requirements:
  41. - Permitted characters include:
  42. - English letters (a-z, A-Z)
  43. - Digits (0-9)
  44. - "_" (underscore)
  45. - Must begin with an English letter or underscore.
  46. - Maximum 65,535 characters.
  47. - Case-insensitive.
  48. - `"avatar"`: (*Body parameter*), `string`
  49. Base64 encoding of the avatar.
  50. - `"description"`: (*Body parameter*), `string`
  51. A brief description of the dataset to create.
  52. - `"language"`: (*Body parameter*), `string`
  53. The language setting of the dataset to create. Available options:
  54. - `"English"` (Default)
  55. - `"Chinese"`
  56. - `"embedding_model"`: (*Body parameter*), `string`
  57. The name of the embedding model to use. For example: `"BAAI/bge-zh-v1.5"`
  58. - `"permission"`: (*Body parameter*), `string`
  59. Specifies who can access the dataset to create. You can set it only to `"me"` for now.
  60. - `"chunk_method"`: (*Body parameter*), `enum<string>`
  61. The chunking method of the dataset to create. Available options:
  62. - `"naive"`: General (default)
  63. - `"manual"`: Manual
  64. - `"qa"`: Q&A
  65. - `"table"`: Table
  66. - `"paper"`: Paper
  67. - `"book"`: Book
  68. - `"laws"`: Laws
  69. - `"presentation"`: Presentation
  70. - `"picture"`: Picture
  71. - `"one"`: One
  72. - `"knowledge_graph"`: Knowledge Graph
  73. - `"email"`: Email
  74. - `"parser_config"`: (*Body parameter*)
  75. The configuration settings for the dataset parser. A `ParserConfig` object contains the following attributes:
  76. - `"chunk_token_count"`: Defaults to `128`.
  77. - `"layout_recognize"`: Defaults to `true`.
  78. - `"delimiter"`: Defaults to `"\n!?。;!?"`.
  79. - `"task_page_size"`: Defaults to `12`.
  80. ### Response
  81. Success:
  82. ```json
  83. {
  84. "code": 0,
  85. "data": {
  86. "avatar": null,
  87. "chunk_count": 0,
  88. "create_date": "Thu, 10 Oct 2024 05:57:37 GMT",
  89. "create_time": 1728539857641,
  90. "created_by": "69736c5e723611efb51b0242ac120007",
  91. "description": null,
  92. "document_count": 0,
  93. "embedding_model": "BAAI/bge-large-zh-v1.5",
  94. "id": "8d73076886cc11ef8c270242ac120006",
  95. "language": "English",
  96. "name": "test_1",
  97. "parse_method": "naive",
  98. "parser_config": {
  99. "pages": [
  100. [
  101. 1,
  102. 1000000
  103. ]
  104. ]
  105. },
  106. "permission": "me",
  107. "similarity_threshold": 0.2,
  108. "status": "1",
  109. "tenant_id": "69736c5e723611efb51b0242ac120007",
  110. "token_num": 0,
  111. "update_date": "Thu, 10 Oct 2024 05:57:37 GMT",
  112. "update_time": 1728539857641,
  113. "vector_similarity_weight": 0.3
  114. }
  115. }
  116. ```
  117. Failure:
  118. ```json
  119. {
  120. "code": 102,
  121. "message": "Duplicated knowledgebase name in creating dataset."
  122. }
  123. ```
  124. ---
  125. ## Delete datasets
  126. **DELETE** `/api/v1/dataset`
  127. Deletes datasets by ID.
  128. ### Request
  129. - Method: DELETE
  130. - URL: `/api/v1/dataset`
  131. - Headers:
  132. - `'Content-Type: application/json'`
  133. - `'Authorization: Bearer {YOUR_API_KEY}'`
  134. - Body:
  135. - `"ids"`: `list[string]`
  136. #### Request example
  137. ```bash
  138. curl --request DELETE \
  139. --url http://{address}/api/v1/dataset \
  140. --header 'Content-Type: application/json' \
  141. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  142. --data '{"ids": ["test_1", "test_2"]}'
  143. ```
  144. #### Request parameters
  145. - `"ids"`: (*Body parameter*), `list[string]`
  146. The IDs of the datasets to delete. If it is not specified, all datasets will be deleted.
  147. ### Response
  148. Success:
  149. ```json
  150. {
  151. "code": 0
  152. }
  153. ```
  154. Failure:
  155. ```json
  156. {
  157. "code": 102,
  158. "message": "You don't own the dataset."
  159. }
  160. ```
  161. ---
  162. ## Update dataset
  163. **PUT** `/api/v1/dataset/{dataset_id}`
  164. Updates configurations for a specified dataset.
  165. ### Request
  166. - Method: PUT
  167. - URL: `/api/v1/dataset/{dataset_id}`
  168. - Headers:
  169. - `'Content-Type: application/json'`
  170. - `'Authorization: Bearer {YOUR_API_KEY}'`
  171. - Body:
  172. - `"name"`: `string`
  173. - `"embedding_model"`: `string`
  174. - `"chunk_method"`: `enum<string>`
  175. #### Request example
  176. ```bash
  177. curl --request PUT \
  178. --url http://{address}/api/v1/dataset/{dataset_id} \
  179. --header 'Content-Type: application/json' \
  180. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  181. --data '
  182. {
  183. "name": "test",
  184. "embedding_model": "BAAI/bge-zh-v1.5",
  185. "chunk_method": "naive"
  186. }'
  187. ```
  188. #### Request parameters
  189. - `dataset_id`: (*Path parameter*)
  190. The ID of the dataset to update.
  191. - `"name"`: `string`
  192. The name of the dataset to update.
  193. - `"embedding_model"`: `string` The embedding model name to update.
  194. - Ensure that `"chunk_count"` is `0` before updating `"embedding_model"`.
  195. - `"chunk_method"`: `enum<string>` The chunking method for the dataset. Available options:
  196. - `"naive"`: General
  197. - `"manual"`: Manual
  198. - `"qa"`: Q&A
  199. - `"table"`: Table
  200. - `"paper"`: Paper
  201. - `"book"`: Book
  202. - `"laws"`: Laws
  203. - `"presentation"`: Presentation
  204. - `"picture"`: Picture
  205. - `"one"`: One
  206. - `"knowledge_graph"`: Knowledge Graph
  207. - `"email"`: Email
  208. ### Response
  209. Success:
  210. ```json
  211. {
  212. "code": 0
  213. }
  214. ```
  215. Failure:
  216. ```json
  217. {
  218. "code": 102,
  219. "message": "Can't change tenant_id."
  220. }
  221. ```
  222. ---
  223. ## List datasets
  224. **GET** `/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
  225. Lists datasets.
  226. ### Request
  227. - Method: GET
  228. - URL: `/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
  229. - Headers:
  230. - `'Authorization: Bearer {YOUR_API_KEY}'`
  231. #### Request example
  232. ```bash
  233. curl --request GET \
  234. --url 'http://{address}/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}' \
  235. --header 'Authorization: Bearer {YOUR_API_KEY}'
  236. ```
  237. #### Request parameters
  238. - `page`: (*Filter parameter*)
  239. Specifies the page on which the datasets will be displayed. Defaults to `1`.
  240. - `page_size`: (*Filter parameter*)
  241. The number of datasets on each page. Defaults to `1024`.
  242. - `orderby`: (*Filter parameter*)
  243. The field by which datasets should be sorted. Available options:
  244. - `create_time` (default)
  245. - `update_time`
  246. - `desc`: (*Filter parameter*)
  247. Indicates whether the retrieved datasets should be sorted in descending order. Defaults to `true`.
  248. - `name`: (*Filter parameter*)
  249. The name of the dataset to retrieve.
  250. - `id`: (*Filter parameter*)
  251. The ID of the dataset to retrieve.
  252. ### Response
  253. Success:
  254. ```json
  255. {
  256. "code": 0,
  257. "data": [
  258. {
  259. "avatar": "",
  260. "chunk_count": 59,
  261. "create_date": "Sat, 14 Sep 2024 01:12:37 GMT",
  262. "create_time": 1726276357324,
  263. "created_by": "69736c5e723611efb51b0242ac120007",
  264. "description": null,
  265. "document_count": 1,
  266. "embedding_model": "BAAI/bge-large-zh-v1.5",
  267. "id": "6e211ee0723611efa10a0242ac120007",
  268. "language": "English",
  269. "name": "mysql",
  270. "parse_method": "knowledge_graph",
  271. "parser_config": {
  272. "chunk_token_num": 8192,
  273. "delimiter": "\\n!?;。;!?",
  274. "entity_types": [
  275. "organization",
  276. "person",
  277. "location",
  278. "event",
  279. "time"
  280. ]
  281. },
  282. "permission": "me",
  283. "similarity_threshold": 0.2,
  284. "status": "1",
  285. "tenant_id": "69736c5e723611efb51b0242ac120007",
  286. "token_num": 12744,
  287. "update_date": "Thu, 10 Oct 2024 04:07:23 GMT",
  288. "update_time": 1728533243536,
  289. "vector_similarity_weight": 0.3
  290. }
  291. ]
  292. }
  293. ```
  294. Failure:
  295. ```json
  296. {
  297. "code": 102,
  298. "message": "The dataset doesn't exist"
  299. }
  300. ```
  301. ---
  302. :::tip API GROUPING
  303. File Management within Dataset
  304. :::
  305. ---
  306. ## Upload documents
  307. **POST** `/api/v1/dataset/{dataset_id}/document`
  308. Uploads documents to a specified dataset.
  309. ### Request
  310. - Method: POST
  311. - URL: `/api/v1/dataset/{dataset_id}/document`
  312. - Headers:
  313. - `'Content-Type: multipart/form-data'`
  314. - `'Authorization: Bearer {YOUR_API_KEY}'`
  315. - Form:
  316. - `'file=@{FILE_PATH}'`
  317. #### Request example
  318. ```bash
  319. curl --request POST \
  320. --url http://{address}/api/v1/dataset/{dataset_id}/document \
  321. --header 'Content-Type: multipart/form-data' \
  322. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  323. --form 'file=@./test1.txt' \
  324. --form 'file=@./test2.pdf'
  325. ```
  326. #### Request parameters
  327. - `dataset_id`: (*Path parameter*)
  328. The ID of the dataset to which the documents will be uploaded.
  329. - `'file'`: (*Body parameter*)
  330. A document to upload.
  331. ### Response
  332. Success:
  333. ```json
  334. {
  335. "code": 0
  336. }
  337. ```
  338. Failure:
  339. ```json
  340. {
  341. "code": 101,
  342. "message": "No file part!"
  343. }
  344. ```
  345. ---
  346. ## Update document
  347. **PUT** `/api/v1/dataset/{dataset_id}/info/{document_id}`
  348. Updates configurations for a specified document.
  349. ### Request
  350. - Method: PUT
  351. - URL: `/api/v1/dataset/{dataset_id}/info/{document_id}`
  352. - Headers:
  353. - `'Content-Type: application/json'`
  354. - `'Authorization: Bearer {YOUR_API_KEY}'`
  355. - Body:
  356. - `"name"`:`string`
  357. - `"chunk_method"`:`string`
  358. - `"parser_config"`:`object`
  359. #### Request example
  360. ```bash
  361. curl --request PUT \
  362. --url http://{address}/api/v1/dataset/{dataset_id}/info/{document_id} \
  363. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  364. --header 'Content-Type: application/json' \
  365. --data '
  366. {
  367. "name": "manual.txt",
  368. "chunk_method": "manual",
  369. "parser_config": {"chunk_token_count": 128}
  370. }'
  371. ```
  372. #### Request parameters
  373. - `dataset_id`: (*Path parameter*)
  374. The ID of the associated dataset.
  375. - `document_id`: (*Path parameter*)
  376. The ID of the document to update.
  377. - `"name"`: (*Body parameter*), `string` The name of the document to update.
  378. - `"chunk_method"`: (*Body parameter*), `string`
  379. The parsing method to apply to the document:
  380. - `"naive"`: General
  381. - `"manual"`: Manual
  382. - `"qa"`: Q&A
  383. - `"table"`: Table
  384. - `"paper"`: Paper
  385. - `"book"`: Book
  386. - `"laws"`: Laws
  387. - `"presentation"`: Presentation
  388. - `"picture"`: Picture
  389. - `"one"`: One
  390. - `"knowledge_graph"`: Knowledge Graph
  391. - `"email"`: Email
  392. - `"parser_config"`: (*Body parameter*), `object`
  393. The parsing configuration for the document:
  394. - `"chunk_token_count"`: Defaults to `128`.
  395. - `"layout_recognize"`: Defaults to `true`.
  396. - `"delimiter"`: Defaults to `"\n!?。;!?"`.
  397. - `"task_page_size"`: Defaults to `12`.
  398. ### Response
  399. Success:
  400. ```json
  401. {
  402. "code": 0
  403. }
  404. ```
  405. Failure:
  406. ```json
  407. {
  408. "code": 102,
  409. "message": "The dataset does not have the document."
  410. }
  411. ```
  412. ---
  413. ## Download document
  414. **GET** `/api/v1/dataset/{dataset_id}/document/{document_id}`
  415. Downloads a document from a specified dataset.
  416. ### Request
  417. - Method: GET
  418. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}`
  419. - Headers:
  420. - `'Authorization: Bearer {YOUR_API_KEY}'`
  421. - Output:
  422. - `'{PATH_TO_THE_FILE}'`
  423. #### Request example
  424. ```bash
  425. curl --request GET \
  426. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id} \
  427. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  428. --output ./ragflow.txt
  429. ```
  430. #### Request parameters
  431. - `dataset_id`: (*Path parameter*)
  432. The associated dataset ID.
  433. - `document_id`: (*Path parameter*)
  434. The ID of the document to download.
  435. ### Response
  436. Success:
  437. ```text
  438. test_2.
  439. ```
  440. Failure:
  441. ```json
  442. {
  443. "code": 102,
  444. "message": "You do not own the dataset 7898da028a0511efbf750242ac1220005."
  445. }
  446. ```
  447. ---
  448. ## List documents
  449. **GET** `/api/v1/dataset/{dataset_id}/info?offset={offset}&limit={limit}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}`
  450. Lists documents in a specified dataset.
  451. ### Request
  452. - Method: GET
  453. - URL: `/api/v1/dataset/{dataset_id}/info?offset={offset}&limit={limit}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}`
  454. - Headers:
  455. - `'Content-Type: application/json'`
  456. - `'Authorization: Bearer {YOUR_API_KEY}'`
  457. #### Request example
  458. ```bash
  459. curl --request GET \
  460. --url 'http://{address}/api/v1/dataset/{dataset_id}/info?keywords={keywords}&offset={offset}&limit={limit}&orderby={orderby}&desc={desc}&id={document_id}' \
  461. --header 'Authorization: Bearer {YOUR_API_KEY}'
  462. ```
  463. #### Request parameters
  464. - `dataset_id`: (*Path parameter*)
  465. The associated dataset ID.
  466. - `keywords`: (*Filter parameter*), `string`
  467. The keywords used to match document titles.
  468. - `offset`: (*Filter parameter*), `integer`
  469. The starting index for the documents to retrieve. Typically used in conjunction with `limit`. Defaults to `1`.
  470. - `limit`: (*Filter parameter*), `integer`
  471. The maximum number of documents to retrieve. Defaults to `1024`.
  472. - `orderby`: (*Filter parameter*), `string`
  473. The field by which documents should be sorted. Available options:
  474. - `create_time` (default)
  475. - `update_time`
  476. - `desc`: (*Filter parameter*), `boolean`
  477. Indicates whether the retrieved documents should be sorted in descending order. Defaults to `true`.
  478. - `id`: (*Filter parameter*), `string`
  479. The ID of the document to retrieve.
  480. ### Response
  481. Success:
  482. ```json
  483. {
  484. "code": 0,
  485. "data": {
  486. "docs": [
  487. {
  488. "chunk_count": 0,
  489. "create_date": "Mon, 14 Oct 2024 09:11:01 GMT",
  490. "create_time": 1728897061948,
  491. "created_by": "69736c5e723611efb51b0242ac120007",
  492. "id": "3bcfbf8a8a0c11ef8aba0242ac120006",
  493. "knowledgebase_id": "7898da028a0511efbf750242ac120005",
  494. "location": "Test_2.txt",
  495. "name": "Test_2.txt",
  496. "parser_config": {
  497. "chunk_token_count": 128,
  498. "delimiter": "\n!?。;!?",
  499. "layout_recognize": true,
  500. "task_page_size": 12
  501. },
  502. "parser_method": "naive",
  503. "process_begin_at": null,
  504. "process_duation": 0.0,
  505. "progress": 0.0,
  506. "progress_msg": "",
  507. "run": "0",
  508. "size": 7,
  509. "source_type": "local",
  510. "status": "1",
  511. "thumbnail": null,
  512. "token_count": 0,
  513. "type": "doc",
  514. "update_date": "Mon, 14 Oct 2024 09:11:01 GMT",
  515. "update_time": 1728897061948
  516. }
  517. ],
  518. "total": 1
  519. }
  520. }
  521. ```
  522. Failure:
  523. ```json
  524. {
  525. "code": 102,
  526. "message": "You don't own the dataset 7898da028a0511efbf750242ac1220005. "
  527. }
  528. ```
  529. ---
  530. ## Delete documents
  531. **DELETE** `/api/v1/dataset/{dataset_id}/document`
  532. Deletes documents by ID.
  533. ### Request
  534. - Method: DELETE
  535. - URL: `/api/v1/dataset/{dataset_id}/document`
  536. - Headers:
  537. - `'Content-Type: application/json'`
  538. - `'Authorization: Bearer {YOUR_API_KEY}'`
  539. - Body:
  540. - `"ids"`: `list[string]`
  541. #### Request example
  542. ```bash
  543. curl --request DELETE \
  544. --url http://{address}/api/v1/dataset/{dataset_id}/document \
  545. --header 'Content-Type: application/json' \
  546. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  547. --data '
  548. {
  549. "ids": ["id_1","id_2"]
  550. }'
  551. ```
  552. #### Request parameters
  553. - `dataset_id`: (*Path parameter*)
  554. The associated dataset ID.
  555. - `"ids"`: (*Body parameter*), `list[string]`
  556. The IDs of the documents to delete. If it is not specified, all documents in the specified dataset will be deleted.
  557. ### Response
  558. Success:
  559. ```json
  560. {
  561. "code": 0
  562. }
  563. ```
  564. Failure:
  565. ```json
  566. {
  567. "code": 102,
  568. "message": "You do not own the dataset 7898da028a0511efbf750242ac1220005."
  569. }
  570. ```
  571. ---
  572. ## Parse documents
  573. **POST** `/api/v1/dataset/{dataset_id}/chunk`
  574. Parses documents in a specified dataset.
  575. ### Request
  576. - Method: POST
  577. - URL: `/api/v1/dataset/{dataset_id}/chunk`
  578. - Headers:
  579. - `'Content-Type: application/json'`
  580. - `'Authorization: Bearer {YOUR_API_KEY}'`
  581. - Body:
  582. - `"document_ids"`: `list[string]`
  583. #### Request example
  584. ```bash
  585. curl --request POST \
  586. --url http://{address}/api/v1/dataset/{dataset_id}/chunk \
  587. --header 'Content-Type: application/json' \
  588. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  589. --data '
  590. {
  591. "document_ids": ["97a5f1c2759811efaa500242ac120004","97ad64b6759811ef9fc30242ac120004"]
  592. }'
  593. ```
  594. #### Request parameters
  595. - `dataset_id`: (*Path parameter*)
  596. The dataset ID.
  597. - `"document_ids"`: (*Body parameter*), `list[string]`, *Required*
  598. The IDs of the documents to parse.
  599. ### Response
  600. Success:
  601. ```json
  602. {
  603. "code": 0
  604. }
  605. ```
  606. Failure:
  607. ```json
  608. {
  609. "code": 102,
  610. "message": "`document_ids` is required"
  611. }
  612. ```
  613. ---
  614. ## Stop parsing documents
  615. **DELETE** `/api/v1/dataset/{dataset_id}/chunk`
  616. Stops parsing specified documents.
  617. ### Request
  618. - Method: DELETE
  619. - URL: `/api/v1/dataset/{dataset_id}/chunk`
  620. - Headers:
  621. - `'Content-Type: application/json'`
  622. - `'Authorization: Bearer {YOUR_API_KEY}'`
  623. - Body:
  624. - `"document_ids"`: `list[string]`
  625. #### Request example
  626. ```bash
  627. curl --request DELETE \
  628. --url http://{address}/api/v1/dataset/{dataset_id}/chunk \
  629. --header 'Content-Type: application/json' \
  630. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  631. --data '
  632. {
  633. "document_ids": ["97a5f1c2759811efaa500242ac120004","97ad64b6759811ef9fc30242ac120004"]
  634. }'
  635. ```
  636. #### Request parameters
  637. - `dataset_id`: (*Path parameter*)
  638. The associated dataset ID.
  639. - `"document_ids"`: (*Body parameter*), `list[string]`, *Required*
  640. The IDs of the documents for which the parsing should be stopped.
  641. ### Response
  642. Success:
  643. ```json
  644. {
  645. "code": 0
  646. }
  647. ```
  648. Failure:
  649. ```json
  650. {
  651. "code": 102,
  652. "message": "`document_ids` is required"
  653. }
  654. ```
  655. ---
  656. ## Add chunks
  657. **POST** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  658. Adds a chunk to a specified document in a specified dataset.
  659. ### Request
  660. - Method: POST
  661. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  662. - Headers:
  663. - `'Content-Type: application/json'`
  664. - `'Authorization: Bearer {YOUR_API_KEY}'`
  665. - Body:
  666. - `"content"`: `string`
  667. - `"important_keywords"`: `list[string]`
  668. #### Request example
  669. ```bash
  670. curl --request POST \
  671. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
  672. --header 'Content-Type: application/json' \
  673. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  674. --data '
  675. {
  676. "content": "<SOME_CHUNK_CONTENT_HERE>"
  677. }'
  678. ```
  679. #### Request parameters
  680. - `dataset_id`: (*Path parameter*)
  681. The associated dataset ID.
  682. - `document_id`: (*Path parameter*)
  683. The associated document ID.
  684. - `"content"`: (*Body parameter*), `string`, *Required*
  685. The text content of the chunk.
  686. - `"important_keywords"`: (*Body parameter*), `list[string]`
  687. The key terms or phrases to tag with the chunk.
  688. ### Response
  689. Success:
  690. ```json
  691. {
  692. "code": 0,
  693. "data": {
  694. "chunk": {
  695. "content": "ragflow content",
  696. "create_time": "2024-10-16 08:05:04",
  697. "create_timestamp": 1729065904.581025,
  698. "dataset_id": [
  699. "c7ee74067a2c11efb21c0242ac120006"
  700. ],
  701. "document_id": "5c5999ec7be811ef9cab0242ac120005",
  702. "id": "d78435d142bd5cf6704da62c778795c5",
  703. "important_keywords": []
  704. }
  705. }
  706. }
  707. ```
  708. Failure:
  709. ```json
  710. {
  711. "code": 102,
  712. "message": "`content` is required"
  713. }
  714. ```
  715. ---
  716. ## List chunks
  717. **GET** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk?keywords={keywords}&offset={offset}&limit={limit}&id={id}`
  718. Lists chunks in a specified document.
  719. ### Request
  720. - Method: GET
  721. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk?keywords={keywords}&offset={offset}&limit={limit}&id={id}`
  722. - Headers:
  723. - `'Authorization: Bearer {YOUR_API_KEY}'`
  724. #### Request example
  725. ```bash
  726. curl --request GET \
  727. --url 'http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk?keywords={keywords}&offset={offset}&limit={limit}&id={id}' \
  728. --header 'Authorization: Bearer {YOUR_API_KEY}'
  729. ```
  730. #### Request parameters
  731. - `dataset_id`: (*Path parameter*)
  732. The associated dataset ID.
  733. - `document_id`: (*Path parameter*)
  734. The associated document ID.
  735. - `"keywords"`: (*Filter parameter*), `string`
  736. The keywords used to match chunk content.
  737. - `"offset"`: (*Filter parameter*), `integer`
  738. The starting index for the chunks to retrieve. Defaults to `1`.
  739. - `"limit"`: (*Filter parameter*), `integer`
  740. The maximum number of chunks to retrieve. Defaults to `1024`.
  741. - `"id"`: (*Filter parameter*), `string`
  742. The ID of the chunk to retrieve.
  743. ### Response
  744. Success:
  745. ```json
  746. {
  747. "code": 0,
  748. "data": {
  749. "chunks": [],
  750. "doc": {
  751. "chunk_num": 0,
  752. "create_date": "Sun, 29 Sep 2024 03:47:29 GMT",
  753. "create_time": 1727581649216,
  754. "created_by": "69736c5e723611efb51b0242ac120007",
  755. "id": "8cb781ec7e1511ef98ac0242ac120006",
  756. "kb_id": "c7ee74067a2c11efb21c0242ac120006",
  757. "location": "sunny_tomorrow.txt",
  758. "name": "sunny_tomorrow.txt",
  759. "parser_config": {
  760. "pages": [
  761. [
  762. 1,
  763. 1000000
  764. ]
  765. ]
  766. },
  767. "parser_id": "naive",
  768. "process_begin_at": "Tue, 15 Oct 2024 10:23:51 GMT",
  769. "process_duation": 1435.37,
  770. "progress": 0.0370833,
  771. "progress_msg": "\nTask has been received.",
  772. "run": "1",
  773. "size": 24,
  774. "source_type": "local",
  775. "status": "1",
  776. "thumbnail": null,
  777. "token_num": 0,
  778. "type": "doc",
  779. "update_date": "Tue, 15 Oct 2024 10:47:46 GMT",
  780. "update_time": 1728989266371
  781. },
  782. "total": 0
  783. }
  784. }
  785. ```
  786. Failure:
  787. ```json
  788. {
  789. "code": 102,
  790. "message": "You don't own the document 5c5999ec7be811ef9cab0242ac12000e5."
  791. }
  792. ```
  793. ---
  794. ## Delete chunks
  795. **DELETE** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  796. Deletes chunks by ID.
  797. ### Request
  798. - Method: DELETE
  799. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
  800. - Headers:
  801. - `'Content-Type: application/json'`
  802. - `'Authorization: Bearer {YOUR_API_KEY}'`
  803. - Body:
  804. - `"chunk_ids"`: `list[string]`
  805. #### Request example
  806. ```bash
  807. curl --request DELETE \
  808. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
  809. --header 'Content-Type: application/json' \
  810. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  811. --data '
  812. {
  813. "chunk_ids": ["test_1", "test_2"]
  814. }'
  815. ```
  816. #### Request parameters
  817. - `dataset_id`: (*Path parameter*)
  818. The associated dataset ID.
  819. - `document_id`: (*Path parameter*)
  820. The associated document ID.
  821. - `"chunk_ids"`: (*Body parameter*), `list[string]`
  822. The IDs of the chunks to delete. If it is not specified, all chunks of the specified document will be deleted.
  823. ### Response
  824. Success:
  825. ```json
  826. {
  827. "code": 0
  828. }
  829. ```
  830. Failure:
  831. ```json
  832. {
  833. "code": 102,
  834. "message": "`chunk_ids` is required"
  835. }
  836. ```
  837. ---
  838. ## Update chunk
  839. **PUT** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk/{chunk_id}`
  840. Updates content or configurations for a specified chunk.
  841. ### Request
  842. - Method: PUT
  843. - URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk/{chunk_id}`
  844. - Headers:
  845. - `'Content-Type: application/json'`
  846. - `'Authorization: Bearer {YOUR_API_KEY}'`
  847. - Body:
  848. - `"content"`: `string`
  849. - `"important_keywords"`: `list[string]`
  850. - `"available"`: `boolean`
  851. #### Request example
  852. ```bash
  853. curl --request PUT \
  854. --url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk/{chunk_id} \
  855. --header 'Content-Type: application/json' \
  856. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  857. --data '
  858. {
  859. "content": "ragflow123",
  860. "important_keywords": []
  861. }'
  862. ```
  863. #### Request parameters
  864. - `dataset_id`: (*Path parameter*)
  865. The associated dataset ID.
  866. - `document_id`: (*Path parameter*)
  867. The associated document ID.
  868. - `chunk_id`: (*Path parameter*)
  869. The ID of the chunk to update.
  870. - `"content"`: (*Body parameter*), `string`
  871. The text content of the chunk.
  872. - `"important_keywords"`: (*Body parameter*), `list[string]`
  873. A list of key terms or phrases to tag with the chunk.
  874. - `"available"`: (*Body parameter*), `boolean`
  875. The chunk's availability status in the dataset. Value options:
  876. - `true`: Available (default)
  877. - `false`: Unavailable
  878. ### Response
  879. Success:
  880. ```json
  881. {
  882. "code": 0
  883. }
  884. ```
  885. Failure:
  886. ```json
  887. {
  888. "code": 102,
  889. "message": "Can't find this chunk 29a2d9987e16ba331fb4d7d30d99b71d2"
  890. }
  891. ```
  892. ---
  893. ## Retrieve chunks
  894. **POST** `/api/v1/retrieval`
  895. Retrieves chunks from specified datasets.
  896. ### Request
  897. - Method: POST
  898. - URL: `/api/v1/retrieval`
  899. - Headers:
  900. - `'Content-Type: application/json'`
  901. - `'Authorization: Bearer {YOUR_API_KEY}'`
  902. - Body:
  903. - `"question"`: `string`
  904. - `"dataset_ids"`: `list[string]`
  905. - `"document_ids"`: `list[string]`
  906. - `"offset"`: `integer`
  907. - `"limit"`: `integer`
  908. - `"similarity_threshold"`: `float`
  909. - `"vector_similarity_weight"`: `float`
  910. - `"top_k"`: `integer`
  911. - `"rerank_id"`: `string`
  912. - `"keyword"`: `boolean`
  913. - `"highlight"`: `boolean`
  914. #### Request example
  915. ```bash
  916. curl --request POST \
  917. --url http://{address}/api/v1/retrieval \
  918. --header 'Content-Type: application/json' \
  919. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  920. --data '
  921. {
  922. "question": "What is advantage of ragflow?",
  923. "dataset_ids": ["b2a62730759d11ef987d0242ac120004"],
  924. "document_ids": ["77df9ef4759a11ef8bdd0242ac120004"]
  925. }'
  926. ```
  927. #### Request parameters
  928. - `"question"`: (*Body parameter*), `string`, *Required*
  929. The user query or query keywords.
  930. - `"dataset_ids"`: (*Body parameter*), `list[string]`, *Required*
  931. The IDs of the datasets to search from.
  932. - `"document_ids"`: (*Body parameter*), `list[string]`
  933. The IDs of the documents to search from.
  934. - `"offset"`: (*Body parameter*), `integer`
  935. The starting index for the chunks to retrieve. Defaults to `1`.
  936. - `"limit"`: (*Body parameter*), `integer`
  937. The maximum number of chunks to retrieve. Defaults to `1024`.
  938. - `"similarity_threshold"`: (*Body parameter*), `float`
  939. The minimum similarity score. Defaults to `0.2`.
  940. - `"vector_similarity_weight"`: (*Body parameter*), `float`
  941. The weight of vector cosine similarity. Defaults to `0.3`. If x represents the weight of vector cosine similarity, then (1 - x) is the term similarity weight.
  942. - `"top_k"`: (*Body parameter*), `integer`
  943. The number of chunks engaged in vector cosine computation. Defaults to `1024`.
  944. - `"rerank_id"`: (*Body parameter*), `string`
  945. The ID of the rerank model.
  946. - `"keyword"`: (*Body parameter*), `boolean`
  947. Indicates whether to enable keyword-based matching:
  948. - `true`: Enable keyword-based matching.
  949. - `false`: Disable keyword-based matching (default).
  950. - `"highlight"`: (*Body parameter*), `boolean`
  951. Specifies whether to enable highlighting of matched terms in the results:
  952. - `true`: Enable highlighting of matched terms.
  953. - `false`: Disable highlighting of matched terms (default).
  954. ### Response
  955. Success:
  956. ```json
  957. {
  958. "code": 0,
  959. "data": {
  960. "chunks": [
  961. {
  962. "content": "ragflow content",
  963. "content_ltks": "ragflow content",
  964. "document_id": "5c5999ec7be811ef9cab0242ac120005",
  965. "document_keyword": "1.txt",
  966. "highlight": "<em>ragflow</em> content",
  967. "id": "d78435d142bd5cf6704da62c778795c5",
  968. "img_id": "",
  969. "important_keywords": [
  970. ""
  971. ],
  972. "kb_id": "c7ee74067a2c11efb21c0242ac120006",
  973. "positions": [
  974. ""
  975. ],
  976. "similarity": 0.9669436601210759,
  977. "term_similarity": 1.0,
  978. "vector_similarity": 0.8898122004035864
  979. }
  980. ],
  981. "doc_aggs": [
  982. {
  983. "count": 1,
  984. "doc_id": "5c5999ec7be811ef9cab0242ac120005",
  985. "doc_name": "1.txt"
  986. }
  987. ],
  988. "total": 1
  989. }
  990. }
  991. ```
  992. Failure:
  993. ```json
  994. {
  995. "code": 102,
  996. "message": "`datasets` is required."
  997. }
  998. ```
  999. ---
  1000. :::tip API GROUPING
  1001. Chat Assistant Management
  1002. :::
  1003. ---
  1004. ## Create chat assistant
  1005. **POST** `/api/v1/chat`
  1006. Creates a chat assistant.
  1007. ### Request
  1008. - Method: POST
  1009. - URL: `/api/v1/chat`
  1010. - Headers:
  1011. - `'Content-Type: application/json'`
  1012. - `'Authorization: Bearer {YOUR_API_KEY}'`
  1013. - Body:
  1014. - `"name"`: `string`
  1015. - `"avatar"`: `string`
  1016. - `"dataset_ids"`: `list[string]`
  1017. - `"llm"`: `object`
  1018. - `"prompt"`: `object`
  1019. #### Request example
  1020. ```shell
  1021. curl --request POST \
  1022. --url http://{address}/api/v1/chat \
  1023. --header 'Content-Type: application/json' \
  1024. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  1025. --data '{
  1026. "dataset_ids": [
  1027. {
  1028. "avatar": null,
  1029. "chunk_count": 0,
  1030. "description": null,
  1031. "document_count": 0,
  1032. "embedding_model": "",
  1033. "id": "0b2cbc8c877f11ef89070242ac120005",
  1034. "language": "English",
  1035. "name": "Test_assistant",
  1036. "parse_method": "naive",
  1037. "parser_config": {
  1038. "pages": [
  1039. [
  1040. 1,
  1041. 1000000
  1042. ]
  1043. ]
  1044. },
  1045. "permission": "me",
  1046. "tenant_id": "4fb0cd625f9311efba4a0242ac120006"
  1047. }
  1048. ],
  1049. "name":"new_chat_1"
  1050. }'
  1051. ```
  1052. #### Request parameters
  1053. - `"name"`: (*Body parameter*), `string`, *Required*
  1054. The name of the chat assistant.
  1055. - `"avatar"`: (*Body parameter*), `string`
  1056. Base64 encoding of the avatar.
  1057. - `"dataset_ids"`: (*Body parameter*), `list[string]`
  1058. The IDs of the associated datasets.
  1059. - `"llm"`: (*Body parameter*), `object`
  1060. The LLM settings for the chat assistant to create. If it is not explicitly set, a dictionary with the following values will be generated as the default. An `llm` object contains the following attributes:
  1061. - `"model_name"`: `string`
  1062. The chat model name. If not set, the user's default chat model will be used.
  1063. - `"temperature"`: `float`
  1064. Controls the randomness of the model's predictions. A lower temperature increases the model's confidence in its responses; a higher temperature increases creativity and diversity. Defaults to `0.1`.
  1065. - `"top_p"`: `float`
  1066. Also known as “nucleus sampling”, this parameter sets a threshold to select a smaller set of words to sample from. It focuses on the most likely words, cutting off the less probable ones. Defaults to `0.3`.
  1067. - `"presence_penalty"`: `float`
  1068. This discourages the model from repeating the same information by penalizing words that have already appeared in the conversation. Defaults to `0.2`.
  1069. - `"frequency_penalty"`: `float`
  1070. Similar to the presence penalty, this reduces the model’s tendency to repeat the same words frequently. Defaults to `0.7`.
  1071. - `"max_tokens"`: `integer`
  1072. The maximum length of the model’s output, measured in the number of tokens (words or pieces of words). Defaults to `512`.
  1073. - `"prompt"`: (*Body parameter*), `object`
  1074. Instructions for the LLM to follow. A `prompt` object contains the following attributes:
  1075. - `"similarity_threshold"`: `float` RAGFlow uses a hybrid of weighted keyword similarity and vector cosine similarity during retrieval. This argument sets the threshold for similarities between the user query and chunks. If a similarity score falls below this threshold, the corresponding chunk will be excluded from the results. The default value is `0.2`.
  1076. - `"keywords_similarity_weight"`: `float` This argument sets the weight of keyword similarity in the hybrid similarity score with vector cosine similarity or reranking model similarity. By adjusting this weight, you can control the influence of keyword similarity in relation to other similarity measures. The default value is `0.7`.
  1077. - `"top_n"`: `int` This argument specifies the number of top chunks with similarity scores above the `similarity_threshold` that are fed to the LLM. The LLM will *only* access these 'top N' chunks. The default value is `8`.
  1078. - `"variables"`: `object[]` This argument lists the variables to use in the 'System' field of **Chat Configurations**. Note that:
  1079. - `"knowledge"` is a reserved variable, which represents the retrieved chunks.
  1080. - All the variables in 'System' should be curly bracketed.
  1081. - The default value is `[{"key": "knowledge", "optional": true}]`.
  1082. - `"rerank_model"`: `string` If it is not specified, vector cosine similarity will be used; otherwise, reranking score will be used.
  1083. - `"empty_response"`: `string` If nothing is retrieved in the dataset for the user's question, this will be used as the response. To allow the LLM to improvise when nothing is found, leave this blank.
  1084. - `"opener"`: `string` The opening greeting for the user. Defaults to `"Hi! I am your assistant, can I help you?"`.
  1085. - `"show_quote"`: `boolean` Indicates whether the source of text should be displayed. Defaults to `true`.
  1086. - `"prompt"`: `string` The prompt content. Defaults to `You are an intelligent assistant. Please summarize the content of the dataset to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.
  1087. Here is the knowledge base:
  1088. {knowledge}
  1089. The above is the knowledge base.`
  1090. ### Response
  1091. Success:
  1092. ```json
  1093. {
  1094. "code": 0,
  1095. "data": {
  1096. "avatar": "",
  1097. "create_date": "Fri, 11 Oct 2024 03:23:24 GMT",
  1098. "create_time": 1728617004635,
  1099. "description": "A helpful Assistant",
  1100. "do_refer": "1",
  1101. "id": "2ca4b22e878011ef88fe0242ac120005",
  1102. "knowledgebases": [
  1103. {
  1104. "avatar": null,
  1105. "chunk_count": 0,
  1106. "description": null,
  1107. "document_count": 0,
  1108. "embedding_model": "",
  1109. "id": "0b2cbc8c877f11ef89070242ac120005",
  1110. "language": "English",
  1111. "name": "Test_assistant",
  1112. "parse_method": "naive",
  1113. "parser_config": {
  1114. "pages": [
  1115. [
  1116. 1,
  1117. 1000000
  1118. ]
  1119. ]
  1120. },
  1121. "permission": "me",
  1122. "tenant_id": "4fb0cd625f9311efba4a0242ac120006"
  1123. }
  1124. ],
  1125. "language": "English",
  1126. "llm": {
  1127. "frequency_penalty": 0.7,
  1128. "max_tokens": 512,
  1129. "model_name": "deepseek-chat___OpenAI-API@OpenAI-API-Compatible",
  1130. "presence_penalty": 0.4,
  1131. "temperature": 0.1,
  1132. "top_p": 0.3
  1133. },
  1134. "name": "new_chat_1",
  1135. "prompt": {
  1136. "empty_response": "Sorry! 知识库中未找到相关内容!",
  1137. "keywords_similarity_weight": 0.3,
  1138. "opener": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
  1139. "prompt": "你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\n 以下是知识库:\n {knowledge}\n 以上是知识库。",
  1140. "rerank_model": "",
  1141. "similarity_threshold": 0.2,
  1142. "top_n": 6,
  1143. "variables": [
  1144. {
  1145. "key": "knowledge",
  1146. "optional": false
  1147. }
  1148. ]
  1149. },
  1150. "prompt_type": "simple",
  1151. "status": "1",
  1152. "tenant_id": "69736c5e723611efb51b0242ac120007",
  1153. "top_k": 1024,
  1154. "update_date": "Fri, 11 Oct 2024 03:23:24 GMT",
  1155. "update_time": 1728617004635
  1156. }
  1157. }
  1158. ```
  1159. Failure:
  1160. ```json
  1161. {
  1162. "code": 102,
  1163. "message": "Duplicated chat name in creating dataset."
  1164. }
  1165. ```
  1166. ---
  1167. ## Update chat assistant
  1168. **PUT** `/api/v1/chat/{chat_id}`
  1169. Updates configurations for a specified chat assistant.
  1170. ### Request
  1171. - Method: PUT
  1172. - URL: `/api/v1/chat/{chat_id}`
  1173. - Headers:
  1174. - `'Content-Type: application/json'`
  1175. - `'Authorization: Bearer {YOUR_API_KEY}'`
  1176. - Body:
  1177. - `"name"`: `string`
  1178. - `"avatar"`: `string`
  1179. - `"dataset_ids"`: `list[string]`
  1180. - `"llm"`: `object`
  1181. - `"prompt"`: `object`
  1182. #### Request example
  1183. ```bash
  1184. curl --request PUT \
  1185. --url http://{address}/api/v1/chat/{chat_id} \
  1186. --header 'Content-Type: application/json' \
  1187. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  1188. --data '
  1189. {
  1190. "name":"Test"
  1191. }'
  1192. ```
  1193. #### Request parameters
  1194. - `chat_id`: (*Path parameter*)
  1195. The ID of the chat assistant to update.
  1196. - `"name"`: (*Body parameter*), `string`, *Required*
  1197. The name of the chat assistant.
  1198. - `"avatar"`: (*Body parameter*), `string`
  1199. Base64 encoding of the avatar.
  1200. - `"dataset_ids"`: (*Body parameter*), `list[string]`
  1201. The IDs of the associated datasets.
  1202. - `"llm"`: (*Body parameter*), `object`
  1203. The LLM settings for the chat assistant to update. If it is not explicitly set, a dictionary with the following values will be generated as the default. An `llm` object contains the following attributes:
  1204. - `"model_name"`: `string`
  1205. The chat model name. If not set, the user's default chat model will be used.
  1206. - `"temperature"`: `float`
  1207. Controls the randomness of the model's predictions. A lower temperature increases the model's confidence in its responses; a higher temperature increases creativity and diversity. Defaults to `0.1`.
  1208. - `"top_p"`: `float`
  1209. Also known as “nucleus sampling”, this parameter sets a threshold to select a smaller set of words to sample from. It focuses on the most likely words, cutting off the less probable ones. Defaults to `0.3`.
  1210. - `"presence_penalty"`: `float`
  1211. This discourages the model from repeating the same information by penalizing words that have already appeared in the conversation. Defaults to `0.2`.
  1212. - `"frequency_penalty"`: `float`
  1213. Similar to the presence penalty, this reduces the model’s tendency to repeat the same words frequently. Defaults to `0.7`.
  1214. - `"max_tokens"`: `integer`
  1215. The maximum length of the model’s output, measured in the number of tokens (words or pieces of words). Defaults to `512`.
  1216. - `"prompt"`: (*Body parameter*), `object`
  1217. Instructions for the LLM to follow. A `prompt` object contains the following attributes:
  1218. - `"similarity_threshold"`: `float` RAGFlow uses a hybrid of weighted keyword similarity and vector cosine similarity during retrieval. This argument sets the threshold for similarities between the user query and chunks. If a similarity score falls below this threshold, the corresponding chunk will be excluded from the results. The default value is `0.2`.
  1219. - `"keywords_similarity_weight"`: `float` This argument sets the weight of keyword similarity in the hybrid similarity score with vector cosine similarity or reranking model similarity. By adjusting this weight, you can control the influence of keyword similarity in relation to other similarity measures. The default value is `0.7`.
  1220. - `"top_n"`: `int` This argument specifies the number of top chunks with similarity scores above the `similarity_threshold` that are fed to the LLM. The LLM will *only* access these 'top N' chunks. The default value is `8`.
  1221. - `"variables"`: `object[]` This argument lists the variables to use in the 'System' field of **Chat Configurations**. Note that:
  1222. - `"knowledge"` is a reserved variable, which represents the retrieved chunks.
  1223. - All the variables in 'System' should be curly bracketed.
  1224. - The default value is `[{"key": "knowledge", "optional": true}]`.
  1225. - `"rerank_model"`: `string` If it is not specified, vector cosine similarity will be used; otherwise, reranking score will be used.
  1226. - `"empty_response"`: `string` If nothing is retrieved in the dataset for the user's question, this will be used as the response. To allow the LLM to improvise when nothing is found, leave this blank.
  1227. - `"opener"`: `string` The opening greeting for the user. Defaults to `"Hi! I am your assistant, can I help you?"`.
  1228. - `"show_quote"`: `boolean` Indicates whether the source of text should be displayed. Defaults to `true`.
  1229. - `"prompt"`: `string` The prompt content. Defaults to `You are an intelligent assistant. Please summarize the content of the dataset to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.
  1230. Here is the knowledge base:
  1231. {knowledge}
  1232. The above is the knowledge base.`
  1233. ### Response
  1234. Success:
  1235. ```json
  1236. {
  1237. "code": 0
  1238. }
  1239. ```
  1240. Failure:
  1241. ```json
  1242. {
  1243. "code": 102,
  1244. "message": "Duplicated chat name in updating dataset."
  1245. }
  1246. ```
  1247. ---
  1248. ## Delete chat assistants
  1249. **DELETE** `/api/v1/chat`
  1250. Deletes chat assistants by ID.
  1251. ### Request
  1252. - Method: DELETE
  1253. - URL: `/api/v1/chat`
  1254. - Headers:
  1255. - `'Content-Type: application/json'`
  1256. - `'Authorization: Bearer {YOUR_API_KEY}'`
  1257. - Body:
  1258. - `"ids"`: `list[string]`
  1259. #### Request example
  1260. ```bash
  1261. curl --request DELETE \
  1262. --url http://{address}/api/v1/chat \
  1263. --header 'Content-Type: application/json' \
  1264. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  1265. --data '
  1266. {
  1267. "ids": ["test_1", "test_2"]
  1268. }'
  1269. ```
  1270. #### Request parameters
  1271. - `"ids"`: (*Body parameter*), `list[string]`
  1272. The IDs of the chat assistants to delete. If it is not specified, all chat assistants in the system will be deleted.
  1273. ### Response
  1274. Success:
  1275. ```json
  1276. {
  1277. "code": 0
  1278. }
  1279. ```
  1280. Failure:
  1281. ```json
  1282. {
  1283. "code": 102,
  1284. "message": "ids are required"
  1285. }
  1286. ```
  1287. ---
  1288. ## List chat assistants
  1289. **GET** `/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={chat_name}&id={chat_id}`
  1290. Lists chat assistants.
  1291. ### Request
  1292. - Method: GET
  1293. - URL: `/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={chat_name}&id={chat_id}`
  1294. - Headers:
  1295. - `'Authorization: Bearer {YOUR_API_KEY}'`
  1296. #### Request example
  1297. ```bash
  1298. curl --request GET \
  1299. --url http://{address}/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={chat_name}&id={chat_id} \
  1300. --header 'Authorization: Bearer {YOUR_API_KEY}'
  1301. ```
  1302. #### Request parameters
  1303. - `page`: (*Filter parameter*), `integer`
  1304. Specifies the page on which the chat assistants will be displayed. Defaults to `1`.
  1305. - `page_size`: (*Filter parameter*), `integer`
  1306. The number of chat assistants on each page. Defaults to `1024`.
  1307. - `orderby`: (*Filter parameter*), `string`
  1308. The attribute by which the results are sorted. Available options:
  1309. - `create_time` (default)
  1310. - `update_time`
  1311. - `desc`: (*Filter parameter*), `boolean`
  1312. Indicates whether the retrieved chat assistants should be sorted in descending order. Defaults to `true`.
  1313. - `id`: (*Filter parameter*), `string`
  1314. The ID of the chat assistant to retrieve.
  1315. - `name`: (*Filter parameter*), `string`
  1316. The name of the chat assistant to retrieve.
  1317. ### Response
  1318. Success:
  1319. ```json
  1320. {
  1321. "code": 0,
  1322. "data": [
  1323. {
  1324. "avatar": "",
  1325. "create_date": "Fri, 11 Oct 2024 03:23:24 GMT",
  1326. "create_time": 1728617004635,
  1327. "description": "A helpful Assistant",
  1328. "do_refer": "1",
  1329. "id": "2ca4b22e878011ef88fe0242ac120005",
  1330. "knowledgebases": [
  1331. {
  1332. "avatar": "",
  1333. "chunk_num": 0,
  1334. "create_date": "Fri, 11 Oct 2024 03:15:18 GMT",
  1335. "create_time": 1728616518986,
  1336. "created_by": "69736c5e723611efb51b0242ac120007",
  1337. "description": "",
  1338. "doc_num": 0,
  1339. "embd_id": "BAAI/bge-large-zh-v1.5",
  1340. "id": "0b2cbc8c877f11ef89070242ac120005",
  1341. "language": "English",
  1342. "name": "test_delete_chat",
  1343. "parser_config": {
  1344. "chunk_token_count": 128,
  1345. "delimiter": "\n!?。;!?",
  1346. "layout_recognize": true,
  1347. "task_page_size": 12
  1348. },
  1349. "parser_id": "naive",
  1350. "permission": "me",
  1351. "similarity_threshold": 0.2,
  1352. "status": "1",
  1353. "tenant_id": "69736c5e723611efb51b0242ac120007",
  1354. "token_num": 0,
  1355. "update_date": "Fri, 11 Oct 2024 04:01:31 GMT",
  1356. "update_time": 1728619291228,
  1357. "vector_similarity_weight": 0.3
  1358. }
  1359. ],
  1360. "language": "English",
  1361. "llm": {
  1362. "frequency_penalty": 0.7,
  1363. "max_tokens": 512,
  1364. "model_name": "deepseek-chat___OpenAI-API@OpenAI-API-Compatible",
  1365. "presence_penalty": 0.4,
  1366. "temperature": 0.1,
  1367. "top_p": 0.3
  1368. },
  1369. "name": "Test",
  1370. "prompt": {
  1371. "empty_response": "Sorry! 知识库中未找到相关内容!",
  1372. "keywords_similarity_weight": 0.3,
  1373. "opener": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
  1374. "prompt": "你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\n 以下是知识库:\n {knowledge}\n 以上是知识库。",
  1375. "rerank_model": "",
  1376. "similarity_threshold": 0.2,
  1377. "top_n": 6,
  1378. "variables": [
  1379. {
  1380. "key": "knowledge",
  1381. "optional": false
  1382. }
  1383. ]
  1384. },
  1385. "prompt_type": "simple",
  1386. "status": "1",
  1387. "tenant_id": "69736c5e723611efb51b0242ac120007",
  1388. "top_k": 1024,
  1389. "update_date": "Fri, 11 Oct 2024 03:47:58 GMT",
  1390. "update_time": 1728618478392
  1391. }
  1392. ]
  1393. }
  1394. ```
  1395. Failure:
  1396. ```json
  1397. {
  1398. "code": 102,
  1399. "message": "The chat doesn't exist"
  1400. }
  1401. ```
  1402. ## Create session
  1403. **POST** `/api/v1/chat/{chat_id}/session`
  1404. Creates a chat session.
  1405. ### Request
  1406. - Method: POST
  1407. - URL: `/api/v1/chat/{chat_id}/session`
  1408. - Headers:
  1409. - `'Content-Type: application/json'`
  1410. - `'Authorization: Bearer {YOUR_API_KEY}'`
  1411. - Body:
  1412. - `"name"`: `string`
  1413. #### Request example
  1414. ```bash
  1415. curl --request POST \
  1416. --url http://{address}/api/v1/chat/{chat_id}/session \
  1417. --header 'Content-Type: application/json' \
  1418. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  1419. --data '
  1420. {
  1421. "name": "new session"
  1422. }'
  1423. ```
  1424. #### Request parameters
  1425. - `chat_id`: (*Path parameter*)
  1426. The ID of the associated chat assistant.
  1427. - `"name"`: (*Body parameter*), `string`
  1428. The name of the chat session to create.
  1429. ### Response
  1430. Success:
  1431. ```json
  1432. {
  1433. "code": 0,
  1434. "data": {
  1435. "chat_id": "2ca4b22e878011ef88fe0242ac120005",
  1436. "create_date": "Fri, 11 Oct 2024 08:46:14 GMT",
  1437. "create_time": 1728636374571,
  1438. "id": "4606b4ec87ad11efbc4f0242ac120006",
  1439. "messages": [
  1440. {
  1441. "content": "Hi! I am your assistant,can I help you?",
  1442. "role": "assistant"
  1443. }
  1444. ],
  1445. "name": "new session",
  1446. "update_date": "Fri, 11 Oct 2024 08:46:14 GMT",
  1447. "update_time": 1728636374571
  1448. }
  1449. }
  1450. ```
  1451. Failure:
  1452. ```json
  1453. {
  1454. "code": 102,
  1455. "message": "Name can not be empty."
  1456. }
  1457. ```
  1458. ---
  1459. ## Update session
  1460. **PUT** `/api/v1/chat/{chat_id}/session/{session_id}`
  1461. Updates a chat session.
  1462. ### Request
  1463. - Method: PUT
  1464. - URL: `/api/v1/chat/{chat_id}/session/{session_id}`
  1465. - Headers:
  1466. - `'Content-Type: application/json'`
  1467. - `'Authorization: Bearer {YOUR_API_KEY}'`
  1468. - Body:
  1469. - `"name"`: `string`
  1470. #### Request example
  1471. ```bash
  1472. curl --request PUT \
  1473. --url http://{address}/api/v1/chat/{chat_id}/session/{session_id} \
  1474. --header 'Content-Type: application/json' \
  1475. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  1476. --data '
  1477. {
  1478. "name": "Updated session"
  1479. }'
  1480. ```
  1481. #### Request parameters
  1482. - `chat_id`: (*Path parameter*)
  1483. The ID of the associated chat assistant.
  1484. - `session_id`: (*Path parameter*)
  1485. The ID of the session to update.
  1486. - `"name"`: (*Body parameter*), `string`
  1487. The name of the session to update.
  1488. ### Response
  1489. Success:
  1490. ```json
  1491. {
  1492. "code": 0
  1493. }
  1494. ```
  1495. Failure:
  1496. ```json
  1497. {
  1498. "code": 102,
  1499. "message": "Name cannot be empty."
  1500. }
  1501. ```
  1502. ---
  1503. ## List sessions
  1504. **GET** `/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}`
  1505. Lists sessions associated with a specified chat assistant.
  1506. ### Request
  1507. - Method: GET
  1508. - URL: `/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}`
  1509. - Headers:
  1510. - `'Authorization: Bearer {YOUR_API_KEY}'`
  1511. #### Request example
  1512. ```bash
  1513. curl --request GET \
  1514. --url http://{address}/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id} \
  1515. --header 'Authorization: Bearer {YOUR_API_KEY}'
  1516. ```
  1517. #### Request Parameters
  1518. - `chat_id`: (*Path parameter*)
  1519. The ID of the associated chat assistant.
  1520. - `page`: (*Filter parameter*), `integer`
  1521. Specifies the page on which the sessions will be displayed. Defaults to `1`.
  1522. - `page_size`: (*Filter parameter*), `integer`
  1523. The number of sessions on each page. Defaults to `1024`.
  1524. - `orderby`: (*Filter parameter*), `string`
  1525. The field by which sessions should be sorted. Available options:
  1526. - `create_time` (default)
  1527. - `update_time`
  1528. - `desc`: (*Filter parameter*), `boolean`
  1529. Indicates whether the retrieved sessions should be sorted in descending order. Defaults to `true`.
  1530. - `name`: (*Filter parameter*), `string`
  1531. The name of the chat session to retrieve.
  1532. - `id`: (*Filter parameter*), `string`
  1533. The ID of the chat session to retrieve.
  1534. ### Response
  1535. Success:
  1536. ```json
  1537. {
  1538. "code": 0,
  1539. "data": [
  1540. {
  1541. "chat": "2ca4b22e878011ef88fe0242ac120005",
  1542. "create_date": "Fri, 11 Oct 2024 08:46:43 GMT",
  1543. "create_time": 1728636403974,
  1544. "id": "578d541e87ad11ef96b90242ac120006",
  1545. "messages": [
  1546. {
  1547. "content": "Hi! I am your assistant,can I help you?",
  1548. "role": "assistant"
  1549. }
  1550. ],
  1551. "name": "new session",
  1552. "update_date": "Fri, 11 Oct 2024 08:46:43 GMT",
  1553. "update_time": 1728636403974
  1554. }
  1555. ]
  1556. }
  1557. ```
  1558. Failure:
  1559. ```json
  1560. {
  1561. "code": 102,
  1562. "message": "The session doesn't exist"
  1563. }
  1564. ```
  1565. ---
  1566. ## Delete sessions
  1567. **DELETE** `/api/v1/chat/{chat_id}/session`
  1568. Deletes sessions by ID.
  1569. ### Request
  1570. - Method: DELETE
  1571. - URL: `/api/v1/chat/{chat_id}/session`
  1572. - Headers:
  1573. - `'Content-Type: application/json'`
  1574. - `'Authorization: Bearer {YOUR_API_KEY}'`
  1575. - Body:
  1576. - `"ids"`: `list[string]`
  1577. #### Request example
  1578. ```bash
  1579. # "ids" is optional; if it is omitted, all sessions of the specified chat assistant are deleted.
  1580. curl --request DELETE \
  1581. --url http://{address}/api/v1/chat/{chat_id}/session \
  1582. --header 'Content-Type: application/json' \
  1583. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  1584. --data '
  1585. {
  1586. "ids": ["test_1", "test_2"]
  1587. }'
  1588. ```
  1589. #### Request Parameters
  1590. - `chat_id`: (*Path parameter*)
  1591. The ID of the associated chat assistant.
  1592. - `"ids"`: (*Body Parameter*), `list[string]`
  1593. The IDs of the sessions to delete. If it is not specified, all sessions associated with the specified chat assistant will be deleted.
  1594. ### Response
  1595. Success:
  1596. ```json
  1597. {
  1598. "code": 0
  1599. }
  1600. ```
  1601. Failure:
  1602. ```json
  1603. {
  1604. "code": 102,
  1605. "message": "The chat doesn't own the session"
  1606. }
  1607. ```
  1608. ---
  1609. ## Converse
  1610. **POST** `/api/v1/chat/{chat_id}/completion`
  1611. Asks a question to start a conversation.
  1612. ### Request
  1613. - Method: POST
  1614. - URL: `/api/v1/chat/{chat_id}/completion`
  1615. - Headers:
  1616. - `'Content-Type: application/json'`
  1617. - `'Authorization: Bearer {YOUR_API_KEY}'`
  1618. - Body:
  1619. - `"question"`: `string`
  1620. - `"stream"`: `boolean`
  1621. - `"session_id"`: `string`
  1622. #### Request example
  1623. ```bash
  1624. curl --request POST \
  1625. --url http://{address}/api/v1/chat/{chat_id}/completion \
  1626. --header 'Content-Type: application/json' \
  1627. --header 'Authorization: Bearer {YOUR_API_KEY}' \
  1628. --data-binary '
  1629. {
  1630. "question": "Hello!",
  1631. "stream": true
  1632. }'
  1633. ```
  1634. #### Request Parameters
  1635. - `chat_id`: (*Path parameter*)
  1636. The ID of the associated chat assistant.
  1637. - `"question"`: (*Body Parameter*), `string` *Required*
  1638. The question to start an AI chat.
  1639. - `"stream"`: (*Body Parameter*), `boolean`
  1640. Indicates whether to output responses in streaming mode:
  1641. - `true`: Enable streaming.
  1642. - `false`: (Default) Disable streaming.
  1643. - `"session_id"`: (*Body Parameter*), `string`
  1644. The ID of the session. If it is not provided, a new session will be generated.
  1645. ### Response
  1646. Success:
  1647. ```json
  1648. data: {
  1649. "code": 0,
  1650. "data": {
  1651. "answer": "您好!有什么具体的问题或者需要的帮助",
  1652. "reference": {},
  1653. "audio_binary": null,
  1654. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1655. }
  1656. }
  1657. data: {
  1658. "code": 0,
  1659. "data": {
  1660. "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助",
  1661. "reference": {},
  1662. "audio_binary": null,
  1663. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1664. }
  1665. }
  1666. data: {
  1667. "code": 0,
  1668. "data": {
  1669. "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助您的。如果您有任何疑问或是需要获取",
  1670. "reference": {},
  1671. "audio_binary": null,
  1672. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1673. }
  1674. }
  1675. data: {
  1676. "code": 0,
  1677. "data": {
  1678. "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助您的。如果您有任何疑问或是需要获取某些信息,请随时提出。",
  1679. "reference": {},
  1680. "audio_binary": null,
  1681. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1682. }
  1683. }
  1684. data: {
  1685. "code": 0,
  1686. "data": {
  1687. "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗 ##0$$?我在这里是为了帮助您的。如果您有任何疑问或是需要获取某些信息,请随时提出。",
  1688. "reference": {
  1689. "total": 19,
  1690. "chunks": [
  1691. {
  1692. "chunk_id": "9d87f9d70a0d8a7565694a81fd4c5d5f",
  1693. "content_ltks": "当所有知识库内容都与问题无关时 ,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n以下是知识库:\r\n{knowledg}\r\n以上是知识库\r\n\"\"\"\r\n 1\r\n 2\r\n 3\r\n 4\r\n 5\r\n 6\r\n总结\r\n通过上面的介绍,可以对开源的 ragflow有了一个大致的了解,与前面的有道qanyth整体流程还是比较类似的。 ",
  1694. "content_with_weight": "当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n 以下是知识库:\r\n {knowledge}\r\n 以上是知识库\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n总结\r\n通过上面的介绍,可以对开源的 RagFlow 有了一个大致的了解,与前面的 有道 QAnything 整体流程还是比较类似的。",
  1695. "doc_id": "5c5999ec7be811ef9cab0242ac120005",
  1696. "docnm_kwd": "1.txt",
  1697. "kb_id": "c7ee74067a2c11efb21c0242ac120006",
  1698. "important_kwd": [],
  1699. "img_id": "",
  1700. "similarity": 0.38337178633282265,
  1701. "vector_similarity": 0.3321336754679629,
  1702. "term_similarity": 0.4053309767034769,
  1703. "positions": [
  1704. ""
  1705. ]
  1706. },
  1707. {
  1708. "chunk_id": "895d34de762e674b43e8613c6fb54c6d",
  1709. "content_ltks": "\r\n\r\n实际内容可能会超过大模型的输入token数量,因此在调用大模型前会调用api/db/servic/dialog_service.py文件中 messag_fit_in ()根据大模型可用的 token数量进行过滤。这部分与有道的 qanyth的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt ,即可作为大模型的输入了 ,默认的英文prompt如下所示:\r\n\r\n\"\"\"\r\nyou are an intellig assistant. pleas summar the content of the knowledg base to answer the question. pleas list thedata in the knowledg base and answer in detail. when all knowledg base content is irrelev to the question , your answer must includ the sentenc\"the answer you are lookfor isnot found in the knowledg base!\" answer needto consid chat history.\r\n here is the knowledg base:\r\n{ knowledg}\r\nthe abov is the knowledg base.\r\n\"\"\"\r\n1\r\n 2\r\n 3\r\n 4\r\n 5\r\n 6\r\n对应的中文prompt如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。 ",
  1710. "content_with_weight": "\r\n\r\n实际内容可能会超过大模型的输入 token 数量,因此在调用大模型前会调用 api/db/services/dialog_service.py 文件中 message_fit_in() 根据大模型可用的 token 数量进行过滤。这部分与有道的 QAnything 的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt,即可作为大模型的输入了,默认的英文 prompt 如下所示:\r\n\r\n\"\"\"\r\nYou are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\r\n Here is the knowledge base:\r\n {knowledge}\r\n The above is the knowledge base.\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n对应的中文 prompt 如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。",
  1711. "doc_id": "5c5999ec7be811ef9cab0242ac120005",
  1712. "docnm_kwd": "1.txt",
  1713. "kb_id": "c7ee74067a2c11efb21c0242ac120006",
  1714. "important_kwd": [],
  1715. "img_id": "",
  1716. "similarity": 0.2788204323926715,
  1717. "vector_similarity": 0.35489427679953667,
  1718. "term_similarity": 0.2462173562183008,
  1719. "positions": [
  1720. ""
  1721. ]
  1722. }
  1723. ],
  1724. "doc_aggs": [
  1725. {
  1726. "doc_name": "1.txt",
  1727. "doc_id": "5c5999ec7be811ef9cab0242ac120005",
  1728. "count": 2
  1729. }
  1730. ]
  1731. },
  1732. "prompt": "你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\n 以下是知识库:\n 当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n 以下是知识库:\r\n {knowledge}\r\n 以上是知识库\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n总结\r\n通过上面的介绍,可以对开源的 RagFlow 有了一个大致的了解,与前面的 有道 QAnything 整体流程还是比较类似的。\n\n------\n\n\r\n\r\n实际内容可能会超过大模型的输入 token 数量,因此在调用大模型前会调用 api/db/services/dialog_service.py 文件中 message_fit_in() 根据大模型可用的 token 数量进行过滤。这部分与有道的 QAnything 的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt,即可作为大模型的输入了,默认的英文 prompt 如下所示:\r\n\r\n\"\"\"\r\nYou are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\r\n Here is the knowledge base:\r\n {knowledge}\r\n The above is the knowledge base.\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n对应的中文 prompt 如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。\n 以上是知识库。\n\n### Query:\n你好,请问有什么问题需要我帮忙解答吗?\n\n### Elapsed\n - Retrieval: 9131.1 ms\n - LLM: 12802.6 ms",
  1733. "id": "31153052-7bac-4741-a513-ed07d853f29e"
  1734. }
  1735. }
  1736. data:{
  1737. "code": 0,
  1738. "data": true
  1739. }
  1740. ```
  1741. Failure:
  1742. ```json
  1743. {
  1744. "code": 102,
  1745. "message": "Please input your question."
  1746. }
  1747. ```