
doc.py 46KB

#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pathlib
import datetime

from api.db.services.dialog_service import keyword_extraction
from rag.app.qa import rmPrefix, beAdoc
from rag.nlp import rag_tokenizer
from api.db import LLMType, ParserType
from api.db.services.llm_service import TenantLLMService
from api import settings
import hashlib
import re
from api.utils.api_utils import token_required
from api.db.db_models import Task
from api.db.services.task_service import TaskService, queue_tasks
from api.utils.api_utils import server_error_response
from api.utils.api_utils import get_result, get_error_data_result
from io import BytesIO
from flask import request, send_file
from api.db import FileSource, TaskStatus, FileType
from api.db.db_models import File
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.utils.api_utils import construct_json_result, get_parser_config
from rag.nlp import search
from rag.utils import rmSpace
from rag.utils.storage_factory import STORAGE_IMPL

MAXIMUM_OF_UPLOADING_FILES = 256


@manager.route("/datasets/<dataset_id>/documents", methods=["POST"])  # noqa: F821
@token_required
def upload(dataset_id, tenant_id):
    """
    Upload documents to a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
      - in: formData
        name: file
        type: file
        required: true
        description: Document files to upload.
    responses:
      200:
        description: Successfully uploaded documents.
        schema:
          type: object
          properties:
            data:
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                    description: Document ID.
                  name:
                    type: string
                    description: Document name.
                  chunk_count:
                    type: integer
                    description: Number of chunks.
                  token_count:
                    type: integer
                    description: Number of tokens.
                  dataset_id:
                    type: string
                    description: ID of the dataset.
                  chunk_method:
                    type: string
                    description: Chunking method used.
                  run:
                    type: string
                    description: Processing status.
    """
    if "file" not in request.files:
        return get_error_data_result(
            message="No file part!", code=settings.RetCode.ARGUMENT_ERROR
        )
    file_objs = request.files.getlist("file")
    for file_obj in file_objs:
        if file_obj.filename == "":
            return get_result(
                message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR
            )
    '''
    # total size
    total_size = 0
    for file_obj in file_objs:
        file_obj.seek(0, os.SEEK_END)
        total_size += file_obj.tell()
        file_obj.seek(0)
    MAX_TOTAL_FILE_SIZE = 10 * 1024 * 1024
    if total_size > MAX_TOTAL_FILE_SIZE:
        return get_result(
            message=f"Total file size exceeds 10MB limit! ({total_size / (1024 * 1024):.2f} MB)",
            code=settings.RetCode.ARGUMENT_ERROR,
        )
    '''
    e, kb = KnowledgebaseService.get_by_id(dataset_id)
    if not e:
        raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
    err, files = FileService.upload_document(kb, file_objs, tenant_id)
    if err:
        return get_result(message="\n".join(err), code=settings.RetCode.SERVER_ERROR)
    # rename key's name
    renamed_doc_list = []
    for file in files:
        doc = file[0]
        key_mapping = {
            "chunk_num": "chunk_count",
            "kb_id": "dataset_id",
            "token_num": "token_count",
            "parser_id": "chunk_method",
        }
        renamed_doc = {}
        for key, value in doc.items():
            new_key = key_mapping.get(key, key)
            renamed_doc[new_key] = value
            renamed_doc["run"] = "UNSTART"
        renamed_doc_list.append(renamed_doc)
    return get_result(data=renamed_doc_list)
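
# Illustrative client call for the upload endpoint above: a minimal sketch, not part
# of this module's runtime path. The host, URL prefix, API key, and file name are
# assumptions about the deployment, not values taken from this file.
#
#   import requests
#
#   resp = requests.post(
#       "http://<host>/api/v1/datasets/<dataset_id>/documents",
#       headers={"Authorization": "Bearer <API_KEY>"},
#       files={"file": open("example.pdf", "rb")},
#   )
#   print(resp.json())  # uploaded docs with id, name, chunk_count, token_count, run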


@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["PUT"])  # noqa: F821
@token_required
def update_doc(tenant_id, dataset_id, document_id):
    """
    Update a document within a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document to update.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
      - in: body
        name: body
        description: Document update parameters.
        required: true
        schema:
          type: object
          properties:
            name:
              type: string
              description: New name of the document.
            parser_config:
              type: object
              description: Parser configuration.
            chunk_method:
              type: string
              description: Chunking method.
    responses:
      200:
        description: Document updated successfully.
        schema:
          type: object
    """
    req = request.json
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(message="You don't own the dataset.")
    doc = DocumentService.query(kb_id=dataset_id, id=document_id)
    if not doc:
        return get_error_data_result(message="The dataset doesn't own the document.")
    doc = doc[0]
    if "chunk_count" in req:
        if req["chunk_count"] != doc.chunk_num:
            return get_error_data_result(message="Can't change `chunk_count`.")
    if "token_count" in req:
        if req["token_count"] != doc.token_num:
            return get_error_data_result(message="Can't change `token_count`.")
    if "progress" in req:
        if req["progress"] != doc.progress:
            return get_error_data_result(message="Can't change `progress`.")
    if "name" in req and req["name"] != doc.name:
        if (
            pathlib.Path(req["name"].lower()).suffix
            != pathlib.Path(doc.name.lower()).suffix
        ):
            return get_result(
                message="The extension of file can't be changed",
                code=settings.RetCode.ARGUMENT_ERROR,
            )
        for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
            if d.name == req["name"]:
                return get_error_data_result(
                    message="Duplicated document name in the same dataset."
                )
        if not DocumentService.update_by_id(document_id, {"name": req["name"]}):
            return get_error_data_result(message="Database error (Document rename)!")
        informs = File2DocumentService.get_by_document_id(document_id)
        if informs:
            e, file = FileService.get_by_id(informs[0].file_id)
            FileService.update_by_id(file.id, {"name": req["name"]})
    if "parser_config" in req:
        DocumentService.update_parser_config(doc.id, req["parser_config"])
    if "chunk_method" in req:
        valid_chunk_method = {
            "naive",
            "manual",
            "qa",
            "table",
            "paper",
            "book",
            "laws",
            "presentation",
            "picture",
            "one",
            "knowledge_graph",
            "email",
        }
        if req.get("chunk_method") not in valid_chunk_method:
            return get_error_data_result(
                f"`chunk_method` {req['chunk_method']} doesn't exist"
            )
        if doc.parser_id.lower() == req["chunk_method"].lower():
            return get_result()
        if doc.type == FileType.VISUAL or re.search(r"\.(ppt|pptx|pages)$", doc.name):
            return get_error_data_result(message="Not supported yet!")
        e = DocumentService.update_by_id(
            doc.id,
            {
                "parser_id": req["chunk_method"],
                "progress": 0,
                "progress_msg": "",
                "run": TaskStatus.UNSTART.value,
            },
        )
        if not e:
            return get_error_data_result(message="Document not found!")
        req["parser_config"] = get_parser_config(
            req["chunk_method"], req.get("parser_config")
        )
        DocumentService.update_parser_config(doc.id, req["parser_config"])
        if doc.token_num > 0:
            e = DocumentService.increment_chunk_num(
                doc.id,
                doc.kb_id,
                doc.token_num * -1,
                doc.chunk_num * -1,
                doc.process_duation * -1,
            )
            if not e:
                return get_error_data_result(message="Document not found!")
            settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), dataset_id)
    return get_result()


@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["GET"])  # noqa: F821
@token_required
def download(tenant_id, dataset_id, document_id):
    """
    Download a document from a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    produces:
      - application/octet-stream
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document to download.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Document file stream.
        schema:
          type: file
      400:
        description: Error message.
        schema:
          type: object
    """
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(message=f"You do not own the dataset {dataset_id}.")
    doc = DocumentService.query(kb_id=dataset_id, id=document_id)
    if not doc:
        return get_error_data_result(
            message=f"The dataset doesn't own the document {document_id}."
        )
    # The process of downloading
    doc_id, doc_location = File2DocumentService.get_storage_address(
        doc_id=document_id
    )  # minio address
    file_stream = STORAGE_IMPL.get(doc_id, doc_location)
    if not file_stream:
        return construct_json_result(
            message="This file is empty.", code=settings.RetCode.DATA_ERROR
        )
    file = BytesIO(file_stream)
    # Use send_file with a proper filename and MIME type
    return send_file(
        file,
        as_attachment=True,
        download_name=doc[0].name,
        mimetype="application/octet-stream",  # Set a default MIME type
    )


@manager.route("/datasets/<dataset_id>/documents", methods=["GET"])  # noqa: F821
@token_required
def list_docs(dataset_id, tenant_id):
    """
    List documents in a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: query
        name: id
        type: string
        required: false
        description: Filter by document ID.
      - in: query
        name: page
        type: integer
        required: false
        default: 1
        description: Page number.
      - in: query
        name: page_size
        type: integer
        required: false
        default: 30
        description: Number of items per page.
      - in: query
        name: orderby
        type: string
        required: false
        default: "create_time"
        description: Field to order by.
      - in: query
        name: desc
        type: boolean
        required: false
        default: true
        description: Order in descending.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: List of documents.
        schema:
          type: object
          properties:
            total:
              type: integer
              description: Total number of documents.
            docs:
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                    description: Document ID.
                  name:
                    type: string
                    description: Document name.
                  chunk_count:
                    type: integer
                    description: Number of chunks.
                  token_count:
                    type: integer
                    description: Number of tokens.
                  dataset_id:
                    type: string
                    description: ID of the dataset.
                  chunk_method:
                    type: string
                    description: Chunking method used.
                  run:
                    type: string
                    description: Processing status.
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    id = request.args.get("id")
    name = request.args.get("name")
    if not DocumentService.query(id=id, kb_id=dataset_id):
        return get_error_data_result(message=f"You don't own the document {id}.")
    if not DocumentService.query(name=name, kb_id=dataset_id):
        return get_error_data_result(message=f"You don't own the document {name}.")
    page = int(request.args.get("page", 1))
    keywords = request.args.get("keywords", "")
    page_size = int(request.args.get("page_size", 30))
    orderby = request.args.get("orderby", "create_time")
    if request.args.get("desc") == "False":
        desc = False
    else:
        desc = True
    docs, tol = DocumentService.get_list(
        dataset_id, page, page_size, orderby, desc, keywords, id, name
    )
    # rename key's name
    renamed_doc_list = []
    for doc in docs:
        key_mapping = {
            "chunk_num": "chunk_count",
            "kb_id": "dataset_id",
            "token_num": "token_count",
            "parser_id": "chunk_method",
        }
        run_mapping = {
            "0": "UNSTART",
            "1": "RUNNING",
            "2": "CANCEL",
            "3": "DONE",
            "4": "FAIL",
        }
        renamed_doc = {}
        for key, value in doc.items():
            new_key = key_mapping.get(key, key)
            renamed_doc[new_key] = value
            if key == "run":
                renamed_doc["run"] = run_mapping.get(str(value))
        renamed_doc_list.append(renamed_doc)
    return get_result(data={"total": tol, "docs": renamed_doc_list})
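
# Illustrative client call for the listing endpoint above (a sketch; host, prefix,
# and API key are assumptions). Query parameters mirror the Swagger block: page,
# page_size, orderby, desc, keywords, id, and name.
#
#   import requests
#
#   resp = requests.get(
#       "http://<host>/api/v1/datasets/<dataset_id>/documents",
#       headers={"Authorization": "Bearer <API_KEY>"},
#       params={"page": 1, "page_size": 30, "orderby": "create_time", "desc": "True"},
#   )
#   print(resp.json()["data"]["total"])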


@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"])  # noqa: F821
@token_required
def delete(tenant_id, dataset_id):
    """
    Delete documents from a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: body
        name: body
        description: Document deletion parameters.
        required: true
        schema:
          type: object
          properties:
            ids:
              type: array
              items:
                type: string
              description: List of document IDs to delete.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Documents deleted successfully.
        schema:
          type: object
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    req = request.json
    if not req:
        doc_ids = None
    else:
        doc_ids = req.get("ids")
    if not doc_ids:
        doc_list = []
        docs = DocumentService.query(kb_id=dataset_id)
        for doc in docs:
            doc_list.append(doc.id)
    else:
        doc_list = doc_ids
    root_folder = FileService.get_root_folder(tenant_id)
    pf_id = root_folder["id"]
    FileService.init_knowledgebase_docs(pf_id, tenant_id)
    errors = ""
    for doc_id in doc_list:
        try:
            e, doc = DocumentService.get_by_id(doc_id)
            if not e:
                return get_error_data_result(message="Document not found!")
            tenant_id = DocumentService.get_tenant_id(doc_id)
            if not tenant_id:
                return get_error_data_result(message="Tenant not found!")
            b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
            if not DocumentService.remove_document(doc, tenant_id):
                return get_error_data_result(
                    message="Database error (Document removal)!"
                )
            f2d = File2DocumentService.get_by_document_id(doc_id)
            FileService.filter_delete(
                [
                    File.source_type == FileSource.KNOWLEDGEBASE,
                    File.id == f2d[0].file_id,
                ]
            )
            File2DocumentService.delete_by_document_id(doc_id)
            STORAGE_IMPL.rm(b, n)
        except Exception as e:
            errors += str(e)
    if errors:
        return get_result(message=errors, code=settings.RetCode.SERVER_ERROR)
    return get_result()


@manager.route("/datasets/<dataset_id>/chunks", methods=["POST"])  # noqa: F821
@token_required
def parse(tenant_id, dataset_id):
    """
    Start parsing documents into chunks.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: body
        name: body
        description: Parsing parameters.
        required: true
        schema:
          type: object
          properties:
            document_ids:
              type: array
              items:
                type: string
              description: List of document IDs to parse.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Parsing started successfully.
        schema:
          type: object
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    req = request.json
    if not req.get("document_ids"):
        return get_error_data_result("`document_ids` is required")
    for id in req["document_ids"]:
        doc = DocumentService.query(id=id, kb_id=dataset_id)
        if not doc:
            return get_error_data_result(message=f"You don't own the document {id}.")
        if doc[0].progress != 0.0:
            return get_error_data_result(
                "Can't parse a document whose progress is not 0."
            )
        info = {"run": "1", "progress": 0}
        info["progress_msg"] = ""
        info["chunk_num"] = 0
        info["token_num"] = 0
        DocumentService.update_by_id(id, info)
        settings.docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), dataset_id)
        TaskService.filter_delete([Task.doc_id == id])
        e, doc = DocumentService.get_by_id(id)
        doc = doc.to_dict()
        doc["tenant_id"] = tenant_id
        bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"])
        queue_tasks(doc, bucket, name)
    return get_result()
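
# Illustrative client call that queues parsing for two documents (a sketch; host,
# prefix, API key, and document IDs are assumptions).
#
#   import requests
#
#   resp = requests.post(
#       "http://<host>/api/v1/datasets/<dataset_id>/chunks",
#       headers={"Authorization": "Bearer <API_KEY>"},
#       json={"document_ids": ["<doc_id_1>", "<doc_id_2>"]},
#   )
#   print(resp.json())  # parsing is asynchronous; poll each document's `run` status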


@manager.route("/datasets/<dataset_id>/chunks", methods=["DELETE"])  # noqa: F821
@token_required
def stop_parsing(tenant_id, dataset_id):
    """
    Stop parsing documents into chunks.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: body
        name: body
        description: Stop parsing parameters.
        required: true
        schema:
          type: object
          properties:
            document_ids:
              type: array
              items:
                type: string
              description: List of document IDs to stop parsing.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Parsing stopped successfully.
        schema:
          type: object
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    req = request.json
    if not req.get("document_ids"):
        return get_error_data_result("`document_ids` is required")
    for id in req["document_ids"]:
        doc = DocumentService.query(id=id, kb_id=dataset_id)
        if not doc:
            return get_error_data_result(message=f"You don't own the document {id}.")
        if int(doc[0].progress) == 1 or int(doc[0].progress) == 0:
            return get_error_data_result(
                "Can't stop parsing a document with progress at 0 or 1"
            )
        info = {"run": "2", "progress": 0, "chunk_num": 0}
        DocumentService.update_by_id(id, info)
        settings.docStoreConn.delete({"doc_id": doc[0].id}, search.index_name(tenant_id), dataset_id)
    return get_result()


@manager.route("/datasets/<dataset_id>/documents/<document_id>/chunks", methods=["GET"])  # noqa: F821
@token_required
def list_chunks(tenant_id, dataset_id, document_id):
    """
    List chunks of a document.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document.
      - in: query
        name: page
        type: integer
        required: false
        default: 1
        description: Page number.
      - in: query
        name: page_size
        type: integer
        required: false
        default: 30
        description: Number of items per page.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: List of chunks.
        schema:
          type: object
          properties:
            total:
              type: integer
              description: Total number of chunks.
            chunks:
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                    description: Chunk ID.
                  content:
                    type: string
                    description: Chunk content.
                  document_id:
                    type: string
                    description: ID of the document.
                  important_keywords:
                    type: array
                    items:
                      type: string
                    description: Important keywords.
                  image_id:
                    type: string
                    description: Image ID associated with the chunk.
            doc:
              type: object
              description: Document details.
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    doc = DocumentService.query(id=document_id, kb_id=dataset_id)
    if not doc:
        return get_error_data_result(
            message=f"You don't own the document {document_id}."
        )
    doc = doc[0]
    req = request.args
    doc_id = document_id
    page = int(req.get("page", 1))
    size = int(req.get("page_size", 30))
    question = req.get("keywords", "")
    query = {
        "doc_ids": [doc_id],
        "page": page,
        "size": size,
        "question": question,
        "sort": True,
    }
    key_mapping = {
        "chunk_num": "chunk_count",
        "kb_id": "dataset_id",
        "token_num": "token_count",
        "parser_id": "chunk_method",
    }
    run_mapping = {
        "0": "UNSTART",
        "1": "RUNNING",
        "2": "CANCEL",
        "3": "DONE",
        "4": "FAIL",
    }
    doc = doc.to_dict()
    renamed_doc = {}
    for key, value in doc.items():
        new_key = key_mapping.get(key, key)
        renamed_doc[new_key] = value
        if key == "run":
            renamed_doc["run"] = run_mapping.get(str(value))
    res = {"total": 0, "chunks": [], "doc": renamed_doc}
    origin_chunks = []
    if settings.docStoreConn.indexExist(search.index_name(tenant_id), dataset_id):
        sres = settings.retrievaler.search(query, search.index_name(tenant_id), [dataset_id], emb_mdl=None,
                                           highlight=True)
        res["total"] = sres.total
        sign = 0
        for id in sres.ids:
            d = {
                "id": id,
                "content_with_weight": (
                    rmSpace(sres.highlight[id])
                    if question and id in sres.highlight
                    else sres.field[id].get("content_with_weight", "")
                ),
                "doc_id": sres.field[id]["doc_id"],
                "docnm_kwd": sres.field[id]["docnm_kwd"],
                "important_kwd": sres.field[id].get("important_kwd", []),
                "question_kwd": sres.field[id].get("question_kwd", []),
                "img_id": sres.field[id].get("img_id", ""),
                "available_int": sres.field[id].get("available_int", 1),
                "positions": sres.field[id].get("position_int", "").split("\t"),
            }
            if len(d["positions"]) % 5 == 0:
                poss = []
                for i in range(0, len(d["positions"]), 5):
                    poss.append(
                        [
                            float(d["positions"][i]),
                            float(d["positions"][i + 1]),
                            float(d["positions"][i + 2]),
                            float(d["positions"][i + 3]),
                            float(d["positions"][i + 4]),
                        ]
                    )
                d["positions"] = poss
            origin_chunks.append(d)
            if req.get("id"):
                if req.get("id") == id:
                    origin_chunks.clear()
                    origin_chunks.append(d)
                    sign = 1
                    break
        if req.get("id"):
            if sign == 0:
                return get_error_data_result(f"Can't find this chunk {req.get('id')}")
    for chunk in origin_chunks:
        key_mapping = {
            "id": "id",
            "content_with_weight": "content",
            "doc_id": "document_id",
            "important_kwd": "important_keywords",
            "question_kwd": "questions",
            "img_id": "image_id",
            "available_int": "available",
        }
        renamed_chunk = {}
        for key, value in chunk.items():
            new_key = key_mapping.get(key, key)
            renamed_chunk[new_key] = value
        if renamed_chunk["available"] == 0:
            renamed_chunk["available"] = False
        if renamed_chunk["available"] == 1:
            renamed_chunk["available"] = True
        res["chunks"].append(renamed_chunk)
    return get_result(data=res)


@manager.route(  # noqa: F821
    "/datasets/<dataset_id>/documents/<document_id>/chunks", methods=["POST"]
)
@token_required
def add_chunk(tenant_id, dataset_id, document_id):
    """
    Add a chunk to a document.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document.
      - in: body
        name: body
        description: Chunk data.
        required: true
        schema:
          type: object
          properties:
            content:
              type: string
              required: true
              description: Content of the chunk.
            important_keywords:
              type: array
              items:
                type: string
              description: Important keywords.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Chunk added successfully.
        schema:
          type: object
          properties:
            chunk:
              type: object
              properties:
                id:
                  type: string
                  description: Chunk ID.
                content:
                  type: string
                  description: Chunk content.
                document_id:
                  type: string
                  description: ID of the document.
                important_keywords:
                  type: array
                  items:
                    type: string
                  description: Important keywords.
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    doc = DocumentService.query(id=document_id, kb_id=dataset_id)
    if not doc:
        return get_error_data_result(
            message=f"You don't own the document {document_id}."
        )
    doc = doc[0]
    req = request.json
    if not req.get("content"):
        return get_error_data_result(message="`content` is required")
    if "important_keywords" in req:
        if not isinstance(req["important_keywords"], list):
            return get_error_data_result(
                "`important_keywords` is required to be a list"
            )
    if "questions" in req:
        if not isinstance(req["questions"], list):
            return get_error_data_result(
                "`questions` is required to be a list"
            )
    md5 = hashlib.md5()
    md5.update((req["content"] + document_id).encode("utf-8"))
    chunk_id = md5.hexdigest()
    d = {
        "id": chunk_id,
        "content_ltks": rag_tokenizer.tokenize(req["content"]),
        "content_with_weight": req["content"],
    }
    d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
    d["important_kwd"] = req.get("important_keywords", [])
    d["important_tks"] = rag_tokenizer.tokenize(
        " ".join(req.get("important_keywords", []))
    )
    d["question_kwd"] = req.get("questions", [])
    d["question_tks"] = rag_tokenizer.tokenize(
        "\n".join(req.get("questions", []))
    )
    d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
    d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
    d["kb_id"] = dataset_id
    d["docnm_kwd"] = doc.name
    d["doc_id"] = document_id
    embd_id = DocumentService.get_embd_id(document_id)
    embd_mdl = TenantLLMService.model_instance(
        tenant_id, LLMType.EMBEDDING.value, embd_id
    )
    v, c = embd_mdl.encode([doc.name, req["content"] if not d["question_kwd"] else "\n".join(d["question_kwd"])])
    v = 0.1 * v[0] + 0.9 * v[1]
    d["q_%d_vec" % len(v)] = v.tolist()
    settings.docStoreConn.insert([d], search.index_name(tenant_id), dataset_id)
    DocumentService.increment_chunk_num(doc.id, doc.kb_id, c, 1, 0)
    # rename keys
    key_mapping = {
        "id": "id",
        "content_with_weight": "content",
        "doc_id": "document_id",
        "important_kwd": "important_keywords",
        "question_kwd": "questions",
        "kb_id": "dataset_id",
        "create_timestamp_flt": "create_timestamp",
        "create_time": "create_time",
        "document_keyword": "document",
    }
    renamed_chunk = {}
    for key, value in d.items():
        if key in key_mapping:
            new_key = key_mapping.get(key, key)
            renamed_chunk[new_key] = value
    return get_result(data={"chunk": renamed_chunk})
    # return get_result(data={"chunk_id": chunk_id})
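
# Illustrative client call for adding a chunk manually (a sketch; host, prefix, and
# API key are assumptions). `content` is required; `important_keywords` and
# `questions` must be lists when present.
#
#   import requests
#
#   resp = requests.post(
#       "http://<host>/api/v1/datasets/<dataset_id>/documents/<document_id>/chunks",
#       headers={"Authorization": "Bearer <API_KEY>"},
#       json={"content": "Example chunk content.", "important_keywords": ["example"]},
#   )
#   print(resp.json()["data"]["chunk"]["id"])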


@manager.route(  # noqa: F821
    "/datasets/<dataset_id>/documents/<document_id>/chunks", methods=["DELETE"]
)
@token_required
def rm_chunk(tenant_id, dataset_id, document_id):
    """
    Remove chunks from a document.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document.
      - in: body
        name: body
        description: Chunk removal parameters.
        required: true
        schema:
          type: object
          properties:
            chunk_ids:
              type: array
              items:
                type: string
              description: List of chunk IDs to remove.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Chunks removed successfully.
        schema:
          type: object
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    req = request.json
    condition = {"doc_id": document_id}
    if "chunk_ids" in req:
        condition["id"] = req["chunk_ids"]
    chunk_number = settings.docStoreConn.delete(condition, search.index_name(tenant_id), dataset_id)
    if chunk_number != 0:
        DocumentService.decrement_chunk_num(document_id, dataset_id, 1, chunk_number, 0)
    if "chunk_ids" in req and chunk_number != len(req["chunk_ids"]):
        return get_error_data_result(message=f"rm_chunk deleted chunks {chunk_number}, expect {len(req['chunk_ids'])}")
    return get_result(message=f"deleted {chunk_number} chunks")


@manager.route(  # noqa: F821
    "/datasets/<dataset_id>/documents/<document_id>/chunks/<chunk_id>", methods=["PUT"]
)
@token_required
def update_chunk(tenant_id, dataset_id, document_id, chunk_id):
    """
    Update a chunk within a document.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document.
      - in: path
        name: chunk_id
        type: string
        required: true
        description: ID of the chunk to update.
      - in: body
        name: body
        description: Chunk update parameters.
        required: true
        schema:
          type: object
          properties:
            content:
              type: string
              description: Updated content of the chunk.
            important_keywords:
              type: array
              items:
                type: string
              description: Updated important keywords.
            available:
              type: boolean
              description: Availability status of the chunk.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Chunk updated successfully.
        schema:
          type: object
    """
    chunk = settings.docStoreConn.get(chunk_id, search.index_name(tenant_id), [dataset_id])
    if chunk is None:
        return get_error_data_result(f"Can't find this chunk {chunk_id}")
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    doc = DocumentService.query(id=document_id, kb_id=dataset_id)
    if not doc:
        return get_error_data_result(
            message=f"You don't own the document {document_id}."
        )
    doc = doc[0]
    req = request.json
    if "content" in req:
        content = req["content"]
    else:
        content = chunk.get("content_with_weight", "")
    d = {"id": chunk_id, "content_with_weight": content}
    d["content_ltks"] = rag_tokenizer.tokenize(d["content_with_weight"])
    d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
    if "important_keywords" in req:
        if not isinstance(req["important_keywords"], list):
            return get_error_data_result("`important_keywords` should be a list")
        d["important_kwd"] = req.get("important_keywords", [])
        d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
    if "questions" in req:
        if not isinstance(req["questions"], list):
            return get_error_data_result("`questions` should be a list")
        d["question_kwd"] = req.get("questions")
        d["question_tks"] = rag_tokenizer.tokenize("\n".join(req["questions"]))
    if "available" in req:
        d["available_int"] = int(req["available"])
    embd_id = DocumentService.get_embd_id(document_id)
    embd_mdl = TenantLLMService.model_instance(
        tenant_id, LLMType.EMBEDDING.value, embd_id
    )
    if doc.parser_id == ParserType.QA:
        arr = [t for t in re.split(r"[\n\t]", d["content_with_weight"]) if len(t) > 1]
        if len(arr) != 2:
            return get_error_data_result(
                message="Q&A must be separated by TAB/ENTER key."
            )
        q, a = rmPrefix(arr[0]), rmPrefix(arr[1])
        d = beAdoc(
            d, arr[0], arr[1], not any([rag_tokenizer.is_chinese(t) for t in q + a])
        )
    v, c = embd_mdl.encode([doc.name, d["content_with_weight"] if not d.get("question_kwd") else "\n".join(d["question_kwd"])])
    v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
    d["q_%d_vec" % len(v)] = v.tolist()
    settings.docStoreConn.update({"id": chunk_id}, d, search.index_name(tenant_id), dataset_id)
    return get_result()


@manager.route("/retrieval", methods=["POST"])  # noqa: F821
@token_required
def retrieval_test(tenant_id):
    """
    Retrieve chunks based on a query.
    ---
    tags:
      - Retrieval
    security:
      - ApiKeyAuth: []
    parameters:
      - in: body
        name: body
        description: Retrieval parameters.
        required: true
        schema:
          type: object
          properties:
            dataset_ids:
              type: array
              items:
                type: string
              required: true
              description: List of dataset IDs to search in.
            question:
              type: string
              required: true
              description: Query string.
            document_ids:
              type: array
              items:
                type: string
              description: List of document IDs to filter.
            similarity_threshold:
              type: number
              format: float
              description: Similarity threshold.
            vector_similarity_weight:
              type: number
              format: float
              description: Vector similarity weight.
            top_k:
              type: integer
              description: Maximum number of chunks to return.
            highlight:
              type: boolean
              description: Whether to highlight matched content.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Retrieval results.
        schema:
          type: object
          properties:
            chunks:
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                    description: Chunk ID.
                  content:
                    type: string
                    description: Chunk content.
                  document_id:
                    type: string
                    description: ID of the document.
                  dataset_id:
                    type: string
                    description: ID of the dataset.
                  similarity:
                    type: number
                    format: float
                    description: Similarity score.
    """
    req = request.json
    if not req.get("dataset_ids"):
        return get_error_data_result("`dataset_ids` is required.")
    kb_ids = req["dataset_ids"]
    if not isinstance(kb_ids, list):
        return get_error_data_result("`dataset_ids` should be a list")
    kbs = KnowledgebaseService.get_by_ids(kb_ids)
    for id in kb_ids:
        if not KnowledgebaseService.accessible(kb_id=id, user_id=tenant_id):
            return get_error_data_result(f"You don't own the dataset {id}.")
    embd_nms = list(set([kb.embd_id for kb in kbs]))
    if len(embd_nms) != 1:
        return get_result(
            message="Datasets use different embedding models.",
            code=settings.RetCode.AUTHENTICATION_ERROR,
        )
    if "question" not in req:
        return get_error_data_result("`question` is required.")
    page = int(req.get("page", 1))
    size = int(req.get("page_size", 30))
    question = req["question"]
    doc_ids = req.get("document_ids", [])
    if not isinstance(doc_ids, list):
        return get_error_data_result("`document_ids` should be a list")
    doc_ids_list = KnowledgebaseService.list_documents_by_ids(kb_ids)
    for doc_id in doc_ids:
        if doc_id not in doc_ids_list:
            return get_error_data_result(
                f"The datasets don't own the document {doc_id}"
            )
    similarity_threshold = float(req.get("similarity_threshold", 0.2))
    vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
    top = int(req.get("top_k", 1024))
    if req.get("highlight") == "False" or req.get("highlight") == "false":
        highlight = False
    else:
        highlight = True
    try:
        e, kb = KnowledgebaseService.get_by_id(kb_ids[0])
        if not e:
            return get_error_data_result(message="Dataset not found!")
        embd_mdl = TenantLLMService.model_instance(
            kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id
        )
        rerank_mdl = None
        if req.get("rerank_id"):
            rerank_mdl = TenantLLMService.model_instance(
                kb.tenant_id, LLMType.RERANK.value, llm_name=req["rerank_id"]
            )
        if req.get("keyword", False):
            chat_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.CHAT)
            question += keyword_extraction(chat_mdl, question)
        retr = settings.retrievaler if kb.parser_id != ParserType.KG else settings.kg_retrievaler
        ranks = retr.retrieval(
            question,
            embd_mdl,
            kb.tenant_id,
            kb_ids,
            page,
            size,
            similarity_threshold,
            vector_similarity_weight,
            top,
            doc_ids,
            rerank_mdl=rerank_mdl,
            highlight=highlight,
        )
        for c in ranks["chunks"]:
            c.pop("vector", None)
        # rename keys
        renamed_chunks = []
        for chunk in ranks["chunks"]:
            key_mapping = {
                "chunk_id": "id",
                "content_with_weight": "content",
                "doc_id": "document_id",
                "important_kwd": "important_keywords",
                "question_kwd": "questions",
                "docnm_kwd": "document_keyword",
            }
            rename_chunk = {}
            for key, value in chunk.items():
                new_key = key_mapping.get(key, key)
                rename_chunk[new_key] = value
            renamed_chunks.append(rename_chunk)
        ranks["chunks"] = renamed_chunks
        return get_result(data=ranks)
    except Exception as e:
        if str(e).find("not_found") > 0:
            return get_result(
                message="No chunk found! Check the chunk status please!",
                code=settings.RetCode.DATA_ERROR,
            )
        return server_error_response(e)
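
# Illustrative client call for the retrieval endpoint above (a sketch; host, prefix,
# API key, and IDs are assumptions). Only `dataset_ids` and `question` are required;
# the other fields fall back to the defaults used in retrieval_test.
#
#   import requests
#
#   resp = requests.post(
#       "http://<host>/api/v1/retrieval",
#       headers={"Authorization": "Bearer <API_KEY>"},
#       json={
#           "dataset_ids": ["<dataset_id>"],
#           "question": "example question",
#           "similarity_threshold": 0.2,
#           "vector_similarity_weight": 0.3,
#           "top_k": 1024,
#       },
#   )
#   for chunk in resp.json()["data"]["chunks"]:
#       print(chunk["similarity"], chunk["content"][:80])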