
doc.py

#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pathlib
import datetime

from api.db.services.dialog_service import keyword_extraction
from rag.app.qa import rmPrefix, beAdoc
from rag.nlp import rag_tokenizer
from api.db import LLMType, ParserType
from api.db.services.llm_service import TenantLLMService
from api import settings
import xxhash
import re
from api.utils.api_utils import token_required
from api.db.db_models import Task
from api.db.services.task_service import TaskService, queue_tasks
from api.utils.api_utils import server_error_response
from api.utils.api_utils import get_result, get_error_data_result
from io import BytesIO
from flask import request, send_file
from api.db import FileSource, TaskStatus, FileType
from api.db.db_models import File
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.utils.api_utils import construct_json_result, get_parser_config
from rag.nlp import search
from rag.utils import rmSpace
from rag.utils.storage_factory import STORAGE_IMPL

MAXIMUM_OF_UPLOADING_FILES = 256

@manager.route("/datasets/<dataset_id>/documents", methods=["POST"])  # noqa: F821
@token_required
def upload(dataset_id, tenant_id):
    """
    Upload documents to a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
      - in: formData
        name: file
        type: file
        required: true
        description: Document files to upload.
    responses:
      200:
        description: Successfully uploaded documents.
        schema:
          type: object
          properties:
            data:
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                    description: Document ID.
                  name:
                    type: string
                    description: Document name.
                  chunk_count:
                    type: integer
                    description: Number of chunks.
                  token_count:
                    type: integer
                    description: Number of tokens.
                  dataset_id:
                    type: string
                    description: ID of the dataset.
                  chunk_method:
                    type: string
                    description: Chunking method used.
                  run:
                    type: string
                    description: Processing status.
    """
    if "file" not in request.files:
        return get_error_data_result(
            message="No file part!", code=settings.RetCode.ARGUMENT_ERROR
        )
    file_objs = request.files.getlist("file")
    for file_obj in file_objs:
        if file_obj.filename == "":
            return get_result(
                message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR
            )
    '''
    # total size
    total_size = 0
    for file_obj in file_objs:
        file_obj.seek(0, os.SEEK_END)
        total_size += file_obj.tell()
        file_obj.seek(0)
    MAX_TOTAL_FILE_SIZE = 10 * 1024 * 1024
    if total_size > MAX_TOTAL_FILE_SIZE:
        return get_result(
            message=f"Total file size exceeds 10MB limit! ({total_size / (1024 * 1024):.2f} MB)",
            code=settings.RetCode.ARGUMENT_ERROR,
        )
    '''
    e, kb = KnowledgebaseService.get_by_id(dataset_id)
    if not e:
        raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
    err, files = FileService.upload_document(kb, file_objs, tenant_id)
    if err:
        return get_result(message="\n".join(err), code=settings.RetCode.SERVER_ERROR)
    # rename keys
    renamed_doc_list = []
    for file in files:
        doc = file[0]
        key_mapping = {
            "chunk_num": "chunk_count",
            "kb_id": "dataset_id",
            "token_num": "token_count",
            "parser_id": "chunk_method",
        }
        renamed_doc = {}
        for key, value in doc.items():
            new_key = key_mapping.get(key, key)
            renamed_doc[new_key] = value
        renamed_doc["run"] = "UNSTART"
        renamed_doc_list.append(renamed_doc)
    return get_result(data=renamed_doc_list)
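
# Client-side sketch (illustrative only, not part of this module): uploading a
# file with `requests`. BASE_URL, API_KEY and the IDs are placeholders, and the
# exact URL prefix depends on how `manager` is mounted.
#
#   import requests
#   resp = requests.post(
#       f"{BASE_URL}/datasets/{dataset_id}/documents",
#       headers={"Authorization": f"Bearer {API_KEY}"},
#       files={"file": open("manual.pdf", "rb")},
#   )
#   docs = resp.json()["data"]  # each item includes id, name, chunk_count, run, ...

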
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["PUT"])  # noqa: F821
@token_required
def update_doc(tenant_id, dataset_id, document_id):
    """
    Update a document within a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document to update.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
      - in: body
        name: body
        description: Document update parameters.
        required: true
        schema:
          type: object
          properties:
            name:
              type: string
              description: New name of the document.
            parser_config:
              type: object
              description: Parser configuration.
            chunk_method:
              type: string
              description: Chunking method.
    responses:
      200:
        description: Document updated successfully.
        schema:
          type: object
    """
    req = request.json
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(message="You don't own the dataset.")
    doc = DocumentService.query(kb_id=dataset_id, id=document_id)
    if not doc:
        return get_error_data_result(message="The dataset doesn't own the document.")
    doc = doc[0]
    if "chunk_count" in req:
        if req["chunk_count"] != doc.chunk_num:
            return get_error_data_result(message="Can't change `chunk_count`.")
    if "token_count" in req:
        if req["token_count"] != doc.token_num:
            return get_error_data_result(message="Can't change `token_count`.")
    if "progress" in req:
        if req["progress"] != doc.progress:
            return get_error_data_result(message="Can't change `progress`.")
    if "name" in req and req["name"] != doc.name:
        if (
            pathlib.Path(req["name"].lower()).suffix
            != pathlib.Path(doc.name.lower()).suffix
        ):
            return get_result(
                message="The extension of file can't be changed",
                code=settings.RetCode.ARGUMENT_ERROR,
            )
        for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
            if d.name == req["name"]:
                return get_error_data_result(
                    message="Duplicated document name in the same dataset."
                )
        if not DocumentService.update_by_id(document_id, {"name": req["name"]}):
            return get_error_data_result(message="Database error (Document rename)!")
        informs = File2DocumentService.get_by_document_id(document_id)
        if informs:
            e, file = FileService.get_by_id(informs[0].file_id)
            FileService.update_by_id(file.id, {"name": req["name"]})
    if "parser_config" in req:
        DocumentService.update_parser_config(doc.id, req["parser_config"])
    if "chunk_method" in req:
        valid_chunk_method = {
            "naive",
            "manual",
            "qa",
            "table",
            "paper",
            "book",
            "laws",
            "presentation",
            "picture",
            "one",
            "knowledge_graph",
            "email",
        }
        if req.get("chunk_method") not in valid_chunk_method:
            return get_error_data_result(
                f"`chunk_method` {req['chunk_method']} doesn't exist"
            )
        if doc.parser_id.lower() == req["chunk_method"].lower():
            return get_result()
        if doc.type == FileType.VISUAL or re.search(r"\.(ppt|pptx|pages)$", doc.name):
            return get_error_data_result(message="Not supported yet!")
        e = DocumentService.update_by_id(
            doc.id,
            {
                "parser_id": req["chunk_method"],
                "progress": 0,
                "progress_msg": "",
                "run": TaskStatus.UNSTART.value,
            },
        )
        if not e:
            return get_error_data_result(message="Document not found!")
        req["parser_config"] = get_parser_config(
            req["chunk_method"], req.get("parser_config")
        )
        DocumentService.update_parser_config(doc.id, req["parser_config"])
        if doc.token_num > 0:
            e = DocumentService.increment_chunk_num(
                doc.id,
                doc.kb_id,
                doc.token_num * -1,
                doc.chunk_num * -1,
                doc.process_duation * -1,
            )
            if not e:
                return get_error_data_result(message="Document not found!")
            settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), dataset_id)
    return get_result()
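
# Client-side sketch (placeholders as above): renaming a document and switching
# its chunk method; the new name must keep the original file extension.
#
#   resp = requests.put(
#       f"{BASE_URL}/datasets/{dataset_id}/documents/{document_id}",
#       headers={"Authorization": f"Bearer {API_KEY}"},
#       json={"name": "manual_v2.pdf", "chunk_method": "naive"},
#   )

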
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["GET"])  # noqa: F821
@token_required
def download(tenant_id, dataset_id, document_id):
    """
    Download a document from a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    produces:
      - application/octet-stream
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document to download.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Document file stream.
        schema:
          type: file
      400:
        description: Error message.
        schema:
          type: object
    """
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(message=f"You do not own the dataset {dataset_id}.")
    doc = DocumentService.query(kb_id=dataset_id, id=document_id)
    if not doc:
        return get_error_data_result(
            message=f"The dataset doesn't own the document {document_id}."
        )
    # The process of downloading
    doc_id, doc_location = File2DocumentService.get_storage_address(
        doc_id=document_id
    )  # minio address
    file_stream = STORAGE_IMPL.get(doc_id, doc_location)
    if not file_stream:
        return construct_json_result(
            message="This file is empty.", code=settings.RetCode.DATA_ERROR
        )
    file = BytesIO(file_stream)
    # Use send_file with a proper filename and MIME type
    return send_file(
        file,
        as_attachment=True,
        download_name=doc[0].name,
        mimetype="application/octet-stream",  # Set a default MIME type
    )
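
# Client-side sketch (placeholders as above): the response body is the raw file
# stream, so write resp.content to disk rather than calling resp.json().
#
#   resp = requests.get(
#       f"{BASE_URL}/datasets/{dataset_id}/documents/{document_id}",
#       headers={"Authorization": f"Bearer {API_KEY}"},
#   )
#   with open("downloaded.pdf", "wb") as f:
#       f.write(resp.content)

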
@manager.route("/datasets/<dataset_id>/documents", methods=["GET"])  # noqa: F821
@token_required
def list_docs(dataset_id, tenant_id):
    """
    List documents in a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: query
        name: id
        type: string
        required: false
        description: Filter by document ID.
      - in: query
        name: page
        type: integer
        required: false
        default: 1
        description: Page number.
      - in: query
        name: page_size
        type: integer
        required: false
        default: 30
        description: Number of items per page.
      - in: query
        name: orderby
        type: string
        required: false
        default: "create_time"
        description: Field to order by.
      - in: query
        name: desc
        type: boolean
        required: false
        default: true
        description: Order in descending.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: List of documents.
        schema:
          type: object
          properties:
            total:
              type: integer
              description: Total number of documents.
            docs:
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                    description: Document ID.
                  name:
                    type: string
                    description: Document name.
                  chunk_count:
                    type: integer
                    description: Number of chunks.
                  token_count:
                    type: integer
                    description: Number of tokens.
                  dataset_id:
                    type: string
                    description: ID of the dataset.
                  chunk_method:
                    type: string
                    description: Chunking method used.
                  run:
                    type: string
                    description: Processing status.
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    id = request.args.get("id")
    name = request.args.get("name")
    if id and not DocumentService.query(id=id, kb_id=dataset_id):
        return get_error_data_result(message=f"You don't own the document {id}.")
    if name and not DocumentService.query(name=name, kb_id=dataset_id):
        return get_error_data_result(message=f"You don't own the document {name}.")
    page = int(request.args.get("page", 1))
    keywords = request.args.get("keywords", "")
    page_size = int(request.args.get("page_size", 30))
    orderby = request.args.get("orderby", "create_time")
    if request.args.get("desc") == "False":
        desc = False
    else:
        desc = True
    docs, tol = DocumentService.get_list(
        dataset_id, page, page_size, orderby, desc, keywords, id, name
    )
    # rename keys
    renamed_doc_list = []
    for doc in docs:
        key_mapping = {
            "chunk_num": "chunk_count",
            "kb_id": "dataset_id",
            "token_num": "token_count",
            "parser_id": "chunk_method",
        }
        run_mapping = {
            "0": "UNSTART",
            "1": "RUNNING",
            "2": "CANCEL",
            "3": "DONE",
            "4": "FAIL",
        }
        renamed_doc = {}
        for key, value in doc.items():
            new_key = key_mapping.get(key, key)
            renamed_doc[new_key] = value
            if key == "run":
                renamed_doc["run"] = run_mapping.get(str(value))
        renamed_doc_list.append(renamed_doc)
    return get_result(data={"total": tol, "docs": renamed_doc_list})
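
# Client-side sketch (placeholders as above): paging through a dataset's
# documents, newest first.
#
#   resp = requests.get(
#       f"{BASE_URL}/datasets/{dataset_id}/documents",
#       headers={"Authorization": f"Bearer {API_KEY}"},
#       params={"page": 1, "page_size": 30, "orderby": "create_time", "desc": "True"},
#   )
#   payload = resp.json()["data"]
#   total, docs = payload["total"], payload["docs"]

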
@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"])  # noqa: F821
@token_required
def delete(tenant_id, dataset_id):
    """
    Delete documents from a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: body
        name: body
        description: Document deletion parameters.
        required: true
        schema:
          type: object
          properties:
            ids:
              type: array
              items:
                type: string
              description: List of document IDs to delete.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Documents deleted successfully.
        schema:
          type: object
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    req = request.json
    if not req:
        doc_ids = None
    else:
        doc_ids = req.get("ids")
    if not doc_ids:
        doc_list = []
        docs = DocumentService.query(kb_id=dataset_id)
        for doc in docs:
            doc_list.append(doc.id)
    else:
        doc_list = doc_ids
    root_folder = FileService.get_root_folder(tenant_id)
    pf_id = root_folder["id"]
    FileService.init_knowledgebase_docs(pf_id, tenant_id)
    errors = ""
    for doc_id in doc_list:
        try:
            e, doc = DocumentService.get_by_id(doc_id)
            if not e:
                return get_error_data_result(message="Document not found!")
            tenant_id = DocumentService.get_tenant_id(doc_id)
            if not tenant_id:
                return get_error_data_result(message="Tenant not found!")
            b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
            if not DocumentService.remove_document(doc, tenant_id):
                return get_error_data_result(
                    message="Database error (Document removal)!"
                )
            f2d = File2DocumentService.get_by_document_id(doc_id)
            FileService.filter_delete(
                [
                    File.source_type == FileSource.KNOWLEDGEBASE,
                    File.id == f2d[0].file_id,
                ]
            )
            File2DocumentService.delete_by_document_id(doc_id)
            STORAGE_IMPL.rm(b, n)
        except Exception as e:
            errors += str(e)
    if errors:
        return get_result(message=errors, code=settings.RetCode.SERVER_ERROR)
    return get_result()
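
# Client-side sketch (placeholders as above): deleting two documents by ID;
# omitting "ids" (or sending no body) deletes every document in the dataset.
#
#   resp = requests.delete(
#       f"{BASE_URL}/datasets/{dataset_id}/documents",
#       headers={"Authorization": f"Bearer {API_KEY}"},
#       json={"ids": [doc_id_1, doc_id_2]},
#   )

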
@manager.route("/datasets/<dataset_id>/chunks", methods=["POST"])  # noqa: F821
@token_required
def parse(tenant_id, dataset_id):
    """
    Start parsing documents into chunks.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: body
        name: body
        description: Parsing parameters.
        required: true
        schema:
          type: object
          properties:
            document_ids:
              type: array
              items:
                type: string
              description: List of document IDs to parse.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Parsing started successfully.
        schema:
          type: object
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    req = request.json
    if not req.get("document_ids"):
        return get_error_data_result("`document_ids` is required")
    for id in req["document_ids"]:
        doc = DocumentService.query(id=id, kb_id=dataset_id)
        if not doc:
            return get_error_data_result(message=f"You don't own the document {id}.")
        if doc[0].progress != 0.0:
            return get_error_data_result(
                "Can't parse a document whose progress is not 0."
            )
        info = {"run": "1", "progress": 0}
        info["progress_msg"] = ""
        info["chunk_num"] = 0
        info["token_num"] = 0
        DocumentService.update_by_id(id, info)
        settings.docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), dataset_id)
        TaskService.filter_delete([Task.doc_id == id])
        e, doc = DocumentService.get_by_id(id)
        doc = doc.to_dict()
        doc["tenant_id"] = tenant_id
        bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"])
        queue_tasks(doc, bucket, name)
    return get_result()
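
# Client-side sketch (placeholders as above): queueing documents for parsing;
# each must still be at progress 0.
#
#   resp = requests.post(
#       f"{BASE_URL}/datasets/{dataset_id}/chunks",
#       headers={"Authorization": f"Bearer {API_KEY}"},
#       json={"document_ids": [doc_id_1, doc_id_2]},
#   )

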
@manager.route("/datasets/<dataset_id>/chunks", methods=["DELETE"])  # noqa: F821
@token_required
def stop_parsing(tenant_id, dataset_id):
    """
    Stop parsing documents into chunks.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: body
        name: body
        description: Stop parsing parameters.
        required: true
        schema:
          type: object
          properties:
            document_ids:
              type: array
              items:
                type: string
              description: List of document IDs to stop parsing.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Parsing stopped successfully.
        schema:
          type: object
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    req = request.json
    if not req.get("document_ids"):
        return get_error_data_result("`document_ids` is required")
    for id in req["document_ids"]:
        doc = DocumentService.query(id=id, kb_id=dataset_id)
        if not doc:
            return get_error_data_result(message=f"You don't own the document {id}.")
        if int(doc[0].progress) == 1 or int(doc[0].progress) == 0:
            return get_error_data_result(
                "Can't stop parsing document with progress at 0 or 1"
            )
        info = {"run": "2", "progress": 0, "chunk_num": 0}
        DocumentService.update_by_id(id, info)
        settings.docStoreConn.delete({"doc_id": doc[0].id}, search.index_name(tenant_id), dataset_id)
    return get_result()
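
# Client-side sketch (placeholders as above): cancelling parsing for documents
# that are currently in progress (progress strictly between 0 and 1).
#
#   resp = requests.delete(
#       f"{BASE_URL}/datasets/{dataset_id}/chunks",
#       headers={"Authorization": f"Bearer {API_KEY}"},
#       json={"document_ids": [doc_id_1]},
#   )

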
@manager.route("/datasets/<dataset_id>/documents/<document_id>/chunks", methods=["GET"])  # noqa: F821
@token_required
def list_chunks(tenant_id, dataset_id, document_id):
    """
    List chunks of a document.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document.
      - in: query
        name: page
        type: integer
        required: false
        default: 1
        description: Page number.
      - in: query
        name: page_size
        type: integer
        required: false
        default: 30
        description: Number of items per page.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: List of chunks.
        schema:
          type: object
          properties:
            total:
              type: integer
              description: Total number of chunks.
            chunks:
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                    description: Chunk ID.
                  content:
                    type: string
                    description: Chunk content.
                  document_id:
                    type: string
                    description: ID of the document.
                  important_keywords:
                    type: array
                    items:
                      type: string
                    description: Important keywords.
                  image_id:
                    type: string
                    description: Image ID associated with the chunk.
            doc:
              type: object
              description: Document details.
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    doc = DocumentService.query(id=document_id, kb_id=dataset_id)
    if not doc:
        return get_error_data_result(
            message=f"You don't own the document {document_id}."
        )
    doc = doc[0]
    req = request.args
    doc_id = document_id
    page = int(req.get("page", 1))
    size = int(req.get("page_size", 30))
    question = req.get("keywords", "")
    query = {
        "doc_ids": [doc_id],
        "page": page,
        "size": size,
        "question": question,
        "sort": True,
    }
    key_mapping = {
        "chunk_num": "chunk_count",
        "kb_id": "dataset_id",
        "token_num": "token_count",
        "parser_id": "chunk_method",
    }
    run_mapping = {
        "0": "UNSTART",
        "1": "RUNNING",
        "2": "CANCEL",
        "3": "DONE",
        "4": "FAIL",
    }
    doc = doc.to_dict()
    renamed_doc = {}
    for key, value in doc.items():
        new_key = key_mapping.get(key, key)
        renamed_doc[new_key] = value
        if key == "run":
            renamed_doc["run"] = run_mapping.get(str(value))
    res = {"total": 0, "chunks": [], "doc": renamed_doc}
    origin_chunks = []
    if settings.docStoreConn.indexExist(search.index_name(tenant_id), dataset_id):
        sres = settings.retrievaler.search(
            query, search.index_name(tenant_id), [dataset_id], emb_mdl=None, highlight=True
        )
        res["total"] = sres.total
        sign = 0
        for id in sres.ids:
            d = {
                "id": id,
                "content_with_weight": (
                    rmSpace(sres.highlight[id])
                    if question and id in sres.highlight
                    else sres.field[id].get("content_with_weight", "")
                ),
                "doc_id": sres.field[id]["doc_id"],
                "docnm_kwd": sres.field[id]["docnm_kwd"],
                "important_kwd": sres.field[id].get("important_kwd", []),
                "question_kwd": sres.field[id].get("question_kwd", []),
                "img_id": sres.field[id].get("img_id", ""),
                "available_int": sres.field[id].get("available_int", 1),
                "positions": sres.field[id].get("position_int", []),
            }
            if len(d["positions"]) % 5 == 0:
                poss = []
                for i in range(0, len(d["positions"]), 5):
                    poss.append(
                        [
                            float(d["positions"][i]),
                            float(d["positions"][i + 1]),
                            float(d["positions"][i + 2]),
                            float(d["positions"][i + 3]),
                            float(d["positions"][i + 4]),
                        ]
                    )
                d["positions"] = poss
            origin_chunks.append(d)
            if req.get("id"):
                if req.get("id") == id:
                    origin_chunks.clear()
                    origin_chunks.append(d)
                    sign = 1
                    break
        if req.get("id"):
            if sign == 0:
                return get_error_data_result(f"Can't find this chunk {req.get('id')}")
    for chunk in origin_chunks:
        key_mapping = {
            "id": "id",
            "content_with_weight": "content",
            "doc_id": "document_id",
            "important_kwd": "important_keywords",
            "question_kwd": "questions",
            "img_id": "image_id",
            "available_int": "available",
        }
        renamed_chunk = {}
        for key, value in chunk.items():
            new_key = key_mapping.get(key, key)
            renamed_chunk[new_key] = value
        if renamed_chunk["available"] == 0:
            renamed_chunk["available"] = False
        if renamed_chunk["available"] == 1:
            renamed_chunk["available"] = True
        res["chunks"].append(renamed_chunk)
    return get_result(data=res)
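
# Client-side sketch (placeholders as above): listing a document's chunks and
# filtering them with a keyword query.
#
#   resp = requests.get(
#       f"{BASE_URL}/datasets/{dataset_id}/documents/{document_id}/chunks",
#       headers={"Authorization": f"Bearer {API_KEY}"},
#       params={"page": 1, "page_size": 30, "keywords": "warranty"},
#   )
#   chunks = resp.json()["data"]["chunks"]

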
@manager.route(  # noqa: F821
    "/datasets/<dataset_id>/documents/<document_id>/chunks", methods=["POST"]
)
@token_required
def add_chunk(tenant_id, dataset_id, document_id):
    """
    Add a chunk to a document.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document.
      - in: body
        name: body
        description: Chunk data.
        required: true
        schema:
          type: object
          properties:
            content:
              type: string
              required: true
              description: Content of the chunk.
            important_keywords:
              type: array
              items:
                type: string
              description: Important keywords.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Chunk added successfully.
        schema:
          type: object
          properties:
            chunk:
              type: object
              properties:
                id:
                  type: string
                  description: Chunk ID.
                content:
                  type: string
                  description: Chunk content.
                document_id:
                  type: string
                  description: ID of the document.
                important_keywords:
                  type: array
                  items:
                    type: string
                  description: Important keywords.
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    doc = DocumentService.query(id=document_id, kb_id=dataset_id)
    if not doc:
        return get_error_data_result(
            message=f"You don't own the document {document_id}."
        )
    doc = doc[0]
    req = request.json
    if not req.get("content"):
        return get_error_data_result(message="`content` is required")
    if "important_keywords" in req:
        if not isinstance(req["important_keywords"], list):
            return get_error_data_result(
                "`important_keywords` is required to be a list"
            )
    if "questions" in req:
        if not isinstance(req["questions"], list):
            return get_error_data_result(
                "`questions` is required to be a list"
            )
    chunk_id = xxhash.xxh64((req["content"] + document_id).encode("utf-8")).hexdigest()
    d = {
        "id": chunk_id,
        "content_ltks": rag_tokenizer.tokenize(req["content"]),
        "content_with_weight": req["content"],
    }
    d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
    d["important_kwd"] = req.get("important_keywords", [])
    d["important_tks"] = rag_tokenizer.tokenize(
        " ".join(req.get("important_keywords", []))
    )
    d["question_kwd"] = req.get("questions", [])
    d["question_tks"] = rag_tokenizer.tokenize(
        "\n".join(req.get("questions", []))
    )
    d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
    d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
    d["kb_id"] = dataset_id
    d["docnm_kwd"] = doc.name
    d["doc_id"] = document_id
    embd_id = DocumentService.get_embd_id(document_id)
    embd_mdl = TenantLLMService.model_instance(
        tenant_id, LLMType.EMBEDDING.value, embd_id
    )
    v, c = embd_mdl.encode([doc.name, req["content"] if not d["question_kwd"] else "\n".join(d["question_kwd"])])
    v = 0.1 * v[0] + 0.9 * v[1]
    d["q_%d_vec" % len(v)] = v.tolist()
    settings.docStoreConn.insert([d], search.index_name(tenant_id), dataset_id)
    DocumentService.increment_chunk_num(doc.id, doc.kb_id, c, 1, 0)
    # rename keys
    key_mapping = {
        "id": "id",
        "content_with_weight": "content",
        "doc_id": "document_id",
        "important_kwd": "important_keywords",
        "question_kwd": "questions",
        "kb_id": "dataset_id",
        "create_timestamp_flt": "create_timestamp",
        "create_time": "create_time",
        "document_keyword": "document",
    }
    renamed_chunk = {}
    for key, value in d.items():
        if key in key_mapping:
            new_key = key_mapping.get(key, key)
            renamed_chunk[new_key] = value
    return get_result(data={"chunk": renamed_chunk})
    # return get_result(data={"chunk_id": chunk_id})
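
# Client-side sketch (placeholders as above): inserting a hand-written chunk;
# the content is embedded immediately with the dataset's embedding model.
#
#   resp = requests.post(
#       f"{BASE_URL}/datasets/{dataset_id}/documents/{document_id}/chunks",
#       headers={"Authorization": f"Bearer {API_KEY}"},
#       json={"content": "The warranty lasts 24 months.",
#             "important_keywords": ["warranty"]},
#   )
#   chunk = resp.json()["data"]["chunk"]

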
@manager.route(  # noqa: F821
    "/datasets/<dataset_id>/documents/<document_id>/chunks", methods=["DELETE"]
)
@token_required
def rm_chunk(tenant_id, dataset_id, document_id):
    """
    Remove chunks from a document.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document.
      - in: body
        name: body
        description: Chunk removal parameters.
        required: true
        schema:
          type: object
          properties:
            chunk_ids:
              type: array
              items:
                type: string
              description: List of chunk IDs to remove.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Chunks removed successfully.
        schema:
          type: object
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    req = request.json
    condition = {"doc_id": document_id}
    if "chunk_ids" in req:
        condition["id"] = req["chunk_ids"]
    chunk_number = settings.docStoreConn.delete(condition, search.index_name(tenant_id), dataset_id)
    if chunk_number != 0:
        DocumentService.decrement_chunk_num(document_id, dataset_id, 1, chunk_number, 0)
    if "chunk_ids" in req and chunk_number != len(req["chunk_ids"]):
        return get_error_data_result(message=f"rm_chunk deleted chunks {chunk_number}, expect {len(req['chunk_ids'])}")
    return get_result(message=f"deleted {chunk_number} chunks")
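
# Client-side sketch (placeholders as above): removing selected chunks; leaving
# out "chunk_ids" deletes all chunks of the document.
#
#   resp = requests.delete(
#       f"{BASE_URL}/datasets/{dataset_id}/documents/{document_id}/chunks",
#       headers={"Authorization": f"Bearer {API_KEY}"},
#       json={"chunk_ids": [chunk_id_1, chunk_id_2]},
#   )

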
@manager.route(  # noqa: F821
    "/datasets/<dataset_id>/documents/<document_id>/chunks/<chunk_id>", methods=["PUT"]
)
@token_required
def update_chunk(tenant_id, dataset_id, document_id, chunk_id):
    """
    Update a chunk within a document.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document.
      - in: path
        name: chunk_id
        type: string
        required: true
        description: ID of the chunk to update.
      - in: body
        name: body
        description: Chunk update parameters.
        required: true
        schema:
          type: object
          properties:
            content:
              type: string
              description: Updated content of the chunk.
            important_keywords:
              type: array
              items:
                type: string
              description: Updated important keywords.
            available:
              type: boolean
              description: Availability status of the chunk.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Chunk updated successfully.
        schema:
          type: object
    """
    chunk = settings.docStoreConn.get(chunk_id, search.index_name(tenant_id), [dataset_id])
    if chunk is None:
        return get_error_data_result(f"Can't find this chunk {chunk_id}")
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    doc = DocumentService.query(id=document_id, kb_id=dataset_id)
    if not doc:
        return get_error_data_result(
            message=f"You don't own the document {document_id}."
        )
    doc = doc[0]
    req = request.json
    if "content" in req:
        content = req["content"]
    else:
        content = chunk.get("content_with_weight", "")
    d = {"id": chunk_id, "content_with_weight": content}
    d["content_ltks"] = rag_tokenizer.tokenize(d["content_with_weight"])
    d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
    if "important_keywords" in req:
        if not isinstance(req["important_keywords"], list):
            return get_error_data_result("`important_keywords` should be a list")
        d["important_kwd"] = req.get("important_keywords", [])
        d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
    if "questions" in req:
        if not isinstance(req["questions"], list):
            return get_error_data_result("`questions` should be a list")
        d["question_kwd"] = req.get("questions")
        d["question_tks"] = rag_tokenizer.tokenize("\n".join(req["questions"]))
    if "available" in req:
        d["available_int"] = int(req["available"])
    embd_id = DocumentService.get_embd_id(document_id)
    embd_mdl = TenantLLMService.model_instance(
        tenant_id, LLMType.EMBEDDING.value, embd_id
    )
    if doc.parser_id == ParserType.QA:
        arr = [t for t in re.split(r"[\n\t]", d["content_with_weight"]) if len(t) > 1]
        if len(arr) != 2:
            return get_error_data_result(
                message="Q&A must be separated by TAB/ENTER key."
            )
        q, a = rmPrefix(arr[0]), rmPrefix(arr[1])
        d = beAdoc(
            d, arr[0], arr[1], not any([rag_tokenizer.is_chinese(t) for t in q + a])
        )
    v, c = embd_mdl.encode([doc.name, d["content_with_weight"] if not d.get("question_kwd") else "\n".join(d["question_kwd"])])
    v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
    d["q_%d_vec" % len(v)] = v.tolist()
    settings.docStoreConn.update({"id": chunk_id}, d, search.index_name(tenant_id), dataset_id)
    return get_result()
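
# Client-side sketch (placeholders as above): editing a chunk's content and
# toggling its availability; the chunk is re-embedded on update.
#
#   resp = requests.put(
#       f"{BASE_URL}/datasets/{dataset_id}/documents/{document_id}/chunks/{chunk_id}",
#       headers={"Authorization": f"Bearer {API_KEY}"},
#       json={"content": "Updated text.", "available": True},
#   )

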
@manager.route("/retrieval", methods=["POST"])  # noqa: F821
@token_required
def retrieval_test(tenant_id):
    """
    Retrieve chunks based on a query.
    ---
    tags:
      - Retrieval
    security:
      - ApiKeyAuth: []
    parameters:
      - in: body
        name: body
        description: Retrieval parameters.
        required: true
        schema:
          type: object
          properties:
            dataset_ids:
              type: array
              items:
                type: string
              required: true
              description: List of dataset IDs to search in.
            question:
              type: string
              required: true
              description: Query string.
            document_ids:
              type: array
              items:
                type: string
              description: List of document IDs to filter.
            similarity_threshold:
              type: number
              format: float
              description: Similarity threshold.
            vector_similarity_weight:
              type: number
              format: float
              description: Vector similarity weight.
            top_k:
              type: integer
              description: Maximum number of chunks to return.
            highlight:
              type: boolean
              description: Whether to highlight matched content.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Retrieval results.
        schema:
          type: object
          properties:
            chunks:
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                    description: Chunk ID.
                  content:
                    type: string
                    description: Chunk content.
                  document_id:
                    type: string
                    description: ID of the document.
                  dataset_id:
                    type: string
                    description: ID of the dataset.
                  similarity:
                    type: number
                    format: float
                    description: Similarity score.
    """
    req = request.json
    if not req.get("dataset_ids"):
        return get_error_data_result("`dataset_ids` is required.")
    kb_ids = req["dataset_ids"]
    if not isinstance(kb_ids, list):
        return get_error_data_result("`dataset_ids` should be a list")
    kbs = KnowledgebaseService.get_by_ids(kb_ids)
    for id in kb_ids:
        if not KnowledgebaseService.accessible(kb_id=id, user_id=tenant_id):
            return get_error_data_result(f"You don't own the dataset {id}.")
    embd_nms = list(set([kb.embd_id for kb in kbs]))
    if len(embd_nms) != 1:
        return get_result(
            message="Datasets use different embedding models.",
            code=settings.RetCode.AUTHENTICATION_ERROR,
        )
    if "question" not in req:
        return get_error_data_result("`question` is required.")
    page = int(req.get("page", 1))
    size = int(req.get("page_size", 30))
    question = req["question"]
    doc_ids = req.get("document_ids", [])
    if not isinstance(doc_ids, list):
        return get_error_data_result("`document_ids` should be a list")
    doc_ids_list = KnowledgebaseService.list_documents_by_ids(kb_ids)
    for doc_id in doc_ids:
        if doc_id not in doc_ids_list:
            return get_error_data_result(
                f"The datasets don't own the document {doc_id}"
            )
    similarity_threshold = float(req.get("similarity_threshold", 0.2))
    vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
    top = int(req.get("top_k", 1024))
    if req.get("highlight") == "False" or req.get("highlight") == "false":
        highlight = False
    else:
        highlight = True
    try:
        e, kb = KnowledgebaseService.get_by_id(kb_ids[0])
        if not e:
            return get_error_data_result(message="Dataset not found!")
        embd_mdl = TenantLLMService.model_instance(
            kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id
        )
        rerank_mdl = None
        if req.get("rerank_id"):
            rerank_mdl = TenantLLMService.model_instance(
                kb.tenant_id, LLMType.RERANK.value, llm_name=req["rerank_id"]
            )
        if req.get("keyword", False):
            chat_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.CHAT)
            question += keyword_extraction(chat_mdl, question)
        retr = settings.retrievaler if kb.parser_id != ParserType.KG else settings.kg_retrievaler
        ranks = retr.retrieval(
            question,
            embd_mdl,
            kb.tenant_id,
            kb_ids,
            page,
            size,
            similarity_threshold,
            vector_similarity_weight,
            top,
            doc_ids,
            rerank_mdl=rerank_mdl,
            highlight=highlight,
        )
        for c in ranks["chunks"]:
            c.pop("vector", None)
        # rename keys
        renamed_chunks = []
        for chunk in ranks["chunks"]:
            key_mapping = {
                "chunk_id": "id",
                "content_with_weight": "content",
                "doc_id": "document_id",
                "important_kwd": "important_keywords",
                "question_kwd": "questions",
                "docnm_kwd": "document_keyword",
            }
            rename_chunk = {}
            for key, value in chunk.items():
                new_key = key_mapping.get(key, key)
                rename_chunk[new_key] = value
            renamed_chunks.append(rename_chunk)
        ranks["chunks"] = renamed_chunks
        return get_result(data=ranks)
    except Exception as e:
        if str(e).find("not_found") > 0:
            return get_result(
                message="No chunk found! Check the chunk status please!",
                code=settings.RetCode.DATA_ERROR,
            )
        return server_error_response(e)
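
# Client-side sketch (placeholders as above): running a retrieval test across
# one or more datasets that share the same embedding model.
#
#   resp = requests.post(
#       f"{BASE_URL}/retrieval",
#       headers={"Authorization": f"Bearer {API_KEY}"},
#       json={
#           "dataset_ids": [dataset_id],
#           "question": "How long is the warranty?",
#           "similarity_threshold": 0.2,
#           "vector_similarity_weight": 0.3,
#           "top_k": 1024,
#       },
#   )
#   chunks = resp.json()["data"]["chunks"]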