Nelze vybrat více než 25 témat. Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

doc.py 7.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. from io import BytesIO
  2. from flask import request,send_file
  3. from api.utils.api_utils import get_json_result, construct_json_result, server_error_response
  4. from api.utils.api_utils import get_json_result, token_required, get_data_error_result
  5. from api.db import FileType, ParserType, FileSource, TaskStatus
  6. from api.db.db_models import File
  7. from api.db.services.document_service import DocumentService
  8. from api.db.services.file2document_service import File2DocumentService
  9. from api.db.services.file_service import FileService
  10. from api.db.services.knowledgebase_service import KnowledgebaseService
  11. from api.db.services.user_service import TenantService, UserTenantService
  12. from api.settings import RetCode
  13. from api.utils.api_utils import construct_json_result, construct_error_response
  14. from rag.utils.storage_factory import STORAGE_IMPL
  15. @manager.route('/dataset/<dataset_id>/documents/upload', methods=['POST'])
  16. @token_required
  17. def upload(dataset_id, tenant_id):
  18. if 'file' not in request.files:
  19. return get_json_result(
  20. data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
  21. file_objs = request.files.getlist('file')
  22. for file_obj in file_objs:
  23. if file_obj.filename == '':
  24. return get_json_result(
  25. data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
  26. e, kb = KnowledgebaseService.get_by_id(dataset_id)
  27. if not e:
  28. raise LookupError(f"Can't find the knowledgebase with ID {dataset_id}!")
  29. err, _ = FileService.upload_document(kb, file_objs, tenant_id)
  30. if err:
  31. return get_json_result(
  32. data=False, retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
  33. return get_json_result(data=True)
  34. @manager.route('/infos', methods=['GET'])
  35. @token_required
  36. def docinfos(tenant_id):
  37. req = request.args
  38. if "id" in req:
  39. doc_id = req["id"]
  40. e, doc = DocumentService.get_by_id(doc_id)
  41. return get_json_result(data=doc.to_json())
  42. if "name" in req:
  43. doc_name = req["name"]
  44. doc_id = DocumentService.get_doc_id_by_doc_name(doc_name)
  45. e, doc = DocumentService.get_by_id(doc_id)
  46. return get_json_result(data=doc.to_json())
  47. @manager.route('/save', methods=['POST'])
  48. @token_required
  49. def save_doc(tenant_id):
  50. req = request.json # Expecting JSON input
  51. if "id" in req:
  52. doc_id = req["id"]
  53. if "name" in req:
  54. doc_name = req["name"]
  55. doc_id = DocumentService.get_doc_id_by_doc_name(doc_name)
  56. data = request.json
  57. # Call the update method with the provided id and data
  58. try:
  59. num = DocumentService.update_by_id(doc_id, data)
  60. if num > 0:
  61. return get_json_result(retmsg="success", data={"updated_count": num})
  62. else:
  63. return get_json_result(retcode=404, retmsg="Document not found")
  64. except Exception as e:
  65. return get_json_result(retmsg=f"Error occurred: {str(e)}")
  66. @manager.route("/<dataset_id>/documents/<document_id>", methods=["GET"])
  67. @token_required
  68. def download_document(dataset_id, document_id):
  69. try:
  70. # Check whether there is this dataset
  71. exist, _ = KnowledgebaseService.get_by_id(dataset_id)
  72. if not exist:
  73. return construct_json_result(code=RetCode.DATA_ERROR,
  74. message=f"This dataset '{dataset_id}' cannot be found!")
  75. # Check whether there is this document
  76. exist, document = DocumentService.get_by_id(document_id)
  77. if not exist:
  78. return construct_json_result(message=f"This document '{document_id}' cannot be found!",
  79. code=RetCode.ARGUMENT_ERROR)
  80. # The process of downloading
  81. doc_id, doc_location = File2DocumentService.get_minio_address(doc_id=document_id) # minio address
  82. file_stream = STORAGE_IMPL.get(doc_id, doc_location)
  83. if not file_stream:
  84. return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)
  85. file = BytesIO(file_stream)
  86. # Use send_file with a proper filename and MIME type
  87. return send_file(
  88. file,
  89. as_attachment=True,
  90. download_name=document.name,
  91. mimetype='application/octet-stream' # Set a default MIME type
  92. )
  93. # Error
  94. except Exception as e:
  95. return construct_error_response(e)
  96. @manager.route('/dataset/<dataset_id>/documents', methods=['GET'])
  97. @token_required
  98. def list_docs(dataset_id,tenant_id):
  99. kb_id = request.args.get("kb_id")
  100. if not kb_id:
  101. return get_json_result(
  102. data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR)
  103. tenants = UserTenantService.query(user_id=tenant_id)
  104. for tenant in tenants:
  105. if KnowledgebaseService.query(
  106. tenant_id=tenant.tenant_id, id=kb_id):
  107. break
  108. else:
  109. return get_json_result(
  110. data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.',
  111. retcode=RetCode.OPERATING_ERROR)
  112. keywords = request.args.get("keywords", "")
  113. page_number = int(request.args.get("page", 1))
  114. items_per_page = int(request.args.get("page_size", 15))
  115. orderby = request.args.get("orderby", "create_time")
  116. desc = request.args.get("desc", True)
  117. try:
  118. docs, tol = DocumentService.get_by_kb_id(
  119. kb_id, page_number, items_per_page, orderby, desc, keywords)
  120. return get_json_result(data={"total": tol, "docs": docs})
  121. except Exception as e:
  122. return server_error_response(e)
  123. @manager.route('/delete', methods=['DELETE'])
  124. @token_required
  125. def rm(tenant_id):
  126. req = request.args
  127. if "doc_id" not in req:
  128. return get_data_error_result(
  129. retmsg="doc_id is required")
  130. doc_ids = req["doc_id"]
  131. if isinstance(doc_ids, str): doc_ids = [doc_ids]
  132. root_folder = FileService.get_root_folder(tenant_id)
  133. pf_id = root_folder["id"]
  134. FileService.init_knowledgebase_docs(pf_id, tenant_id)
  135. errors = ""
  136. for doc_id in doc_ids:
  137. try:
  138. e, doc = DocumentService.get_by_id(doc_id)
  139. if not e:
  140. return get_data_error_result(retmsg="Document not found!")
  141. tenant_id = DocumentService.get_tenant_id(doc_id)
  142. if not tenant_id:
  143. return get_data_error_result(retmsg="Tenant not found!")
  144. b, n = File2DocumentService.get_minio_address(doc_id=doc_id)
  145. if not DocumentService.remove_document(doc, tenant_id):
  146. return get_data_error_result(
  147. retmsg="Database error (Document removal)!")
  148. f2d = File2DocumentService.get_by_document_id(doc_id)
  149. FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
  150. File2DocumentService.delete_by_document_id(doc_id)
  151. STORAGE_IMPL.rm(b, n)
  152. except Exception as e:
  153. errors += str(e)
  154. if errors:
  155. return get_json_result(data=False, retmsg=errors, retcode=RetCode.SERVER_ERROR)
  156. return get_json_result(data=True,retmsg="success")