Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

dataset.py 7.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. from flask import request
  17. from api.db import StatusEnum, FileSource
  18. from api.db.db_models import File
  19. from api.db.services.document_service import DocumentService
  20. from api.db.services.file2document_service import File2DocumentService
  21. from api.db.services.file_service import FileService
  22. from api.db.services.knowledgebase_service import KnowledgebaseService
  23. from api.db.services.user_service import TenantService
  24. from api.settings import RetCode
  25. from api.utils import get_uuid
  26. from api.utils.api_utils import get_json_result, token_required, get_data_error_result
  27. @manager.route('/save', methods=['POST'])
  28. @token_required
  29. def save(tenant_id):
  30. req = request.json
  31. e, t = TenantService.get_by_id(tenant_id)
  32. if "id" not in req:
  33. if "tenant_id" in req or "embd_id" in req:
  34. return get_data_error_result(
  35. retmsg="Tenant_id or embedding_model must not be provided")
  36. if "name" not in req:
  37. return get_data_error_result(
  38. retmsg="Name is not empty!")
  39. req['id'] = get_uuid()
  40. req["name"] = req["name"].strip()
  41. if req["name"] == "":
  42. return get_data_error_result(
  43. retmsg="Name is not empty string!")
  44. if KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
  45. return get_data_error_result(
  46. retmsg="Duplicated knowledgebase name in creating dataset.")
  47. req["tenant_id"] = tenant_id
  48. req['created_by'] = tenant_id
  49. req['embd_id'] = t.embd_id
  50. if not KnowledgebaseService.save(**req):
  51. return get_data_error_result(retmsg="Create dataset error.(Database error)")
  52. return get_json_result(data=req)
  53. else:
  54. if "tenant_id" in req:
  55. if req["tenant_id"] != tenant_id:
  56. return get_data_error_result(
  57. retmsg="Can't change tenant_id.")
  58. if "embd_id" in req:
  59. if req["embd_id"] != t.embd_id:
  60. return get_data_error_result(
  61. retmsg="Can't change embedding_model.")
  62. if not KnowledgebaseService.query(
  63. created_by=tenant_id, id=req["id"]):
  64. return get_json_result(
  65. data=False, retmsg='You do not own the dataset.',
  66. retcode=RetCode.OPERATING_ERROR)
  67. e, kb = KnowledgebaseService.get_by_id(req["id"])
  68. if "chunk_num" in req:
  69. if req["chunk_num"] != kb.chunk_num:
  70. return get_data_error_result(
  71. retmsg="Can't change chunk_count.")
  72. if "doc_num" in req:
  73. if req['doc_num'] != kb.doc_num:
  74. return get_data_error_result(
  75. retmsg="Can't change document_count.")
  76. if "parser_id" in req:
  77. if kb.chunk_num > 0 and req['parser_id'] != kb.parser_id:
  78. return get_data_error_result(
  79. retmsg="if chunk count is not 0, parse method is not changable.")
  80. if "name" in req:
  81. if req["name"].lower() != kb.name.lower() \
  82. and len(KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id,
  83. status=StatusEnum.VALID.value)) > 0:
  84. return get_data_error_result(
  85. retmsg="Duplicated knowledgebase name in updating dataset.")
  86. del req["id"]
  87. if not KnowledgebaseService.update_by_id(kb.id, req):
  88. return get_data_error_result(retmsg="Update dataset error.(Database error)")
  89. return get_json_result(data=True)
  90. @manager.route('/delete', methods=['DELETE'])
  91. @token_required
  92. def delete(tenant_id):
  93. req = request.args
  94. kbs = KnowledgebaseService.query(
  95. created_by=tenant_id, id=req["id"])
  96. if not kbs:
  97. return get_json_result(
  98. data=False, retmsg='You do not own the dataset',
  99. retcode=RetCode.OPERATING_ERROR)
  100. for doc in DocumentService.query(kb_id=req["id"]):
  101. if not DocumentService.remove_document(doc, kbs[0].tenant_id):
  102. return get_data_error_result(
  103. retmsg="Remove document error.(Database error)")
  104. f2d = File2DocumentService.get_by_document_id(doc.id)
  105. FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
  106. File2DocumentService.delete_by_document_id(doc.id)
  107. if not KnowledgebaseService.delete_by_id(req["id"]):
  108. return get_data_error_result(
  109. retmsg="Delete dataset error.(Database error)")
  110. return get_json_result(data=True)
  111. @manager.route('/list', methods=['GET'])
  112. @token_required
  113. def list_datasets(tenant_id):
  114. page_number = int(request.args.get("page", 1))
  115. items_per_page = int(request.args.get("page_size", 1024))
  116. orderby = request.args.get("orderby", "create_time")
  117. desc = bool(request.args.get("desc", True))
  118. tenants = TenantService.get_joined_tenants_by_user_id(tenant_id)
  119. kbs = KnowledgebaseService.get_by_tenant_ids(
  120. [m["tenant_id"] for m in tenants], tenant_id, page_number, items_per_page, orderby, desc)
  121. return get_json_result(data=kbs)
  122. @manager.route('/detail', methods=['GET'])
  123. @token_required
  124. def detail(tenant_id):
  125. req = request.args
  126. if "id" in req:
  127. id = req["id"]
  128. kb = KnowledgebaseService.query(created_by=tenant_id, id=req["id"])
  129. if not kb:
  130. return get_json_result(
  131. data=False, retmsg='You do not own the dataset',
  132. retcode=RetCode.OPERATING_ERROR)
  133. if "name" in req:
  134. name = req["name"]
  135. if kb[0].name != name:
  136. return get_json_result(
  137. data=False, retmsg='You do not own the dataset',
  138. retcode=RetCode.OPERATING_ERROR)
  139. e, k = KnowledgebaseService.get_by_id(id)
  140. return get_json_result(data=k.to_dict())
  141. else:
  142. if "name" in req:
  143. name = req["name"]
  144. e, k = KnowledgebaseService.get_by_name(kb_name=name, tenant_id=tenant_id)
  145. if not e:
  146. return get_json_result(
  147. data=False, retmsg='You do not own the dataset',
  148. retcode=RetCode.OPERATING_ERROR)
  149. return get_json_result(data=k.to_dict())
  150. else:
  151. return get_data_error_result(
  152. retmsg="At least one of `id` or `name` must be provided.")