Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder mit einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

metadata_service.py 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. import copy
  2. import datetime
  3. import logging
  4. from typing import Optional
  5. from flask_login import current_user
  6. from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
  7. from extensions.ext_database import db
  8. from extensions.ext_redis import redis_client
  9. from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding
  10. from services.dataset_service import DocumentService
  11. from services.entities.knowledge_entities.knowledge_entities import (
  12. MetadataArgs,
  13. MetadataOperationData,
  14. )
  class MetadataService:
      """Service-layer operations on dataset metadata fields and per-document metadata values.

      Mutating operations are serialized through short-lived Redis locks (see
      ``knowledge_base_metadata_lock_check``). Apart from name validation, failures inside a
      mutating operation are logged and rolled back instead of being propagated, so those
      methods return ``None`` when they fail.
      """

      @staticmethod
      def _ensure_name_available(dataset_id: str, name: str) -> None:
          """Raise ``ValueError`` if ``name`` is already taken in the dataset or is a built-in field name."""
          existing = (
              db.session.query(DatasetMetadata)
              .filter_by(tenant_id=current_user.current_tenant_id, dataset_id=dataset_id, name=name)
              .first()
          )
          if existing:
              raise ValueError("Metadata name already exists.")
          if any(field.value == name for field in BuiltInField):
              raise ValueError("Metadata name already exists in Built-in fields.")

      @staticmethod
      def _built_in_field_values(document) -> dict:
          """Return the built-in metadata entries derived from ``document``'s own attributes."""
          return {
              BuiltInField.document_name.value: document.name,
              BuiltInField.uploader.value: document.uploader,
              BuiltInField.upload_date.value: document.upload_date.timestamp(),
              BuiltInField.last_update_date.value: document.last_update_date.timestamp(),
              BuiltInField.source.value: MetadataDataSource[document.data_source_type].value,
          }

      @staticmethod
      def _handle_failure(message: str) -> None:
          """Log the exception currently being handled and roll the session back.

          Must be called from inside an ``except`` block. Without the rollback, half-applied
          changes stay in the session and a failed flush blocks every later commit.
          """
          logging.exception(message)
          try:
              db.session.rollback()
          except Exception:
              # Keep the "log and continue" contract even if the rollback itself fails.
              logging.exception("Session rollback failed")

      @staticmethod
      def create_metadata(dataset_id: str, metadata_args: MetadataArgs) -> DatasetMetadata:
          """Create a new metadata field for a dataset.

          Args:
              dataset_id: Dataset the field belongs to.
              metadata_args: Name and type of the new field.

          Returns:
              The persisted ``DatasetMetadata`` row.

          Raises:
              ValueError: If the name is already used in the dataset or by a built-in field.
          """
          MetadataService._ensure_name_available(dataset_id, metadata_args.name)
          metadata = DatasetMetadata(
              tenant_id=current_user.current_tenant_id,
              dataset_id=dataset_id,
              type=metadata_args.type,
              name=metadata_args.name,
              created_by=current_user.id,
          )
          db.session.add(metadata)
          db.session.commit()
          return metadata

      @staticmethod
      def update_metadata_name(dataset_id: str, metadata_id: str, name: str) -> DatasetMetadata:  # type: ignore
          """Rename a metadata field and move the value to the new key in every bound document.

          Args:
              dataset_id: Dataset the field belongs to.
              metadata_id: Id of the field to rename.
              name: New field name.

          Returns:
              The updated ``DatasetMetadata``, or ``None`` if the update failed (the failure is
              logged and the session rolled back).

          Raises:
              ValueError: If ``name`` is already used in the dataset or by a built-in field.
          """
          MetadataService._ensure_name_available(dataset_id, name)
          lock_key = f"dataset_metadata_lock_{dataset_id}"
          lock_acquired = False
          try:
              MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
              lock_acquired = True
              # Scope the lookup to the dataset whose lock we hold and whose names we validated.
              metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id, dataset_id=dataset_id).first()
              if metadata is None:
                  raise ValueError("Metadata not found.")
              old_name = metadata.name
              metadata.name = name
              metadata.updated_by = current_user.id
              metadata.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)

              # update related documents
              bindings = db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all()
              if bindings:
                  document_ids = [binding.document_id for binding in bindings]
                  for document in DocumentService.get_document_by_ids(document_ids):
                      # Work on a copy and reassign the attribute rather than mutating in place.
                      doc_metadata = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
                      doc_metadata[name] = doc_metadata.pop(old_name, None)
                      document.doc_metadata = doc_metadata
                      db.session.add(document)
              db.session.commit()
              return metadata  # type: ignore
          except Exception:
              MetadataService._handle_failure("Update metadata name failed")
          finally:
              # Only release a lock this call acquired; otherwise we would drop another operation's lock.
              if lock_acquired:
                  redis_client.delete(lock_key)

      @staticmethod
      def delete_metadata(dataset_id: str, metadata_id: str):
          """Delete a metadata field and remove its key from every bound document.

          Args:
              dataset_id: Dataset the field belongs to.
              metadata_id: Id of the field to delete.

          Returns:
              The deleted ``DatasetMetadata``, or ``None`` if the deletion failed (the failure is
              logged and the session rolled back).
          """
          lock_key = f"dataset_metadata_lock_{dataset_id}"
          lock_acquired = False
          try:
              MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
              lock_acquired = True
              # Scope the lookup to the dataset whose lock we hold.
              metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id, dataset_id=dataset_id).first()
              if metadata is None:
                  raise ValueError("Metadata not found.")
              metadata_name = metadata.name
              db.session.delete(metadata)

              # deal related documents
              bindings = db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all()
              if bindings:
                  document_ids = [binding.document_id for binding in bindings]
                  for document in DocumentService.get_document_by_ids(document_ids):
                      if not document.doc_metadata:
                          # Nothing stored on this document, so there is no key to remove.
                          continue
                      doc_metadata = copy.deepcopy(document.doc_metadata)
                      doc_metadata.pop(metadata_name, None)
                      document.doc_metadata = doc_metadata
                      db.session.add(document)
              db.session.commit()
              return metadata
          except Exception:
              MetadataService._handle_failure("Delete metadata failed")
          finally:
              if lock_acquired:
                  redis_client.delete(lock_key)

      @staticmethod
      def get_built_in_fields():
          """Return the name and type of each built-in metadata field."""
          return [
              {"name": BuiltInField.document_name.value, "type": "string"},
              {"name": BuiltInField.uploader.value, "type": "string"},
              {"name": BuiltInField.upload_date.value, "type": "time"},
              {"name": BuiltInField.last_update_date.value, "type": "time"},
              {"name": BuiltInField.source.value, "type": "string"},
          ]

      @staticmethod
      def enable_built_in_field(dataset: Dataset):
          """Turn built-in fields on for ``dataset`` and write them into its working documents.

          Does nothing if built-in fields are already enabled. Failures are logged and rolled back.
          """
          if dataset.built_in_field_enabled:
              return
          lock_key = f"dataset_metadata_lock_{dataset.id}"
          lock_acquired = False
          try:
              MetadataService.knowledge_base_metadata_lock_check(dataset.id, None)
              lock_acquired = True
              dataset.built_in_field_enabled = True
              db.session.add(dataset)
              documents = DocumentService.get_working_documents_by_dataset_id(dataset.id)
              for document in documents or []:
                  doc_metadata = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
                  doc_metadata.update(MetadataService._built_in_field_values(document))
                  document.doc_metadata = doc_metadata
                  db.session.add(document)
              db.session.commit()
          except Exception:
              MetadataService._handle_failure("Enable built-in field failed")
          finally:
              if lock_acquired:
                  redis_client.delete(lock_key)

      @staticmethod
      def disable_built_in_field(dataset: Dataset):
          """Turn built-in fields off for ``dataset`` and strip them from its working documents.

          Does nothing if built-in fields are already disabled. Failures are logged and rolled back.
          """
          if not dataset.built_in_field_enabled:
              return
          lock_key = f"dataset_metadata_lock_{dataset.id}"
          lock_acquired = False
          try:
              MetadataService.knowledge_base_metadata_lock_check(dataset.id, None)
              lock_acquired = True
              dataset.built_in_field_enabled = False
              db.session.add(dataset)
              built_in_names = [field["name"] for field in MetadataService.get_built_in_fields()]
              documents = DocumentService.get_working_documents_by_dataset_id(dataset.id)
              for document in documents or []:
                  if not document.doc_metadata:
                      # No metadata stored, so there are no built-in keys to strip.
                      continue
                  doc_metadata = copy.deepcopy(document.doc_metadata)
                  for field_name in built_in_names:
                      doc_metadata.pop(field_name, None)
                  document.doc_metadata = doc_metadata
                  db.session.add(document)
              db.session.commit()
          except Exception:
              MetadataService._handle_failure("Disable built-in field failed")
          finally:
              if lock_acquired:
                  redis_client.delete(lock_key)

      @staticmethod
      def update_documents_metadata(dataset: Dataset, metadata_args: MetadataOperationData):
          """Replace the metadata values and metadata bindings of each listed document.

          Each operation is handled independently under its own document lock: a failure is
          logged and rolled back, and processing continues with the next operation.

          Args:
              dataset: Dataset the documents belong to.
              metadata_args: Operations, each naming a document and its full metadata list.
          """
          for operation in metadata_args.operation_data:
              lock_key = f"document_metadata_lock_{operation.document_id}"
              lock_acquired = False
              try:
                  MetadataService.knowledge_base_metadata_lock_check(None, operation.document_id)
                  lock_acquired = True
                  document = DocumentService.get_document(dataset.id, operation.document_id)
                  if document is None:
                      raise ValueError("Document not found.")
                  # The submitted list fully replaces whatever metadata the document had.
                  doc_metadata = {item.name: item.value for item in operation.metadata_list}
                  if dataset.built_in_field_enabled:
                      doc_metadata.update(MetadataService._built_in_field_values(document))
                  document.doc_metadata = doc_metadata
                  db.session.add(document)

                  # deal metadata binding: drop the old bindings and recreate them from the list
                  db.session.query(DatasetMetadataBinding).filter_by(document_id=operation.document_id).delete()
                  for item in operation.metadata_list:
                      db.session.add(
                          DatasetMetadataBinding(
                              tenant_id=current_user.current_tenant_id,
                              dataset_id=dataset.id,
                              document_id=operation.document_id,
                              metadata_id=item.id,
                              created_by=current_user.id,
                          )
                      )
                  # Single commit so the document's metadata and its bindings cannot diverge.
                  db.session.commit()
              except Exception:
                  MetadataService._handle_failure("Update documents metadata failed")
              finally:
                  if lock_acquired:
                      redis_client.delete(lock_key)

      @staticmethod
      def knowledge_base_metadata_lock_check(dataset_id: Optional[str], document_id: Optional[str]):
          """Acquire the metadata lock for a dataset and/or a document.

          Each lock is a Redis key that expires after 3600 seconds. Callers are responsible for
          deleting the key when they are done.

          Args:
              dataset_id: If truthy, acquire ``dataset_metadata_lock_{dataset_id}``.
              document_id: If truthy, acquire ``document_metadata_lock_{document_id}``.

          Raises:
              ValueError: If a requested lock is already held.
          """
          if dataset_id:
              lock_key = f"dataset_metadata_lock_{dataset_id}"
              # SET with nx=True checks and sets in one step, so two callers cannot both acquire.
              if not redis_client.set(lock_key, 1, ex=3600, nx=True):
                  raise ValueError("Another knowledge base metadata operation is running, please wait a moment.")
          if document_id:
              lock_key = f"document_metadata_lock_{document_id}"
              if not redis_client.set(lock_key, 1, ex=3600, nx=True):
                  raise ValueError("Another document metadata operation is running, please wait a moment.")

      @staticmethod
      def get_dataset_metadatas(dataset: Dataset):
          """Return the dataset's custom metadata fields with usage counts.

          Returns:
              A dict with ``doc_metadata`` (one entry per field whose id is not ``"built-in"``,
              with ``count`` being the number of bindings for that field in the dataset) and
              ``built_in_field_enabled``.
          """
          custom_fields = [item for item in dataset.doc_metadata or [] if item.get("id") != "built-in"]
          return {
              "doc_metadata": [
                  {
                      "id": item.get("id"),
                      "name": item.get("name"),
                      "type": item.get("type"),
                      "count": db.session.query(DatasetMetadataBinding)
                      .filter_by(metadata_id=item.get("id"), dataset_id=dataset.id)
                      .count(),
                  }
                  for item in custom_fields
              ],
              "built_in_field_enabled": dataset.built_in_field_enabled,
          }