You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. import copy
  2. import logging
  3. from typing import Optional
  4. from flask_login import current_user
  5. from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
  6. from extensions.ext_database import db
  7. from extensions.ext_redis import redis_client
  8. from libs.datetime_utils import naive_utc_now
  9. from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding
  10. from services.dataset_service import DocumentService
  11. from services.entities.knowledge_entities.knowledge_entities import (
  12. MetadataArgs,
  13. MetadataOperationData,
  14. )
  15. logger = logging.getLogger(__name__)
  16. class MetadataService:
  @staticmethod
  def create_metadata(dataset_id: str, metadata_args: MetadataArgs) -> DatasetMetadata:
      """Create a custom metadata field on a dataset for the current tenant.

      Args:
          dataset_id: ID of the dataset that will own the field.
          metadata_args: Supplies the new field's ``name`` and ``type``.

      Returns:
          The newly committed ``DatasetMetadata`` row.

      Raises:
          ValueError: If the name is longer than 255 characters, is already
              used by a metadata row of this tenant and dataset, or equals
              the value of a ``BuiltInField`` member.
      """
      new_name = metadata_args.name

      # Guard 1: length limit.
      if len(new_name) > 255:
          raise ValueError("Metadata name cannot exceed 255 characters.")

      # Guard 2: the name must be unique within this tenant's dataset.
      duplicate = (
          db.session.query(DatasetMetadata)
          .filter_by(tenant_id=current_user.current_tenant_id, dataset_id=dataset_id, name=new_name)
          .first()
      )
      if duplicate:
          raise ValueError("Metadata name already exists.")

      # Guard 3: the name must not collide with a built-in field name.
      if any(built_in.value == new_name for built_in in BuiltInField):
          raise ValueError("Metadata name already exists in Built-in fields.")

      created = DatasetMetadata(
          tenant_id=current_user.current_tenant_id,
          dataset_id=dataset_id,
          type=metadata_args.type,
          name=new_name,
          created_by=current_user.id,
      )
      db.session.add(created)
      db.session.commit()
      return created
  @staticmethod
  def update_metadata_name(dataset_id: str, metadata_id: str, name: str) -> DatasetMetadata:  # type: ignore
      """Rename a metadata field and re-key the value in every bound document.

      The rename runs under the dataset-level metadata lock. Failures inside
      the locked section are logged and swallowed (the method then returns
      ``None``), which is why the signature carries ``# type: ignore``.

      Args:
          dataset_id: ID of the dataset that owns the metadata field.
          metadata_id: ID of the ``DatasetMetadata`` row to rename.
          name: The new field name.

      Returns:
          The updated ``DatasetMetadata`` row, or ``None`` when the locked
          section failed (lock busy, metadata not found, database error).

      Raises:
          ValueError: If ``name`` is longer than 255 characters, is already
              used in this tenant's dataset, or equals a built-in field name.
      """
      if len(name) > 255:
          raise ValueError("Metadata name cannot exceed 255 characters.")

      lock_key = f"dataset_metadata_lock_{dataset_id}"

      name_taken = (
          db.session.query(DatasetMetadata)
          .filter_by(tenant_id=current_user.current_tenant_id, dataset_id=dataset_id, name=name)
          .first()
      )
      if name_taken:
          raise ValueError("Metadata name already exists.")
      if any(field.value == name for field in BuiltInField):
          raise ValueError("Metadata name already exists in Built-in fields.")

      # Only release the lock if this call acquired it; otherwise a rejected
      # call would delete the lock held by the operation that is still running.
      lock_acquired = False
      try:
          MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
          lock_acquired = True

          # Scope the lookup to the dataset that was locked and validated above.
          metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id, dataset_id=dataset_id).first()
          if metadata is None:
              raise ValueError("Metadata not found.")

          old_name = metadata.name
          metadata.name = name
          metadata.updated_by = current_user.id
          metadata.updated_at = naive_utc_now()

          # Re-key the value in every document bound to this metadata field.
          dataset_metadata_bindings = (
              db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all()
          )
          if dataset_metadata_bindings:
              document_ids = [binding.document_id for binding in dataset_metadata_bindings]
              documents = DocumentService.get_document_by_ids(document_ids)
              for document in documents:
                  # Work on a copy and reassign it so the change is a new attribute value.
                  doc_metadata = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
                  doc_metadata[name] = doc_metadata.pop(old_name, None)
                  document.doc_metadata = doc_metadata
                  db.session.add(document)

          db.session.commit()
          return metadata  # type: ignore
      except Exception:
          logger.exception("Update metadata name failed")
          # Discard the partial changes so the shared session stays usable.
          db.session.rollback()
          return None  # type: ignore
      finally:
          if lock_acquired:
              redis_client.delete(lock_key)
  @staticmethod
  def delete_metadata(dataset_id: str, metadata_id: str):
      """Delete a metadata field and strip its key from every bound document.

      The deletion runs under the dataset-level metadata lock. Failures
      (lock busy, metadata not found, database error) are logged and
      swallowed, in which case ``None`` is returned.

      Args:
          dataset_id: ID of the dataset that owns the metadata field.
          metadata_id: ID of the ``DatasetMetadata`` row to delete.

      Returns:
          The deleted ``DatasetMetadata`` object, or ``None`` on failure.
      """
      lock_key = f"dataset_metadata_lock_{dataset_id}"
      # Only release the lock if this call acquired it; otherwise a rejected
      # call would delete the lock held by the operation that is still running.
      lock_acquired = False
      try:
          MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
          lock_acquired = True

          # Scope the lookup to the dataset that was locked above.
          metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id, dataset_id=dataset_id).first()
          if metadata is None:
              raise ValueError("Metadata not found.")
          db.session.delete(metadata)

          # Remove the field from every document bound to this metadata.
          dataset_metadata_bindings = (
              db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all()
          )
          if dataset_metadata_bindings:
              document_ids = [binding.document_id for binding in dataset_metadata_bindings]
              documents = DocumentService.get_document_by_ids(document_ids)
              for document in documents:
                  # Work on a copy and reassign it so the change is a new attribute value.
                  doc_metadata = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
                  doc_metadata.pop(metadata.name, None)
                  document.doc_metadata = doc_metadata
                  db.session.add(document)

          db.session.commit()
          return metadata
      except Exception:
          logger.exception("Delete metadata failed")
          # Discard the pending delete/updates so they cannot leak into a later commit.
          db.session.rollback()
          return None
      finally:
          if lock_acquired:
              redis_client.delete(lock_key)
  @staticmethod
  def get_built_in_fields():
      """Return the built-in metadata fields as ``{"name", "type"}`` dicts.

      A fresh list is built on every call, in a fixed order: document name,
      uploader, upload date, last update date, source.
      """
      typed_fields = (
          (BuiltInField.document_name, "string"),
          (BuiltInField.uploader, "string"),
          (BuiltInField.upload_date, "time"),
          (BuiltInField.last_update_date, "time"),
          (BuiltInField.source, "string"),
      )
      return [{"name": field.value, "type": field_type} for field, field_type in typed_fields]
  @staticmethod
  def enable_built_in_field(dataset: Dataset):
      """Turn on built-in metadata fields for a dataset.

      Writes the five built-in values (document name, uploader, upload date,
      last update date, source) into the ``doc_metadata`` of every working
      document of the dataset, then sets ``dataset.built_in_field_enabled``.
      Does nothing when the flag is already set. Runs under the dataset-level
      metadata lock; failures are logged and swallowed.

      Args:
          dataset: The dataset to update.
      """
      if dataset.built_in_field_enabled:
          return

      lock_key = f"dataset_metadata_lock_{dataset.id}"
      # Only release the lock if this call acquired it; otherwise a rejected
      # call would delete the lock held by the operation that is still running.
      lock_acquired = False
      try:
          MetadataService.knowledge_base_metadata_lock_check(dataset.id, None)
          lock_acquired = True

          db.session.add(dataset)
          documents = DocumentService.get_working_documents_by_dataset_id(dataset.id)
          for document in documents or []:
              # Work on a copy and reassign it so the change is a new attribute value.
              doc_metadata = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
              doc_metadata[BuiltInField.document_name.value] = document.name
              doc_metadata[BuiltInField.uploader.value] = document.uploader
              doc_metadata[BuiltInField.upload_date.value] = document.upload_date.timestamp()
              doc_metadata[BuiltInField.last_update_date.value] = document.last_update_date.timestamp()
              doc_metadata[BuiltInField.source.value] = MetadataDataSource[document.data_source_type].value
              document.doc_metadata = doc_metadata
              db.session.add(document)

          dataset.built_in_field_enabled = True
          db.session.commit()
      except Exception:
          logger.exception("Enable built-in field failed")
          # Discard partial changes (including the flag) so the session stays consistent.
          db.session.rollback()
      finally:
          if lock_acquired:
              redis_client.delete(lock_key)
  @staticmethod
  def disable_built_in_field(dataset: Dataset):
      """Turn off built-in metadata fields for a dataset.

      Removes the five built-in keys (document name, uploader, upload date,
      last update date, source) from the ``doc_metadata`` of every working
      document of the dataset, then clears ``dataset.built_in_field_enabled``.
      Does nothing when the flag is already cleared. Runs under the
      dataset-level metadata lock; failures are logged and swallowed.

      Args:
          dataset: The dataset to update.
      """
      if not dataset.built_in_field_enabled:
          return

      built_in_keys = (
          BuiltInField.document_name.value,
          BuiltInField.uploader.value,
          BuiltInField.upload_date.value,
          BuiltInField.last_update_date.value,
          BuiltInField.source.value,
      )
      lock_key = f"dataset_metadata_lock_{dataset.id}"
      # Only release the lock if this call acquired it; otherwise a rejected
      # call would delete the lock held by the operation that is still running.
      lock_acquired = False
      try:
          MetadataService.knowledge_base_metadata_lock_check(dataset.id, None)
          lock_acquired = True

          db.session.add(dataset)
          documents = DocumentService.get_working_documents_by_dataset_id(dataset.id)
          for document in documents or []:
              # Work on a copy and reassign it so the change is a new attribute value.
              doc_metadata = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
              for key in built_in_keys:
                  doc_metadata.pop(key, None)
              document.doc_metadata = doc_metadata
              db.session.add(document)

          dataset.built_in_field_enabled = False
          db.session.commit()
      except Exception:
          logger.exception("Disable built-in field failed")
          # Discard partial changes (including the flag) so the session stays consistent.
          db.session.rollback()
      finally:
          if lock_acquired:
              redis_client.delete(lock_key)
  @staticmethod
  def update_documents_metadata(dataset: Dataset, metadata_args: MetadataOperationData):
      """Replace the metadata of each document named in ``metadata_args``.

      For every operation the document's ``doc_metadata`` is rebuilt from the
      operation's metadata list (plus the built-in values when the dataset
      has them enabled), and the document's ``DatasetMetadataBinding`` rows
      are replaced to match. Both changes are committed together so they
      cannot diverge. Each operation runs under its own document-level lock
      and is independent: a failure is logged, rolled back and swallowed,
      and the loop continues with the next operation.

      Args:
          dataset: The dataset that owns the documents.
          metadata_args: Carries ``operation_data``, one entry per document,
              each with a ``document_id`` and a ``metadata_list``.
      """
      for operation in metadata_args.operation_data:
          lock_key = f"document_metadata_lock_{operation.document_id}"
          # Only release the lock if this call acquired it; otherwise a rejected
          # call would delete the lock held by the operation that is still running.
          lock_acquired = False
          try:
              MetadataService.knowledge_base_metadata_lock_check(None, operation.document_id)
              lock_acquired = True

              document = DocumentService.get_document(dataset.id, operation.document_id)
              if document is None:
                  raise ValueError("Document not found.")

              doc_metadata = {
                  metadata_value.name: metadata_value.value for metadata_value in operation.metadata_list
              }
              if dataset.built_in_field_enabled:
                  doc_metadata[BuiltInField.document_name.value] = document.name
                  doc_metadata[BuiltInField.uploader.value] = document.uploader
                  doc_metadata[BuiltInField.upload_date.value] = document.upload_date.timestamp()
                  doc_metadata[BuiltInField.last_update_date.value] = document.last_update_date.timestamp()
                  doc_metadata[BuiltInField.source.value] = MetadataDataSource[document.data_source_type].value
              document.doc_metadata = doc_metadata
              db.session.add(document)

              # Replace the document's bindings in the same transaction as the
              # metadata update so a failure cannot leave them out of sync.
              db.session.query(DatasetMetadataBinding).filter_by(document_id=operation.document_id).delete()
              for metadata_value in operation.metadata_list:
                  dataset_metadata_binding = DatasetMetadataBinding(
                      tenant_id=current_user.current_tenant_id,
                      dataset_id=dataset.id,
                      document_id=operation.document_id,
                      metadata_id=metadata_value.id,
                      created_by=current_user.id,
                  )
                  db.session.add(dataset_metadata_binding)
              db.session.commit()
          except Exception:
              logger.exception("Update documents metadata failed")
              # Reset the session so the next operation starts from a clean state.
              db.session.rollback()
          finally:
              if lock_acquired:
                  redis_client.delete(lock_key)
  @staticmethod
  def knowledge_base_metadata_lock_check(dataset_id: Optional[str], document_id: Optional[str]):
      """Acquire the Redis metadata lock(s) for a dataset and/or a document.

      Each lock is taken with a single ``SET ... NX EX 3600`` so that the
      "is it free?" check and the acquisition are one atomic step; a separate
      GET followed by SET would let two concurrent callers both succeed.
      The one-hour expiry bounds how long a lock can outlive a crashed holder.

      Args:
          dataset_id: When truthy, lock ``dataset_metadata_lock_{dataset_id}``.
          document_id: When truthy, lock ``document_metadata_lock_{document_id}``.

      Raises:
          ValueError: If a requested lock is already held. When both locks are
              requested and the document lock is busy, the dataset lock taken
              by this call is released before raising.
      """
      dataset_lock_key = None
      if dataset_id:
          dataset_lock_key = f"dataset_metadata_lock_{dataset_id}"
          # With nx=True, set() returns a falsy value when the key already exists.
          if not redis_client.set(dataset_lock_key, 1, ex=3600, nx=True):
              raise ValueError("Another knowledge base metadata operation is running, please wait a moment.")
      if document_id:
          document_lock_key = f"document_metadata_lock_{document_id}"
          if not redis_client.set(document_lock_key, 1, ex=3600, nx=True):
              # Do not leave a half-acquired pair behind.
              if dataset_lock_key is not None:
                  redis_client.delete(dataset_lock_key)
              raise ValueError("Another document metadata operation is running, please wait a moment.")
  @staticmethod
  def get_dataset_metadatas(dataset: Dataset):
      """Summarise a dataset's custom metadata fields with their usage counts.

      Entries of ``dataset.doc_metadata`` whose ``id`` is ``"built-in"`` are
      skipped. For every remaining entry, the number of
      ``DatasetMetadataBinding`` rows for that metadata id within this
      dataset is reported as ``count`` (one count query per entry).

      Args:
          dataset: The dataset to describe.

      Returns:
          A dict with ``doc_metadata`` (list of ``id``/``name``/``type``/``count``
          dicts) and ``built_in_field_enabled`` (the dataset's flag).
      """
      custom_fields = []
      for entry in dataset.doc_metadata or []:
          if entry.get("id") == "built-in":
              continue
          field_id = entry.get("id")
          field_name = entry.get("name")
          field_type = entry.get("type")
          binding_query = db.session.query(DatasetMetadataBinding).filter_by(
              metadata_id=entry.get("id"), dataset_id=dataset.id
          )
          custom_fields.append(
              {
                  "id": field_id,
                  "name": field_name,
                  "type": field_type,
                  "count": binding_query.count(),
              }
          )
      return {
          "doc_metadata": custom_fields,
          "built_in_field_enabled": dataset.built_in_field_enabled,
      }