Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

metadata_service.py 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. import copy
  2. import datetime
  3. import logging
  4. from typing import Optional
  5. from flask_login import current_user
  6. from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
  7. from extensions.ext_database import db
  8. from extensions.ext_redis import redis_client
  9. from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding
  10. from services.dataset_service import DocumentService
  11. from services.entities.knowledge_entities.knowledge_entities import (
  12. MetadataArgs,
  13. MetadataOperationData,
  14. )
  15. class MetadataService:
  @staticmethod
  def create_metadata(dataset_id: str, metadata_args: MetadataArgs) -> DatasetMetadata:
      """Create a custom metadata field for a dataset.

      Args:
          dataset_id: ID of the dataset that will own the field.
          metadata_args: Carries the ``name`` and ``type`` of the new field.

      Returns:
          The newly committed ``DatasetMetadata`` row.

      Raises:
          ValueError: If the name is longer than 255 characters, is already
              used by another field of this dataset in the current tenant,
              or equals the value of a ``BuiltInField`` member.
      """
      new_name = metadata_args.name

      # Length is validated first so no query is issued for an invalid name.
      if len(new_name) > 255:
          raise ValueError("Metadata name cannot exceed 255 characters.")

      tenant_id = current_user.current_tenant_id

      # The name must be unique within (tenant, dataset).
      duplicate = (
          db.session.query(DatasetMetadata)
          .filter_by(tenant_id=tenant_id, dataset_id=dataset_id, name=new_name)
          .first()
      )
      if duplicate:
          raise ValueError("Metadata name already exists.")

      # Custom fields may not shadow a built-in field name.
      if any(built_in.value == new_name for built_in in BuiltInField):
          raise ValueError("Metadata name already exists in Built-in fields.")

      created = DatasetMetadata(
          tenant_id=tenant_id,
          dataset_id=dataset_id,
          type=metadata_args.type,
          name=new_name,
          created_by=current_user.id,
      )
      db.session.add(created)
      db.session.commit()
      return created
  @staticmethod
  def update_metadata_name(dataset_id: str, metadata_id: str, name: str) -> DatasetMetadata:  # type: ignore
      """Rename a custom metadata field and rewrite the key in bound documents.

      The rename runs under the per-dataset metadata lock. Every document that
      has a ``DatasetMetadataBinding`` for this field gets its ``doc_metadata``
      key moved from the old name to the new one.

      Args:
          dataset_id: ID of the dataset owning the field; also names the lock.
          metadata_id: ID of the ``DatasetMetadata`` row to rename.
          name: The new field name.

      Returns:
          The updated ``DatasetMetadata`` row. ``None`` is returned when the
          dataset lock could not be taken or when the update fails; in both
          cases the error is logged rather than raised.

      Raises:
          ValueError: If ``name`` is longer than 255 characters, is already
              used in this dataset for the current tenant, or equals the value
              of a ``BuiltInField`` member.
      """
      # check if metadata name is too long
      if len(name) > 255:
          raise ValueError("Metadata name cannot exceed 255 characters.")
      lock_key = f"dataset_metadata_lock_{dataset_id}"
      # check if metadata name already exists
      if (
          db.session.query(DatasetMetadata)
          .filter_by(tenant_id=current_user.current_tenant_id, dataset_id=dataset_id, name=name)
          .first()
      ):
          raise ValueError("Metadata name already exists.")
      for field in BuiltInField:
          if field.value == name:
              raise ValueError("Metadata name already exists in Built-in fields.")

      # Acquire the lock outside the guarded section: if acquisition fails the
      # lock belongs to another operation and must NOT be deleted by us.
      try:
          MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
      except Exception:
          logging.exception("Update metadata name failed")
          return None  # type: ignore

      try:
          metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id).first()
          if metadata is None:
              raise ValueError("Metadata not found.")
          old_name = metadata.name
          metadata.name = name
          metadata.updated_by = current_user.id
          # Stored as a naive UTC timestamp.
          metadata.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
          # update related documents
          dataset_metadata_bindings = (
              db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all()
          )
          if dataset_metadata_bindings:
              document_ids = [binding.document_id for binding in dataset_metadata_bindings]
              documents = DocumentService.get_document_by_ids(document_ids)
              for document in documents:
                  # Work on a copy and reassign so the attribute is replaced
                  # rather than mutated in place.
                  doc_metadata = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
                  # A document without the old key still receives the new key
                  # (with value None).
                  doc_metadata[name] = doc_metadata.pop(old_name, None)
                  document.doc_metadata = doc_metadata
                  db.session.add(document)
          db.session.commit()
          return metadata  # type: ignore
      except Exception:
          logging.exception("Update metadata name failed")
          # Discard the partial rename so a later commit on this session
          # cannot persist it.
          db.session.rollback()
      finally:
          # Reached only when this call acquired the lock.
          redis_client.delete(lock_key)
  @staticmethod
  def delete_metadata(dataset_id: str, metadata_id: str):
      """Delete a custom metadata field and strip its key from bound documents.

      Runs under the per-dataset metadata lock. Every document that has a
      ``DatasetMetadataBinding`` for this field gets the field's name removed
      from its ``doc_metadata``.

      Args:
          dataset_id: ID of the dataset owning the field; also names the lock.
          metadata_id: ID of the ``DatasetMetadata`` row to delete.

      Returns:
          The deleted ``DatasetMetadata`` object on success. ``None`` when the
          dataset lock could not be taken or the deletion fails; the error is
          logged rather than raised.
      """
      lock_key = f"dataset_metadata_lock_{dataset_id}"

      # Acquire the lock outside the guarded section: if acquisition fails the
      # lock belongs to another operation and must NOT be deleted by us.
      try:
          MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
      except Exception:
          logging.exception("Delete metadata failed")
          return None

      try:
          metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id).first()
          if metadata is None:
              raise ValueError("Metadata not found.")
          db.session.delete(metadata)

          # deal related documents
          dataset_metadata_bindings = (
              db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all()
          )
          if dataset_metadata_bindings:
              document_ids = [binding.document_id for binding in dataset_metadata_bindings]
              documents = DocumentService.get_document_by_ids(document_ids)
              for document in documents:
                  # Work on a copy and reassign so the attribute is replaced
                  # rather than mutated in place.
                  doc_metadata = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
                  doc_metadata.pop(metadata.name, None)
                  document.doc_metadata = doc_metadata
                  db.session.add(document)
          db.session.commit()
          return metadata
      except Exception:
          logging.exception("Delete metadata failed")
          # Discard the pending delete and document edits so a later commit
          # on this session cannot persist a partial result.
          db.session.rollback()
      finally:
          # Reached only when this call acquired the lock.
          redis_client.delete(lock_key)
  @staticmethod
  def get_built_in_fields():
      """Return the built-in metadata fields as ``{"name", "type"}`` dicts.

      Returns:
          A list with one entry per built-in field, in this order: document
          name, uploader, upload date, last update date, source. ``name`` is
          the ``BuiltInField`` member's value; ``type`` is ``"string"`` or
          ``"time"``.
      """
      # (field, declared type) pairs; list order is the order returned.
      typed_fields = (
          (BuiltInField.document_name, "string"),
          (BuiltInField.uploader, "string"),
          (BuiltInField.upload_date, "time"),
          (BuiltInField.last_update_date, "time"),
          (BuiltInField.source, "string"),
      )
      return [{"name": member.value, "type": kind} for member, kind in typed_fields]
  @staticmethod
  def enable_built_in_field(dataset: Dataset):
      """Turn on built-in metadata fields for a dataset and backfill documents.

      Does nothing when the dataset already has built-in fields enabled.
      Otherwise, under the per-dataset metadata lock, writes the five built-in
      keys (document name, uploader, upload date, last update date, source)
      into the ``doc_metadata`` of every working document and sets
      ``dataset.built_in_field_enabled`` to ``True``.

      Failures, including failure to take the lock, are logged and not raised.

      Args:
          dataset: The dataset to update.
      """
      if dataset.built_in_field_enabled:
          return
      lock_key = f"dataset_metadata_lock_{dataset.id}"

      # Acquire the lock outside the guarded section: if acquisition fails the
      # lock belongs to another operation and must NOT be deleted by us.
      try:
          MetadataService.knowledge_base_metadata_lock_check(dataset.id, None)
      except Exception:
          logging.exception("Enable built-in field failed")
          return

      try:
          db.session.add(dataset)
          documents = DocumentService.get_working_documents_by_dataset_id(dataset.id)
          for document in documents or []:
              # Work on a copy and reassign so the attribute is replaced
              # rather than mutated in place.
              doc_metadata = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
              doc_metadata[BuiltInField.document_name.value] = document.name
              doc_metadata[BuiltInField.uploader.value] = document.uploader
              doc_metadata[BuiltInField.upload_date.value] = document.upload_date.timestamp()
              doc_metadata[BuiltInField.last_update_date.value] = document.last_update_date.timestamp()
              doc_metadata[BuiltInField.source.value] = MetadataDataSource[document.data_source_type].value
              document.doc_metadata = doc_metadata
              db.session.add(document)
          dataset.built_in_field_enabled = True
          db.session.commit()
      except Exception:
          logging.exception("Enable built-in field failed")
          # Discard the partial backfill so a later commit on this session
          # cannot persist it.
          db.session.rollback()
      finally:
          # Reached only when this call acquired the lock.
          redis_client.delete(lock_key)
  @staticmethod
  def disable_built_in_field(dataset: Dataset):
      """Turn off built-in metadata fields for a dataset and clean documents.

      Does nothing when built-in fields are already disabled. Otherwise, under
      the per-dataset metadata lock, removes the five built-in keys (document
      name, uploader, upload date, last update date, source) from the
      ``doc_metadata`` of every working document and sets
      ``dataset.built_in_field_enabled`` to ``False``.

      Failures, including failure to take the lock, are logged and not raised.

      Args:
          dataset: The dataset to update.
      """
      if not dataset.built_in_field_enabled:
          return
      lock_key = f"dataset_metadata_lock_{dataset.id}"

      # Acquire the lock outside the guarded section: if acquisition fails the
      # lock belongs to another operation and must NOT be deleted by us.
      try:
          MetadataService.knowledge_base_metadata_lock_check(dataset.id, None)
      except Exception:
          logging.exception("Disable built-in field failed")
          return

      built_in_keys = (
          BuiltInField.document_name.value,
          BuiltInField.uploader.value,
          BuiltInField.upload_date.value,
          BuiltInField.last_update_date.value,
          BuiltInField.source.value,
      )
      try:
          db.session.add(dataset)
          documents = DocumentService.get_working_documents_by_dataset_id(dataset.id)
          for document in documents or []:
              # Work on a copy and reassign so the attribute is replaced
              # rather than mutated in place.
              doc_metadata = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
              for key in built_in_keys:
                  doc_metadata.pop(key, None)
              document.doc_metadata = doc_metadata
              db.session.add(document)
          dataset.built_in_field_enabled = False
          db.session.commit()
      except Exception:
          logging.exception("Disable built-in field failed")
          # Discard the partial cleanup so a later commit on this session
          # cannot persist it.
          db.session.rollback()
      finally:
          # Reached only when this call acquired the lock.
          redis_client.delete(lock_key)
  @staticmethod
  def update_documents_metadata(dataset: Dataset, metadata_args: MetadataOperationData):
      """Replace the metadata of one or more documents in a dataset.

      Each entry of ``metadata_args.operation_data`` is processed on its own
      under a per-document lock. For that document, ``doc_metadata`` is rebuilt
      from the entry's ``metadata_list`` (plus the built-in fields when the
      dataset has them enabled), and its ``DatasetMetadataBinding`` rows are
      replaced by one row per listed metadata value. Both changes are committed
      together.

      A failing operation, including failure to take the lock, is logged and
      rolled back; the remaining operations are still attempted.

      Args:
          dataset: The dataset the documents belong to.
          metadata_args: The per-document operations to apply.
      """
      for operation in metadata_args.operation_data:
          lock_key = f"document_metadata_lock_{operation.document_id}"

          # Acquire the lock outside the guarded section: if acquisition fails
          # the lock belongs to another operation and must NOT be deleted by us.
          try:
              MetadataService.knowledge_base_metadata_lock_check(None, operation.document_id)
          except Exception:
              logging.exception("Update documents metadata failed")
              continue

          try:
              document = DocumentService.get_document(dataset.id, operation.document_id)
              if document is None:
                  raise ValueError("Document not found.")
              # The previous metadata is discarded, not merged.
              doc_metadata = {}
              for metadata_value in operation.metadata_list:
                  doc_metadata[metadata_value.name] = metadata_value.value
              if dataset.built_in_field_enabled:
                  doc_metadata[BuiltInField.document_name.value] = document.name
                  doc_metadata[BuiltInField.uploader.value] = document.uploader
                  doc_metadata[BuiltInField.upload_date.value] = document.upload_date.timestamp()
                  doc_metadata[BuiltInField.last_update_date.value] = document.last_update_date.timestamp()
                  doc_metadata[BuiltInField.source.value] = MetadataDataSource[document.data_source_type].value
              document.doc_metadata = doc_metadata
              db.session.add(document)
              # deal metadata binding: drop the old rows and insert the new set
              db.session.query(DatasetMetadataBinding).filter_by(document_id=operation.document_id).delete()
              for metadata_value in operation.metadata_list:
                  dataset_metadata_binding = DatasetMetadataBinding(
                      tenant_id=current_user.current_tenant_id,
                      dataset_id=dataset.id,
                      document_id=operation.document_id,
                      metadata_id=metadata_value.id,
                      created_by=current_user.id,
                  )
                  db.session.add(dataset_metadata_binding)
              # Single commit: the document metadata and its bindings are
              # persisted together or not at all.
              db.session.commit()
          except Exception:
              logging.exception("Update documents metadata failed")
              # Reset the session so the next operation starts clean.
              db.session.rollback()
          finally:
              # Reached only when this iteration acquired the lock.
              redis_client.delete(lock_key)
  @staticmethod
  def knowledge_base_metadata_lock_check(dataset_id: Optional[str], document_id: Optional[str]):
      """Acquire the Redis metadata lock(s) for a dataset and/or a document.

      For each ID that is given (truthy), the matching key
      ``dataset_metadata_lock_<id>`` / ``document_metadata_lock_<id>`` is set
      with a 3600-second expiry. The caller is responsible for deleting the
      key once its operation is finished.

      Args:
          dataset_id: Dataset to lock, or ``None`` to skip the dataset lock.
          document_id: Document to lock, or ``None`` to skip the document lock.

      Raises:
          ValueError: If a requested lock key already exists, i.e. another
              metadata operation on that dataset/document is in progress.
      """
      dataset_lock_key = None
      if dataset_id:
          dataset_lock_key = f"dataset_metadata_lock_{dataset_id}"
          # SET with nx=True creates the key only if it is absent, in a single
          # atomic command, so two concurrent callers cannot both succeed the
          # way they could with a separate GET followed by SET.
          if not redis_client.set(dataset_lock_key, 1, ex=3600, nx=True):
              raise ValueError("Another knowledge base metadata operation is running, please wait a moment.")
      if document_id:
          document_lock_key = f"document_metadata_lock_{document_id}"
          if not redis_client.set(document_lock_key, 1, ex=3600, nx=True):
              # Do not leave a dataset lock taken by this same call behind
              # when the overall acquisition fails.
              if dataset_lock_key is not None:
                  redis_client.delete(dataset_lock_key)
              raise ValueError("Another document metadata operation is running, please wait a moment.")
  @staticmethod
  def get_dataset_metadatas(dataset: Dataset):
      """Summarise a dataset's custom metadata fields with their usage counts.

      Args:
          dataset: The dataset whose ``doc_metadata`` entries are listed.

      Returns:
          A dict with two keys. ``"doc_metadata"`` is a list of
          ``{"id", "name", "type", "count"}`` dicts, one per entry of
          ``dataset.doc_metadata`` whose id is not ``"built-in"``; ``count`` is
          the number of ``DatasetMetadataBinding`` rows for that metadata id in
          this dataset. ``"built_in_field_enabled"`` mirrors the dataset flag.
      """
      custom_fields = []
      for entry in dataset.doc_metadata or []:
          metadata_id = entry.get("id")
          # Entries tagged "built-in" are not custom fields; leave them out.
          if metadata_id == "built-in":
              continue
          field_name = entry.get("name")
          field_type = entry.get("type")
          # One COUNT query per custom field.
          bindings = db.session.query(DatasetMetadataBinding).filter_by(
              metadata_id=metadata_id, dataset_id=dataset.id
          )
          custom_fields.append(
              {
                  "id": metadata_id,
                  "name": field_name,
                  "type": field_type,
                  "count": bindings.count(),
              }
          )

      summary = {
          "doc_metadata": custom_fields,
          "built_in_field_enabled": dataset.built_in_field_enabled,
      }
      return summary