You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

enable_annotation_reply_task.py 5.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. import datetime
  2. import logging
  3. import time
  4. import click
  5. from celery import shared_task # type: ignore
  6. from core.rag.datasource.vdb.vector_factory import Vector
  7. from core.rag.models.document import Document
  8. from extensions.ext_database import db
  9. from extensions.ext_redis import redis_client
  10. from models.dataset import Dataset
  11. from models.model import App, AppAnnotationSetting, MessageAnnotation
  12. from services.dataset_service import DatasetCollectionBindingService
  13. @shared_task(queue="dataset")
  14. def enable_annotation_reply_task(
  15. job_id: str,
  16. app_id: str,
  17. user_id: str,
  18. tenant_id: str,
  19. score_threshold: float,
  20. embedding_provider_name: str,
  21. embedding_model_name: str,
  22. ):
  23. """
  24. Async enable annotation reply task
  25. """
  26. logging.info(click.style("Start add app annotation to index: {}".format(app_id), fg="green"))
  27. start_at = time.perf_counter()
  28. # get app info
  29. app = db.session.query(App).filter(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first()
  30. if not app:
  31. logging.info(click.style("App not found: {}".format(app_id), fg="red"))
  32. db.session.close()
  33. return
  34. annotations = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app_id).all()
  35. enable_app_annotation_key = "enable_app_annotation_{}".format(str(app_id))
  36. enable_app_annotation_job_key = "enable_app_annotation_job_{}".format(str(job_id))
  37. try:
  38. documents = []
  39. dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
  40. embedding_provider_name, embedding_model_name, "annotation"
  41. )
  42. annotation_setting = (
  43. db.session.query(AppAnnotationSetting).filter(AppAnnotationSetting.app_id == app_id).first()
  44. )
  45. if annotation_setting:
  46. if dataset_collection_binding.id != annotation_setting.collection_binding_id:
  47. old_dataset_collection_binding = (
  48. DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(
  49. annotation_setting.collection_binding_id, "annotation"
  50. )
  51. )
  52. if old_dataset_collection_binding and annotations:
  53. old_dataset = Dataset(
  54. id=app_id,
  55. tenant_id=tenant_id,
  56. indexing_technique="high_quality",
  57. embedding_model_provider=old_dataset_collection_binding.provider_name,
  58. embedding_model=old_dataset_collection_binding.model_name,
  59. collection_binding_id=old_dataset_collection_binding.id,
  60. )
  61. old_vector = Vector(old_dataset, attributes=["doc_id", "annotation_id", "app_id"])
  62. try:
  63. old_vector.delete()
  64. except Exception as e:
  65. logging.info(click.style("Delete annotation index error: {}".format(str(e)), fg="red"))
  66. annotation_setting.score_threshold = score_threshold
  67. annotation_setting.collection_binding_id = dataset_collection_binding.id
  68. annotation_setting.updated_user_id = user_id
  69. annotation_setting.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
  70. db.session.add(annotation_setting)
  71. else:
  72. new_app_annotation_setting = AppAnnotationSetting(
  73. app_id=app_id,
  74. score_threshold=score_threshold,
  75. collection_binding_id=dataset_collection_binding.id,
  76. created_user_id=user_id,
  77. updated_user_id=user_id,
  78. )
  79. db.session.add(new_app_annotation_setting)
  80. dataset = Dataset(
  81. id=app_id,
  82. tenant_id=tenant_id,
  83. indexing_technique="high_quality",
  84. embedding_model_provider=embedding_provider_name,
  85. embedding_model=embedding_model_name,
  86. collection_binding_id=dataset_collection_binding.id,
  87. )
  88. if annotations:
  89. for annotation in annotations:
  90. document = Document(
  91. page_content=annotation.question,
  92. metadata={"annotation_id": annotation.id, "app_id": app_id, "doc_id": annotation.id},
  93. )
  94. documents.append(document)
  95. vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"])
  96. try:
  97. vector.delete_by_metadata_field("app_id", app_id)
  98. except Exception as e:
  99. logging.info(click.style("Delete annotation index error: {}".format(str(e)), fg="red"))
  100. vector.create(documents)
  101. db.session.commit()
  102. redis_client.setex(enable_app_annotation_job_key, 600, "completed")
  103. end_at = time.perf_counter()
  104. logging.info(
  105. click.style("App annotations added to index: {} latency: {}".format(app_id, end_at - start_at), fg="green")
  106. )
  107. except Exception as e:
  108. logging.exception("Annotation batch created index failed")
  109. redis_client.setex(enable_app_annotation_job_key, 600, "error")
  110. enable_app_annotation_error_key = "enable_app_annotation_error_{}".format(str(job_id))
  111. redis_client.setex(enable_app_annotation_error_key, 600, str(e))
  112. db.session.rollback()
  113. finally:
  114. redis_client.delete(enable_app_annotation_key)
  115. db.session.close()