Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

enable_annotation_reply_task.py 5.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. import logging
  2. import time
  3. import click
  4. from celery import shared_task
  5. from core.rag.datasource.vdb.vector_factory import Vector
  6. from core.rag.models.document import Document
  7. from extensions.ext_database import db
  8. from extensions.ext_redis import redis_client
  9. from libs.datetime_utils import naive_utc_now
  10. from models.dataset import Dataset
  11. from models.model import App, AppAnnotationSetting, MessageAnnotation
  12. from services.dataset_service import DatasetCollectionBindingService
  13. logger = logging.getLogger(__name__)
  14. @shared_task(queue="dataset")
  15. def enable_annotation_reply_task(
  16. job_id: str,
  17. app_id: str,
  18. user_id: str,
  19. tenant_id: str,
  20. score_threshold: float,
  21. embedding_provider_name: str,
  22. embedding_model_name: str,
  23. ):
  24. """
  25. Async enable annotation reply task
  26. """
  27. logger.info(click.style(f"Start add app annotation to index: {app_id}", fg="green"))
  28. start_at = time.perf_counter()
  29. # get app info
  30. app = db.session.query(App).where(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first()
  31. if not app:
  32. logger.info(click.style(f"App not found: {app_id}", fg="red"))
  33. db.session.close()
  34. return
  35. annotations = db.session.query(MessageAnnotation).where(MessageAnnotation.app_id == app_id).all()
  36. enable_app_annotation_key = f"enable_app_annotation_{str(app_id)}"
  37. enable_app_annotation_job_key = f"enable_app_annotation_job_{str(job_id)}"
  38. try:
  39. documents = []
  40. dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
  41. embedding_provider_name, embedding_model_name, "annotation"
  42. )
  43. annotation_setting = db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app_id).first()
  44. if annotation_setting:
  45. if dataset_collection_binding.id != annotation_setting.collection_binding_id:
  46. old_dataset_collection_binding = (
  47. DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(
  48. annotation_setting.collection_binding_id, "annotation"
  49. )
  50. )
  51. if old_dataset_collection_binding and annotations:
  52. old_dataset = Dataset(
  53. id=app_id,
  54. tenant_id=tenant_id,
  55. indexing_technique="high_quality",
  56. embedding_model_provider=old_dataset_collection_binding.provider_name,
  57. embedding_model=old_dataset_collection_binding.model_name,
  58. collection_binding_id=old_dataset_collection_binding.id,
  59. )
  60. old_vector = Vector(old_dataset, attributes=["doc_id", "annotation_id", "app_id"])
  61. try:
  62. old_vector.delete()
  63. except Exception as e:
  64. logger.info(click.style(f"Delete annotation index error: {str(e)}", fg="red"))
  65. annotation_setting.score_threshold = score_threshold
  66. annotation_setting.collection_binding_id = dataset_collection_binding.id
  67. annotation_setting.updated_user_id = user_id
  68. annotation_setting.updated_at = naive_utc_now()
  69. db.session.add(annotation_setting)
  70. else:
  71. new_app_annotation_setting = AppAnnotationSetting(
  72. app_id=app_id,
  73. score_threshold=score_threshold,
  74. collection_binding_id=dataset_collection_binding.id,
  75. created_user_id=user_id,
  76. updated_user_id=user_id,
  77. )
  78. db.session.add(new_app_annotation_setting)
  79. dataset = Dataset(
  80. id=app_id,
  81. tenant_id=tenant_id,
  82. indexing_technique="high_quality",
  83. embedding_model_provider=embedding_provider_name,
  84. embedding_model=embedding_model_name,
  85. collection_binding_id=dataset_collection_binding.id,
  86. )
  87. if annotations:
  88. for annotation in annotations:
  89. document = Document(
  90. page_content=annotation.question,
  91. metadata={"annotation_id": annotation.id, "app_id": app_id, "doc_id": annotation.id},
  92. )
  93. documents.append(document)
  94. vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"])
  95. try:
  96. vector.delete_by_metadata_field("app_id", app_id)
  97. except Exception as e:
  98. logger.info(click.style(f"Delete annotation index error: {str(e)}", fg="red"))
  99. vector.create(documents)
  100. db.session.commit()
  101. redis_client.setex(enable_app_annotation_job_key, 600, "completed")
  102. end_at = time.perf_counter()
  103. logger.info(click.style(f"App annotations added to index: {app_id} latency: {end_at - start_at}", fg="green"))
  104. except Exception as e:
  105. logger.exception("Annotation batch created index failed")
  106. redis_client.setex(enable_app_annotation_job_key, 600, "error")
  107. enable_app_annotation_error_key = f"enable_app_annotation_error_{str(job_id)}"
  108. redis_client.setex(enable_app_annotation_error_key, 600, str(e))
  109. db.session.rollback()
  110. finally:
  111. redis_client.delete(enable_app_annotation_key)
  112. db.session.close()