您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

add_annotation_to_index_task.py 2.1KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. import logging
  2. import time
  3. import click
  4. from celery import shared_task
  5. from core.rag.datasource.vdb.vector_factory import Vector
  6. from core.rag.models.document import Document
  7. from extensions.ext_database import db
  8. from models.dataset import Dataset
  9. from services.dataset_service import DatasetCollectionBindingService
  10. logger = logging.getLogger(__name__)
  11. @shared_task(queue="dataset")
  12. def add_annotation_to_index_task(
  13. annotation_id: str, question: str, tenant_id: str, app_id: str, collection_binding_id: str
  14. ):
  15. """
  16. Add annotation to index.
  17. :param annotation_id: annotation id
  18. :param question: question
  19. :param tenant_id: tenant id
  20. :param app_id: app id
  21. :param collection_binding_id: embedding binding id
  22. Usage: clean_dataset_task.delay(dataset_id, tenant_id, indexing_technique, index_struct)
  23. """
  24. logger.info(click.style(f"Start build index for annotation: {annotation_id}", fg="green"))
  25. start_at = time.perf_counter()
  26. try:
  27. dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(
  28. collection_binding_id, "annotation"
  29. )
  30. dataset = Dataset(
  31. id=app_id,
  32. tenant_id=tenant_id,
  33. indexing_technique="high_quality",
  34. embedding_model_provider=dataset_collection_binding.provider_name,
  35. embedding_model=dataset_collection_binding.model_name,
  36. collection_binding_id=dataset_collection_binding.id,
  37. )
  38. document = Document(
  39. page_content=question, metadata={"annotation_id": annotation_id, "app_id": app_id, "doc_id": annotation_id}
  40. )
  41. vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"])
  42. vector.create([document], duplicate_check=True)
  43. end_at = time.perf_counter()
  44. logger.info(
  45. click.style(
  46. f"Build index successful for annotation: {annotation_id} latency: {end_at - start_at}",
  47. fg="green",
  48. )
  49. )
  50. except Exception:
  51. logger.exception("Build index for annotation failed")
  52. finally:
  53. db.session.close()