Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

update_annotation_to_index_task.py 2.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. import logging
  2. import time
  3. import click
  4. from celery import shared_task
  5. from core.rag.datasource.vdb.vector_factory import Vector
  6. from core.rag.models.document import Document
  7. from extensions.ext_database import db
  8. from models.dataset import Dataset
  9. from services.dataset_service import DatasetCollectionBindingService
  10. logger = logging.getLogger(__name__)
  11. @shared_task(queue="dataset")
  12. def update_annotation_to_index_task(
  13. annotation_id: str, question: str, tenant_id: str, app_id: str, collection_binding_id: str
  14. ):
  15. """
  16. Update annotation to index.
  17. :param annotation_id: annotation id
  18. :param question: question
  19. :param tenant_id: tenant id
  20. :param app_id: app id
  21. :param collection_binding_id: embedding binding id
  22. Usage: clean_dataset_task.delay(dataset_id, tenant_id, indexing_technique, index_struct)
  23. """
  24. logger.info(click.style(f"Start update index for annotation: {annotation_id}", fg="green"))
  25. start_at = time.perf_counter()
  26. try:
  27. dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(
  28. collection_binding_id, "annotation"
  29. )
  30. dataset = Dataset(
  31. id=app_id,
  32. tenant_id=tenant_id,
  33. indexing_technique="high_quality",
  34. embedding_model_provider=dataset_collection_binding.provider_name,
  35. embedding_model=dataset_collection_binding.model_name,
  36. collection_binding_id=dataset_collection_binding.id,
  37. )
  38. document = Document(
  39. page_content=question, metadata={"annotation_id": annotation_id, "app_id": app_id, "doc_id": annotation_id}
  40. )
  41. vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"])
  42. vector.delete_by_metadata_field("annotation_id", annotation_id)
  43. vector.add_texts([document])
  44. end_at = time.perf_counter()
  45. logger.info(
  46. click.style(
  47. f"Build index successful for annotation: {annotation_id} latency: {end_at - start_at}",
  48. fg="green",
  49. )
  50. )
  51. except Exception:
  52. logger.exception("Build index for annotation failed")
  53. finally:
  54. db.session.close()