
deal db session in celery worker (#17549)

tags/1.2.0
Jyong, 6 months ago
parent commit c8145ce581
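Every file in this commit applies the same two-part pattern: lookup failures that previously raised (NotFound or ValueError) now log the miss, close the SQLAlchemy session, and return; and task bodies gain a finally: db.session.close() so the scoped session is released however the task exits. Condensed into one hypothetical task (the task name and the not-found branch are illustrative, not from the diff; the imports are the ones the diff itself touches), the shape is:

import logging

import click
from celery import shared_task  # type: ignore

from extensions.ext_database import db
from models.dataset import Dataset


@shared_task(queue="dataset")
def example_dataset_task(dataset_id: str):
    # Hypothetical task illustrating the pattern; not part of the commit.
    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
    if not dataset:
        # Before: raise NotFound("Dataset not found") -- the exception left
        # the scoped session (and its pooled connection) open in the worker.
        # After: log, close, and bail out quietly.
        logging.info(click.style("Dataset not found: {}".format(dataset_id), fg="red"))
        db.session.close()
        return
    try:
        # ... task-specific indexing work ...
        db.session.commit()
    except Exception:
        logging.exception("example_dataset_task failed")
    finally:
        # Release the session unconditionally so long-lived Celery worker
        # processes do not keep connections checked out between tasks.
        db.session.close()

The likely motivation: Celery worker processes are long-lived, so a session left open after an early return or an exception keeps its database connection checked out of the pool until that thread happens to be reused.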

api/tasks/add_document_to_index_task.py (+3, -2)

 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound

 from core.rag.index_processor.constant.index_type import IndexType
 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory

 dataset_document = db.session.query(DatasetDocument).filter(DatasetDocument.id == dataset_document_id).first()
 if not dataset_document:
-    raise NotFound("Document not found")
+    logging.info(click.style("Document not found: {}".format(dataset_document_id), fg="red"))
+    db.session.close()
+    return

 if dataset_document.indexing_status != "completed":
     return

api/tasks/annotation/add_annotation_to_index_task.py (+3, -0)

 from core.rag.datasource.vdb.vector_factory import Vector
 from core.rag.models.document import Document
+from extensions.ext_database import db
 from models.dataset import Dataset
 from services.dataset_service import DatasetCollectionBindingService

     )
 except Exception:
     logging.exception("Build index for annotation failed")
+finally:
+    db.session.close()

api/tasks/annotation/batch_import_annotations_task.py (+2, -0)

     indexing_error_msg_key = "app_annotation_batch_import_error_msg_{}".format(str(job_id))
     redis_client.setex(indexing_error_msg_key, 600, str(e))
     logging.exception("Build index for batch import annotations failed")
+finally:
+    db.session.close()

api/tasks/annotation/delete_annotation_index_task.py (+3, -0)

 from celery import shared_task  # type: ignore

 from core.rag.datasource.vdb.vector_factory import Vector
+from extensions.ext_database import db
 from models.dataset import Dataset
 from services.dataset_service import DatasetCollectionBindingService

     )
 except Exception as e:
     logging.exception("Annotation deleted index failed")
+finally:
+    db.session.close()

api/tasks/annotation/disable_annotation_reply_task.py (+7, -3)

 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound

 from core.rag.datasource.vdb.vector_factory import Vector
 from extensions.ext_database import db

 app = db.session.query(App).filter(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first()
 annotations_count = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app_id).count()
 if not app:
-    raise NotFound("App not found")
+    logging.info(click.style("App not found: {}".format(app_id), fg="red"))
+    db.session.close()
+    return

 app_annotation_setting = (
     db.session.query(AppAnnotationSetting).filter(AppAnnotationSetting.app_id == app_id).first()
 )

 if not app_annotation_setting:
-    raise NotFound("App annotation setting not found")
+    logging.info(click.style("App annotation setting not found: {}".format(app_id), fg="red"))
+    db.session.close()
+    return

 disable_app_annotation_key = "disable_app_annotation_{}".format(str(app_id))
 disable_app_annotation_job_key = "disable_app_annotation_job_{}".format(str(job_id))

     redis_client.setex(disable_app_annotation_error_key, 600, str(e))
 finally:
     redis_client.delete(disable_app_annotation_key)
+    db.session.close()

api/tasks/annotation/enable_annotation_reply_task.py (+4, -2)

 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound

 from core.rag.datasource.vdb.vector_factory import Vector
 from core.rag.models.document import Document

 app = db.session.query(App).filter(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first()

 if not app:
-    raise NotFound("App not found")
+    logging.info(click.style("App not found: {}".format(app_id), fg="red"))
+    db.session.close()
+    return

 annotations = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app_id).all()
 enable_app_annotation_key = "enable_app_annotation_{}".format(str(app_id))

     db.session.rollback()
 finally:
     redis_client.delete(enable_app_annotation_key)
+    db.session.close()

api/tasks/annotation/update_annotation_to_index_task.py (+3, -0)

 from core.rag.datasource.vdb.vector_factory import Vector
 from core.rag.models.document import Document
+from extensions.ext_database import db
 from models.dataset import Dataset
 from services.dataset_service import DatasetCollectionBindingService

     )
 except Exception:
     logging.exception("Build index for annotation failed")
+finally:
+    db.session.close()

api/tasks/batch_clean_document_task.py (+2, -0)

     )
 except Exception:
     logging.exception("Cleaned documents when documents deleted failed")
+finally:
+    db.session.close()

api/tasks/batch_create_segment_to_index_task.py (+2, -0)

 except Exception:
     logging.exception("Segments batch created index failed")
     redis_client.setex(indexing_cache_key, 600, "error")
+finally:
+    db.session.close()

api/tasks/clean_dataset_task.py (+2, -0)

     )
 except Exception:
     logging.exception("Cleaned dataset when dataset deleted failed")
+finally:
+    db.session.close()

api/tasks/clean_document_task.py (+2, -0)

     )
 except Exception:
     logging.exception("Cleaned document when document deleted failed")
+finally:
+    db.session.close()

api/tasks/clean_notion_document_task.py (+2, -0)

     )
 except Exception:
     logging.exception("Cleaned document when import form notion document deleted failed")
+finally:
+    db.session.close()

api/tasks/create_segment_to_index_task.py (+4, -2)

 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound

 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
 from core.rag.models.document import Document

 segment = db.session.query(DocumentSegment).filter(DocumentSegment.id == segment_id).first()
 if not segment:
-    raise NotFound("Segment not found")
+    logging.info(click.style("Segment not found: {}".format(segment_id), fg="red"))
+    db.session.close()
+    return

 if segment.status != "waiting":
     return

     db.session.commit()
 finally:
     redis_client.delete(indexing_cache_key)
+    db.session.close()

api/tasks/deal_dataset_vector_index_task.py (+2, -0)

     )
 except Exception:
     logging.exception("Deal dataset vector index failed")
+finally:
+    db.session.close()

api/tasks/delete_segment_from_index_task.py (+2, -0)

     logging.info(click.style("Segment deleted from index latency: {}".format(end_at - start_at), fg="green"))
 except Exception:
     logging.exception("delete segment from index failed")
+finally:
+    db.session.close()

api/tasks/disable_segment_from_index_task.py (+7, -3)

 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound

 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
 from extensions.ext_database import db

 segment = db.session.query(DocumentSegment).filter(DocumentSegment.id == segment_id).first()
 if not segment:
-    raise NotFound("Segment not found")
+    logging.info(click.style("Segment not found: {}".format(segment_id), fg="red"))
+    db.session.close()
+    return

 if segment.status != "completed":
-    raise NotFound("Segment is not completed , disable action is not allowed.")
+    logging.info(click.style("Segment is not completed, disable is not allowed: {}".format(segment_id), fg="red"))
+    db.session.close()
+    return

 indexing_cache_key = "segment_{}_indexing".format(segment.id)

     db.session.commit()
 finally:
     redis_client.delete(indexing_cache_key)
+    db.session.close()

api/tasks/disable_segments_from_index_task.py (+5, -0)

 dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
 if not dataset:
     logging.info(click.style("Dataset {} not found, pass.".format(dataset_id), fg="cyan"))
+    db.session.close()
     return

 dataset_document = db.session.query(DatasetDocument).filter(DatasetDocument.id == document_id).first()

 if not dataset_document:
     logging.info(click.style("Document {} not found, pass.".format(document_id), fg="cyan"))
+    db.session.close()
     return
 if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed":
     logging.info(click.style("Document {} status is invalid, pass.".format(document_id), fg="cyan"))
+    db.session.close()
     return
 # sync index processor
 index_processor = IndexProcessorFactory(dataset_document.doc_form).init_index_processor()

 )

 if not segments:
+    db.session.close()
     return

 try:
     for segment in segments:
         indexing_cache_key = "segment_{}_indexing".format(segment.id)
         redis_client.delete(indexing_cache_key)
+    db.session.close()
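The five separate close() calls in disable_segments_from_index_task.py show how repetitive this gets. Not part of this commit, but the cleanup could be centralized with a small context manager or decorator; a sketch, assuming db is the Flask-SQLAlchemy handle from extensions.ext_database and using hypothetical names:

from contextlib import contextmanager
from functools import wraps

from extensions.ext_database import db


@contextmanager
def session_scope():
    # Yield the scoped session and close it when the block exits,
    # whether via return, early exit, or exception.
    try:
        yield db.session
    finally:
        db.session.close()


def closes_session(fn):
    # Decorator variant: every return path of the wrapped task body,
    # including the early "not found" returns, still closes the session.
    @wraps(fn)
    def wrapper(*args, **kwargs):
        try:
            return fn(*args, **kwargs)
        finally:
            db.session.close()
    return wrapper

With closes_session stacked under @shared_task, the early-return branches above would only need the logging call.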

api/tasks/document_indexing_sync_task.py (+3, -2)

 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound

 from core.indexing_runner import DocumentIsPausedError, IndexingRunner
 from core.rag.extractor.notion_extractor import NotionExtractor

 document = db.session.query(Document).filter(Document.id == document_id, Document.dataset_id == dataset_id).first()

 if not document:
-    raise NotFound("Document not found")
+    logging.info(click.style("Document not found: {}".format(document_id), fg="red"))
+    db.session.close()
+    return

 data_source_info = document.data_source_info_dict
 if document.data_source_type == "notion_import":

api/tasks/document_indexing_task.py (+4, -0)

 dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
 if not dataset:
     logging.info(click.style("Dataset is not found: {}".format(dataset_id), fg="yellow"))
+    db.session.close()
     return
 # check document limit
 features = FeatureService.get_features(dataset.tenant_id)

     document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
     db.session.add(document)
     db.session.commit()
+    db.session.close()
     return

 for document_id in document_ids:

     logging.info(click.style(str(ex), fg="yellow"))
 except Exception:
     pass
+finally:
+    db.session.close()

api/tasks/document_indexing_update_task.py (+5, -2)

 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound

 from core.indexing_runner import DocumentIsPausedError, IndexingRunner
 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory

 document = db.session.query(Document).filter(Document.id == document_id, Document.dataset_id == dataset_id).first()

 if not document:
-    raise NotFound("Document not found")
+    logging.info(click.style("Document not found: {}".format(document_id), fg="red"))
+    db.session.close()
+    return

 document.indexing_status = "parsing"
 document.processing_started_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)

     logging.info(click.style(str(ex), fg="yellow"))
 except Exception:
     pass
+finally:
+    db.session.close()

api/tasks/duplicate_document_indexing_task.py (+7, -1)

 dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
 if dataset is None:
-    raise ValueError("Dataset not found")
+    logging.info(click.style("Dataset not found: {}".format(dataset_id), fg="red"))
+    db.session.close()
+    return

 # check document limit
 features = FeatureService.get_features(dataset.tenant_id)

         db.session.add(document)
         db.session.commit()
         return
+    finally:
+        db.session.close()

 for document_id in document_ids:
     logging.info(click.style("Start process document: {}".format(document_id), fg="green"))

     logging.info(click.style(str(ex), fg="yellow"))
 except Exception:
     pass
+finally:
+    db.session.close()

api/tasks/enable_segment_to_index_task.py (+7, -3)

 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound

 from core.rag.index_processor.constant.index_type import IndexType
 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory

 segment = db.session.query(DocumentSegment).filter(DocumentSegment.id == segment_id).first()
 if not segment:
-    raise NotFound("Segment not found")
+    logging.info(click.style("Segment not found: {}".format(segment_id), fg="red"))
+    db.session.close()
+    return

 if segment.status != "completed":
-    raise NotFound("Segment is not completed, enable action is not allowed.")
+    logging.info(click.style("Segment is not completed, enable is not allowed: {}".format(segment_id), fg="red"))
+    db.session.close()
+    return

 indexing_cache_key = "segment_{}_indexing".format(segment.id)

     db.session.commit()
 finally:
     redis_client.delete(indexing_cache_key)
+    db.session.close()

api/tasks/enable_segments_to_index_task.py (+5, -0)

 if not dataset_document:
     logging.info(click.style("Document {} not found, pass.".format(document_id), fg="cyan"))
+    db.session.close()
     return
 if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed":
     logging.info(click.style("Document {} status is invalid, pass.".format(document_id), fg="cyan"))
+    db.session.close()
     return
 # sync index processor
 index_processor = IndexProcessorFactory(dataset_document.doc_form).init_index_processor()

     .all()
 )
 if not segments:
+    logging.info(click.style("Segments not found: {}".format(segment_ids), fg="cyan"))
+    db.session.close()
     return

 try:
     for segment in segments:
         indexing_cache_key = "segment_{}_indexing".format(segment.id)
         redis_client.delete(indexing_cache_key)
+    db.session.close()

api/tasks/recover_document_indexing_task.py (+5, -2)

 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound

 from core.indexing_runner import DocumentIsPausedError, IndexingRunner
 from extensions.ext_database import db

 document = db.session.query(Document).filter(Document.id == document_id, Document.dataset_id == dataset_id).first()

 if not document:
-    raise NotFound("Document not found")
+    logging.info(click.style("Document not found: {}".format(document_id), fg="red"))
+    db.session.close()
+    return

 try:
     indexing_runner = IndexingRunner()

     logging.info(click.style(str(ex), fg="yellow"))
 except Exception:
     pass
+finally:
+    db.session.close()

api/tasks/remove_document_from_index_task.py (+6, -2)

 import click
 from celery import shared_task  # type: ignore
-from werkzeug.exceptions import NotFound

 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
 from extensions.ext_database import db

 document = db.session.query(Document).filter(Document.id == document_id).first()
 if not document:
-    raise NotFound("Document not found")
+    logging.info(click.style("Document not found: {}".format(document_id), fg="red"))
+    db.session.close()
+    return

 if document.indexing_status != "completed":
+    logging.info(click.style("Document is not completed, remove is not allowed: {}".format(document_id), fg="red"))
+    db.session.close()
     return

 indexing_cache_key = "document_{}_indexing".format(document.id)

     db.session.commit()
 finally:
     redis_client.delete(indexing_cache_key)
+    db.session.close()

api/tasks/retry_document_indexing_task.py (+7, -1)

 dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
 if not dataset:
-    raise ValueError("Dataset not found")
+    logging.info(click.style("Dataset not found: {}".format(dataset_id), fg="red"))
+    db.session.close()
+    return

 for document_id in document_ids:
     retry_indexing_cache_key = "document_{}_is_retried".format(document_id)

         db.session.add(document)
         db.session.commit()
         redis_client.delete(retry_indexing_cache_key)
+        db.session.close()
         return

     logging.info(click.style("Start retry document: {}".format(document_id), fg="green"))

     )
     if not document:
         logging.info(click.style("Document not found: {}".format(document_id), fg="yellow"))
+        db.session.close()
         return
     try:
         # clean old data

         logging.info(click.style(str(ex), fg="yellow"))
         redis_client.delete(retry_indexing_cache_key)
         pass
+    finally:
+        db.session.close()
 end_at = time.perf_counter()
 logging.info(click.style("Retry dataset: {} latency: {}".format(dataset_id, end_at - start_at), fg="green"))
