- import datetime
 - import time
 - 
 - import click
 - from sqlalchemy import text
 - from werkzeug.exceptions import NotFound
 - 
 - import app
 - from configs import dify_config
 - from extensions.ext_database import db
 - from models.dataset import Embedding
 - 
 - 
 - @app.celery.task(queue="dataset")
 - def clean_embedding_cache_task():
 -     click.echo(click.style("Start clean embedding cache.", fg="green"))
 -     clean_days = int(dify_config.PLAN_SANDBOX_CLEAN_DAY_SETTING)
 -     start_at = time.perf_counter()
 -     thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days)
 -     while True:
 -         try:
 -             embedding_ids = (
 -                 db.session.query(Embedding.id)
 -                 .filter(Embedding.created_at < thirty_days_ago)
 -                 .order_by(Embedding.created_at.desc())
 -                 .limit(100)
 -                 .all()
 -             )
 -             embedding_ids = [embedding_id[0] for embedding_id in embedding_ids]
 -         except NotFound:
 -             break
 -         if embedding_ids:
 -             for embedding_id in embedding_ids:
 -                 db.session.execute(
 -                     text("DELETE FROM embeddings WHERE id = :embedding_id"), {"embedding_id": embedding_id}
 -                 )
 - 
 -             db.session.commit()
 -         else:
 -             break
 -     end_at = time.perf_counter()
 -     click.echo(click.style("Cleaned embedding cache from db success latency: {}".format(end_at - start_at), fg="green"))
 
 
  |