ソースを参照

update clean embedding cache query logic (#6483)

tags/0.6.15
Jyong 1年前
コミット
1e0e573165
コミッターのメールアドレスに関連付けられたアカウントが存在しません

+ 32
- 0
api/migrations/versions/6e957a32015b_add_embedding_cache_created_at_index.py ファイルの表示

@@ -0,0 +1,32 @@
"""add-embedding-cache-created_at_index

Revision ID: 6e957a32015b
Revises: fecff1c3da27
Create Date: 2024-07-19 17:21:34.414705

"""
from alembic import op

import models as models

# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the ID of the
# migration it revises; both match the "Revision ID" / "Revises" lines in the
# module docstring.
revision = '6e957a32015b'
down_revision = 'fecff1c3da27'
# No branch labels and no extra revision dependencies are declared.
branch_labels = None
depends_on = None


def upgrade():
    """Create the non-unique ``created_at_idx`` index on ``embeddings.created_at``.

    NOTE(review): presumably added so that queries filtering embeddings by
    ``created_at`` can use an index -- confirm against the callers.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('embeddings', schema=None) as embeddings_table:
        embeddings_table.create_index(
            'created_at_idx',
            ['created_at'],
            unique=False,
        )

    # ### end Alembic commands ###


def downgrade():
    """Drop the ``created_at_idx`` index from the ``embeddings`` table.

    This reverses the index creation performed by ``upgrade``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('embeddings', schema=None) as embeddings_table:
        embeddings_table.drop_index('created_at_idx')

    # ### end Alembic commands ###

+ 2
- 1
api/models/dataset.py ファイルの表示

@@ -630,7 +630,8 @@ class Embedding(db.Model):
__tablename__ = 'embeddings'
__table_args__ = (
db.PrimaryKeyConstraint('id', name='embedding_pkey'),
db.UniqueConstraint('model_name', 'hash', 'provider_name', name='embedding_hash_idx')
db.UniqueConstraint('model_name', 'hash', 'provider_name', name='embedding_hash_idx'),
db.Index('created_at_idx', 'created_at')
)

id = db.Column(StringUUID, primary_key=True, server_default=db.text('uuid_generate_v4()'))

+ 1
- 1
api/models/model.py ファイルの表示

@@ -1383,7 +1383,7 @@ class TraceAppConfig(db.Model):
__tablename__ = 'trace_app_config'
__table_args__ = (
db.PrimaryKeyConstraint('id', name='tracing_app_config_pkey'),
db.Index('tracing_app_config_app_id_idx', 'app_id'),
db.Index('trace_app_config_app_id_idx', 'app_id'),
)

id = db.Column(StringUUID, server_default=db.text('uuid_generate_v4()'))

+ 11
- 4
api/schedule/clean_embedding_cache_task.py ファイルの表示

@@ -2,6 +2,7 @@ import datetime
import time

import click
from sqlalchemy import text
from werkzeug.exceptions import NotFound

import app
@@ -18,12 +19,18 @@ def clean_embedding_cache_task():
thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days)
while True:
try:
embeddings = db.session.query(Embedding).filter(Embedding.created_at < thirty_days_ago) \
embedding_ids = db.session.query(Embedding.id).filter(Embedding.created_at < thirty_days_ago) \
.order_by(Embedding.created_at.desc()).limit(100).all()
embedding_ids = [embedding_id[0] for embedding_id in embedding_ids]
except NotFound:
break
for embedding in embeddings:
db.session.delete(embedding)
db.session.commit()
if embedding_ids:
db.session.execute(text(
"DELETE FROM embeddings WHERE id in :embedding_ids"
), {'embedding_ids': tuple(embedding_ids)})

db.session.commit()
else:
break
end_at = time.perf_counter()
click.echo(click.style('Cleaned embedding cache from db success latency: {}'.format(end_at - start_at), fg='green'))

読み込み中…
キャンセル
保存