@@ -0,0 +1,305 @@
import datetime
import json
import logging
import time
from concurrent.futures import ThreadPoolExecutor

import click
from flask import Flask, current_app
from sqlalchemy.orm import Session

from configs import dify_config
from core.model_runtime.utils.encoders import jsonable_encoder
from extensions.ext_database import db
from extensions.ext_storage import storage
from models.account import Tenant
from models.model import App, Conversation, Message
from models.workflow import WorkflowNodeExecution, WorkflowRun
from services.billing_service import BillingService

logger = logging.getLogger(__name__)


class ClearFreePlanTenantExpiredLogs:
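    """Archive and purge expired logs for tenants on the free (sandbox) plan.

    For each tenant, rows older than the cutoff are first exported as JSON to
    storage under ``free_plan_tenant_expired_logs/<tenant_id>/...`` and then
    deleted in batches.
    """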

    @classmethod
    def process_tenant(cls, flask_app: Flask, tenant_id: str, days: int, batch: int):
        with flask_app.app_context():
            apps = db.session.query(App).filter(App.tenant_id == tenant_id).all()
            app_ids = [app.id for app in apps]
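
            # Messages: pull one batch of rows older than the cutoff, export
            # it to storage, delete it, and repeat until none remain.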
            while True:
                with Session(db.engine).no_autoflush as session:
                    messages = (
                        session.query(Message)
                        .filter(
                            Message.app_id.in_(app_ids),
                            Message.created_at < datetime.datetime.now() - datetime.timedelta(days=days),
                        )
                        .limit(batch)
                        .all()
                    )
                    if len(messages) == 0:
                        break

                    storage.save(
                        f"free_plan_tenant_expired_logs/"
                        f"{tenant_id}/messages/{datetime.datetime.now().strftime('%Y-%m-%d')}"
                        f"-{time.time()}.json",
                        json.dumps(
                            jsonable_encoder(
                                [message.to_dict() for message in messages],
                            ),
                        ).encode("utf-8"),
                    )

                    message_ids = [message.id for message in messages]

                    # delete messages
                    session.query(Message).filter(
                        Message.id.in_(message_ids),
                    ).delete(synchronize_session=False)

                    session.commit()

                    click.echo(
                        click.style(
                            f"[{datetime.datetime.now()}] Processed {len(message_ids)} messages for tenant {tenant_id}"
                        )
                    )
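
            # Conversations: same export-then-delete batching, keyed on
            # updated_at rather than created_at.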
            while True:
                with Session(db.engine).no_autoflush as session:
                    conversations = (
                        session.query(Conversation)
                        .filter(
                            Conversation.app_id.in_(app_ids),
                            Conversation.updated_at < datetime.datetime.now() - datetime.timedelta(days=days),
                        )
                        .limit(batch)
                        .all()
                    )

                    if len(conversations) == 0:
                        break

                    storage.save(
                        f"free_plan_tenant_expired_logs/"
                        f"{tenant_id}/conversations/{datetime.datetime.now().strftime('%Y-%m-%d')}"
                        f"-{time.time()}.json",
                        json.dumps(
                            jsonable_encoder(
                                [conversation.to_dict() for conversation in conversations],
                            ),
                        ).encode("utf-8"),
                    )

                    conversation_ids = [conversation.id for conversation in conversations]
                    session.query(Conversation).filter(
                        Conversation.id.in_(conversation_ids),
                    ).delete(synchronize_session=False)
                    session.commit()

                    click.echo(
                        click.style(
                            f"[{datetime.datetime.now()}] Processed {len(conversation_ids)}"
                            f" conversations for tenant {tenant_id}"
                        )
                    )
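
            # Workflow node executions are scoped by tenant_id directly
            # instead of by app_ids.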
            while True:
                with Session(db.engine).no_autoflush as session:
                    workflow_node_executions = (
                        session.query(WorkflowNodeExecution)
                        .filter(
                            WorkflowNodeExecution.tenant_id == tenant_id,
                            WorkflowNodeExecution.created_at < datetime.datetime.now() - datetime.timedelta(days=days),
                        )
                        .limit(batch)
                        .all()
                    )

                    if len(workflow_node_executions) == 0:
                        break

                    # save workflow node executions
                    storage.save(
                        f"free_plan_tenant_expired_logs/"
                        f"{tenant_id}/workflow_node_executions/{datetime.datetime.now().strftime('%Y-%m-%d')}"
                        f"-{time.time()}.json",
                        json.dumps(
                            jsonable_encoder(workflow_node_executions),
                        ).encode("utf-8"),
                    )

                    workflow_node_execution_ids = [
                        workflow_node_execution.id for workflow_node_execution in workflow_node_executions
                    ]

                    # delete workflow node executions
                    session.query(WorkflowNodeExecution).filter(
                        WorkflowNodeExecution.id.in_(workflow_node_execution_ids),
                    ).delete(synchronize_session=False)
                    session.commit()

                    click.echo(
                        click.style(
                            f"[{datetime.datetime.now()}] Processed {len(workflow_node_execution_ids)}"
                            f" workflow node executions for tenant {tenant_id}"
                        )
                    )
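
            # Workflow runs: same pattern as workflow node executions.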
            while True:
                with Session(db.engine).no_autoflush as session:
                    workflow_runs = (
                        session.query(WorkflowRun)
                        .filter(
                            WorkflowRun.tenant_id == tenant_id,
                            WorkflowRun.created_at < datetime.datetime.now() - datetime.timedelta(days=days),
                        )
                        .limit(batch)
                        .all()
                    )

                    if len(workflow_runs) == 0:
                        break

                    # save workflow runs
                    storage.save(
                        f"free_plan_tenant_expired_logs/"
                        f"{tenant_id}/workflow_runs/{datetime.datetime.now().strftime('%Y-%m-%d')}"
                        f"-{time.time()}.json",
                        json.dumps(
                            jsonable_encoder(
                                [workflow_run.to_dict() for workflow_run in workflow_runs],
                            ),
                        ).encode("utf-8"),
                    )

                    workflow_run_ids = [workflow_run.id for workflow_run in workflow_runs]

                    # delete workflow runs
                    session.query(WorkflowRun).filter(
                        WorkflowRun.id.in_(workflow_run_ids),
                    ).delete(synchronize_session=False)
                    session.commit()

    @classmethod
    def process(cls, days: int, batch: int, tenant_ids: list[str]):
        """
        Clear free plan tenant expired logs.

        Logs older than `days` are exported and deleted in batches of `batch`
        rows. If `tenant_ids` is given, only those tenants are processed;
        otherwise every tenant is scanned in creation-time order.
        """

        click.echo(click.style("Clearing free plan tenant expired logs", fg="white"))
        ended_at = datetime.datetime.now()
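        # scan from a fixed epoch (presumably around the first tenant's
        # creation time in this deployment) up to now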
        started_at = datetime.datetime(2023, 4, 3, 8, 59, 24)
        current_time = started_at

        with Session(db.engine) as session:
            total_tenant_count = session.query(Tenant.id).count()

        click.echo(click.style(f"Total tenant count: {total_tenant_count}", fg="white"))

        handled_tenant_count = 0
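
        # process tenants concurrently on a small worker pool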
        thread_pool = ThreadPoolExecutor(max_workers=10)
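
        # wrapper that skips non-sandbox tenants when billing is enabled
        # and tracks overall progress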
        def process_tenant(flask_app: Flask, tenant_id: str) -> None:
            try:
                if (
                    not dify_config.BILLING_ENABLED
                    or BillingService.get_info(tenant_id)["subscription"]["plan"] == "sandbox"
                ):
                    # only process sandbox tenant
                    cls.process_tenant(flask_app, tenant_id, days, batch)
            except Exception:
                logger.exception(f"Failed to process tenant {tenant_id}")
            finally:
                nonlocal handled_tenant_count
                handled_tenant_count += 1
                if handled_tenant_count % 100 == 0:
                    click.echo(
                        click.style(
                            f"[{datetime.datetime.now()}] "
                            f"Processed {handled_tenant_count} tenants "
                            f"({(handled_tenant_count / total_tenant_count) * 100:.1f}%), "
                            f"{handled_tenant_count}/{total_tenant_count}",
                            fg="green",
                        )
                    )
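
        # one submitted task per tenant; the futures are collected so the
        # command can block on completion at the end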
        futures = []

        if tenant_ids:
            for tenant_id in tenant_ids:
                futures.append(
                    thread_pool.submit(
                        process_tenant,
                        current_app._get_current_object(),  # type: ignore[attr-defined]
                        tenant_id,
                    )
                )
        else:
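            # no explicit tenant list: walk the tenant table in
            # creation-time windows sized to roughly 100 tenants each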
            while current_time < ended_at:
                click.echo(
                    click.style(f"Current window start: {current_time}, now: {datetime.datetime.now()}", fg="white")
                )
                # Initial interval of 1 day, will be dynamically adjusted based on tenant count
                interval = datetime.timedelta(days=1)
                # Process tenants in this batch
                with Session(db.engine) as session:
                    # Calculate tenant count in next batch with current interval
                    # Try different intervals until we find one with a reasonable tenant count
                    test_intervals = [
                        datetime.timedelta(days=1),
                        datetime.timedelta(hours=12),
                        datetime.timedelta(hours=6),
                        datetime.timedelta(hours=3),
                        datetime.timedelta(hours=1),
                    ]

                    for test_interval in test_intervals:
                        tenant_count = (
                            session.query(Tenant.id)
                            .filter(Tenant.created_at.between(current_time, current_time + test_interval))
                            .count()
                        )
                        if tenant_count <= 100:
                            interval = test_interval
                            break
                    else:
                        # If all intervals have too many tenants, use minimum interval
                        interval = datetime.timedelta(hours=1)

                    # Adjust interval to target ~100 tenants per batch
                    if tenant_count > 0:
                        # Scale interval based on ratio to target count
                        interval = min(
                            datetime.timedelta(days=1),  # Max 1 day
                            max(
                                datetime.timedelta(hours=1),  # Min 1 hour
                                interval * (100 / tenant_count),  # Scale to target 100
                            ),
                        )

                    batch_end = min(current_time + interval, ended_at)

                    rs = (
                        session.query(Tenant.id)
                        .filter(Tenant.created_at.between(current_time, batch_end))
                        .order_by(Tenant.created_at)
                    )

                    for row in rs:
                        tenant_id = str(row.id)
                        futures.append(
                            thread_pool.submit(
                                process_tenant,
                                current_app._get_current_object(),  # type: ignore[attr-defined]
                                tenant_id,
                            )
                        )

                current_time = batch_end

        # wait for all threads to finish
        for future in futures:
            future.result()