You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

remove_app_and_related_data_task.py 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. import logging
  2. import time
  3. from collections.abc import Callable
  4. import click
  5. import sqlalchemy as sa
  6. from celery import shared_task
  7. from sqlalchemy import delete
  8. from sqlalchemy.exc import SQLAlchemyError
  9. from sqlalchemy.orm import sessionmaker
  10. from extensions.ext_database import db
  11. from models import (
  12. ApiToken,
  13. AppAnnotationHitHistory,
  14. AppAnnotationSetting,
  15. AppDatasetJoin,
  16. AppMCPServer,
  17. AppModelConfig,
  18. Conversation,
  19. EndUser,
  20. InstalledApp,
  21. Message,
  22. MessageAgentThought,
  23. MessageAnnotation,
  24. MessageChain,
  25. MessageFeedback,
  26. MessageFile,
  27. RecommendedApp,
  28. Site,
  29. TagBinding,
  30. TraceAppConfig,
  31. )
  32. from models.tools import WorkflowToolProvider
  33. from models.web import PinnedConversation, SavedMessage
  34. from models.workflow import (
  35. ConversationVariable,
  36. Workflow,
  37. WorkflowAppLog,
  38. )
  39. from repositories.factory import DifyAPIRepositoryFactory
  40. logger = logging.getLogger(__name__)
  41. @shared_task(queue="app_deletion", bind=True, max_retries=3)
  42. def remove_app_and_related_data_task(self, tenant_id: str, app_id: str):
  43. logger.info(click.style(f"Start deleting app and related data: {tenant_id}:{app_id}", fg="green"))
  44. start_at = time.perf_counter()
  45. try:
  46. # Delete related data
  47. _delete_app_model_configs(tenant_id, app_id)
  48. _delete_app_site(tenant_id, app_id)
  49. _delete_app_mcp_servers(tenant_id, app_id)
  50. _delete_app_api_tokens(tenant_id, app_id)
  51. _delete_installed_apps(tenant_id, app_id)
  52. _delete_recommended_apps(tenant_id, app_id)
  53. _delete_app_annotation_data(tenant_id, app_id)
  54. _delete_app_dataset_joins(tenant_id, app_id)
  55. _delete_app_workflows(tenant_id, app_id)
  56. _delete_app_workflow_runs(tenant_id, app_id)
  57. _delete_app_workflow_node_executions(tenant_id, app_id)
  58. _delete_app_workflow_app_logs(tenant_id, app_id)
  59. _delete_app_conversations(tenant_id, app_id)
  60. _delete_app_messages(tenant_id, app_id)
  61. _delete_workflow_tool_providers(tenant_id, app_id)
  62. _delete_app_tag_bindings(tenant_id, app_id)
  63. _delete_end_users(tenant_id, app_id)
  64. _delete_trace_app_configs(tenant_id, app_id)
  65. _delete_conversation_variables(app_id=app_id)
  66. _delete_draft_variables(app_id)
  67. end_at = time.perf_counter()
  68. logger.info(click.style(f"App and related data deleted: {app_id} latency: {end_at - start_at}", fg="green"))
  69. except SQLAlchemyError as e:
  70. logger.exception(click.style(f"Database error occurred while deleting app {app_id} and related data", fg="red"))
  71. raise self.retry(exc=e, countdown=60) # Retry after 60 seconds
  72. except Exception as e:
  73. logger.exception(click.style(f"Error occurred while deleting app {app_id} and related data", fg="red"))
  74. raise self.retry(exc=e, countdown=60) # Retry after 60 seconds
  75. def _delete_app_model_configs(tenant_id: str, app_id: str):
  76. def del_model_config(model_config_id: str):
  77. db.session.query(AppModelConfig).where(AppModelConfig.id == model_config_id).delete(synchronize_session=False)
  78. _delete_records(
  79. """select id from app_model_configs where app_id=:app_id limit 1000""",
  80. {"app_id": app_id},
  81. del_model_config,
  82. "app model config",
  83. )
  84. def _delete_app_site(tenant_id: str, app_id: str):
  85. def del_site(site_id: str):
  86. db.session.query(Site).where(Site.id == site_id).delete(synchronize_session=False)
  87. _delete_records(
  88. """select id from sites where app_id=:app_id limit 1000""",
  89. {"app_id": app_id},
  90. del_site,
  91. "site",
  92. )
  93. def _delete_app_mcp_servers(tenant_id: str, app_id: str):
  94. def del_mcp_server(mcp_server_id: str):
  95. db.session.query(AppMCPServer).where(AppMCPServer.id == mcp_server_id).delete(synchronize_session=False)
  96. _delete_records(
  97. """select id from app_mcp_servers where app_id=:app_id limit 1000""",
  98. {"app_id": app_id},
  99. del_mcp_server,
  100. "app mcp server",
  101. )
  102. def _delete_app_api_tokens(tenant_id: str, app_id: str):
  103. def del_api_token(api_token_id: str):
  104. db.session.query(ApiToken).where(ApiToken.id == api_token_id).delete(synchronize_session=False)
  105. _delete_records(
  106. """select id from api_tokens where app_id=:app_id limit 1000""",
  107. {"app_id": app_id},
  108. del_api_token,
  109. "api token",
  110. )
  111. def _delete_installed_apps(tenant_id: str, app_id: str):
  112. def del_installed_app(installed_app_id: str):
  113. db.session.query(InstalledApp).where(InstalledApp.id == installed_app_id).delete(synchronize_session=False)
  114. _delete_records(
  115. """select id from installed_apps where tenant_id=:tenant_id and app_id=:app_id limit 1000""",
  116. {"tenant_id": tenant_id, "app_id": app_id},
  117. del_installed_app,
  118. "installed app",
  119. )
  120. def _delete_recommended_apps(tenant_id: str, app_id: str):
  121. def del_recommended_app(recommended_app_id: str):
  122. db.session.query(RecommendedApp).where(RecommendedApp.id == recommended_app_id).delete(
  123. synchronize_session=False
  124. )
  125. _delete_records(
  126. """select id from recommended_apps where app_id=:app_id limit 1000""",
  127. {"app_id": app_id},
  128. del_recommended_app,
  129. "recommended app",
  130. )
  131. def _delete_app_annotation_data(tenant_id: str, app_id: str):
  132. def del_annotation_hit_history(annotation_hit_history_id: str):
  133. db.session.query(AppAnnotationHitHistory).where(AppAnnotationHitHistory.id == annotation_hit_history_id).delete(
  134. synchronize_session=False
  135. )
  136. _delete_records(
  137. """select id from app_annotation_hit_histories where app_id=:app_id limit 1000""",
  138. {"app_id": app_id},
  139. del_annotation_hit_history,
  140. "annotation hit history",
  141. )
  142. def del_annotation_setting(annotation_setting_id: str):
  143. db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.id == annotation_setting_id).delete(
  144. synchronize_session=False
  145. )
  146. _delete_records(
  147. """select id from app_annotation_settings where app_id=:app_id limit 1000""",
  148. {"app_id": app_id},
  149. del_annotation_setting,
  150. "annotation setting",
  151. )
  152. def _delete_app_dataset_joins(tenant_id: str, app_id: str):
  153. def del_dataset_join(dataset_join_id: str):
  154. db.session.query(AppDatasetJoin).where(AppDatasetJoin.id == dataset_join_id).delete(synchronize_session=False)
  155. _delete_records(
  156. """select id from app_dataset_joins where app_id=:app_id limit 1000""",
  157. {"app_id": app_id},
  158. del_dataset_join,
  159. "dataset join",
  160. )
  161. def _delete_app_workflows(tenant_id: str, app_id: str):
  162. def del_workflow(workflow_id: str):
  163. db.session.query(Workflow).where(Workflow.id == workflow_id).delete(synchronize_session=False)
  164. _delete_records(
  165. """select id from workflows where tenant_id=:tenant_id and app_id=:app_id limit 1000""",
  166. {"tenant_id": tenant_id, "app_id": app_id},
  167. del_workflow,
  168. "workflow",
  169. )
  170. def _delete_app_workflow_runs(tenant_id: str, app_id: str):
  171. """Delete all workflow runs for an app using the service repository."""
  172. session_maker = sessionmaker(bind=db.engine)
  173. workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
  174. deleted_count = workflow_run_repo.delete_runs_by_app(
  175. tenant_id=tenant_id,
  176. app_id=app_id,
  177. batch_size=1000,
  178. )
  179. logger.info("Deleted %s workflow runs for app %s", deleted_count, app_id)
  180. def _delete_app_workflow_node_executions(tenant_id: str, app_id: str):
  181. """Delete all workflow node executions for an app using the service repository."""
  182. session_maker = sessionmaker(bind=db.engine)
  183. node_execution_repo = DifyAPIRepositoryFactory.create_api_workflow_node_execution_repository(session_maker)
  184. deleted_count = node_execution_repo.delete_executions_by_app(
  185. tenant_id=tenant_id,
  186. app_id=app_id,
  187. batch_size=1000,
  188. )
  189. logger.info("Deleted %s workflow node executions for app %s", deleted_count, app_id)
  190. def _delete_app_workflow_app_logs(tenant_id: str, app_id: str):
  191. def del_workflow_app_log(workflow_app_log_id: str):
  192. db.session.query(WorkflowAppLog).where(WorkflowAppLog.id == workflow_app_log_id).delete(
  193. synchronize_session=False
  194. )
  195. _delete_records(
  196. """select id from workflow_app_logs where tenant_id=:tenant_id and app_id=:app_id limit 1000""",
  197. {"tenant_id": tenant_id, "app_id": app_id},
  198. del_workflow_app_log,
  199. "workflow app log",
  200. )
  201. def _delete_app_conversations(tenant_id: str, app_id: str):
  202. def del_conversation(conversation_id: str):
  203. db.session.query(PinnedConversation).where(PinnedConversation.conversation_id == conversation_id).delete(
  204. synchronize_session=False
  205. )
  206. db.session.query(Conversation).where(Conversation.id == conversation_id).delete(synchronize_session=False)
  207. _delete_records(
  208. """select id from conversations where app_id=:app_id limit 1000""",
  209. {"app_id": app_id},
  210. del_conversation,
  211. "conversation",
  212. )
  213. def _delete_conversation_variables(*, app_id: str):
  214. stmt = delete(ConversationVariable).where(ConversationVariable.app_id == app_id)
  215. with db.engine.connect() as conn:
  216. conn.execute(stmt)
  217. conn.commit()
  218. logger.info(click.style(f"Deleted conversation variables for app {app_id}", fg="green"))
  219. def _delete_app_messages(tenant_id: str, app_id: str):
  220. def del_message(message_id: str):
  221. db.session.query(MessageFeedback).where(MessageFeedback.message_id == message_id).delete(
  222. synchronize_session=False
  223. )
  224. db.session.query(MessageAnnotation).where(MessageAnnotation.message_id == message_id).delete(
  225. synchronize_session=False
  226. )
  227. db.session.query(MessageChain).where(MessageChain.message_id == message_id).delete(synchronize_session=False)
  228. db.session.query(MessageAgentThought).where(MessageAgentThought.message_id == message_id).delete(
  229. synchronize_session=False
  230. )
  231. db.session.query(MessageFile).where(MessageFile.message_id == message_id).delete(synchronize_session=False)
  232. db.session.query(SavedMessage).where(SavedMessage.message_id == message_id).delete(synchronize_session=False)
  233. db.session.query(Message).where(Message.id == message_id).delete()
  234. _delete_records(
  235. """select id from messages where app_id=:app_id limit 1000""",
  236. {"app_id": app_id},
  237. del_message,
  238. "message",
  239. )
  240. def _delete_workflow_tool_providers(tenant_id: str, app_id: str):
  241. def del_tool_provider(tool_provider_id: str):
  242. db.session.query(WorkflowToolProvider).where(WorkflowToolProvider.id == tool_provider_id).delete(
  243. synchronize_session=False
  244. )
  245. _delete_records(
  246. """select id from tool_workflow_providers where tenant_id=:tenant_id and app_id=:app_id limit 1000""",
  247. {"tenant_id": tenant_id, "app_id": app_id},
  248. del_tool_provider,
  249. "tool workflow provider",
  250. )
  251. def _delete_app_tag_bindings(tenant_id: str, app_id: str):
  252. def del_tag_binding(tag_binding_id: str):
  253. db.session.query(TagBinding).where(TagBinding.id == tag_binding_id).delete(synchronize_session=False)
  254. _delete_records(
  255. """select id from tag_bindings where tenant_id=:tenant_id and target_id=:app_id limit 1000""",
  256. {"tenant_id": tenant_id, "app_id": app_id},
  257. del_tag_binding,
  258. "tag binding",
  259. )
  260. def _delete_end_users(tenant_id: str, app_id: str):
  261. def del_end_user(end_user_id: str):
  262. db.session.query(EndUser).where(EndUser.id == end_user_id).delete(synchronize_session=False)
  263. _delete_records(
  264. """select id from end_users where tenant_id=:tenant_id and app_id=:app_id limit 1000""",
  265. {"tenant_id": tenant_id, "app_id": app_id},
  266. del_end_user,
  267. "end user",
  268. )
  269. def _delete_trace_app_configs(tenant_id: str, app_id: str):
  270. def del_trace_app_config(trace_app_config_id: str):
  271. db.session.query(TraceAppConfig).where(TraceAppConfig.id == trace_app_config_id).delete(
  272. synchronize_session=False
  273. )
  274. _delete_records(
  275. """select id from trace_app_config where app_id=:app_id limit 1000""",
  276. {"app_id": app_id},
  277. del_trace_app_config,
  278. "trace app config",
  279. )
  280. def _delete_draft_variables(app_id: str):
  281. """Delete all workflow draft variables for an app in batches."""
  282. return delete_draft_variables_batch(app_id, batch_size=1000)
  283. def delete_draft_variables_batch(app_id: str, batch_size: int = 1000) -> int:
  284. """
  285. Delete draft variables for an app in batches.
  286. This function now handles cleanup of associated Offload data including:
  287. - WorkflowDraftVariableFile records
  288. - UploadFile records
  289. - Object storage files
  290. Args:
  291. app_id: The ID of the app whose draft variables should be deleted
  292. batch_size: Number of records to delete per batch
  293. Returns:
  294. Total number of records deleted
  295. """
  296. if batch_size <= 0:
  297. raise ValueError("batch_size must be positive")
  298. total_deleted = 0
  299. total_files_deleted = 0
  300. while True:
  301. with db.engine.begin() as conn:
  302. # Get a batch of draft variable IDs along with their file_ids
  303. query_sql = """
  304. SELECT id, file_id FROM workflow_draft_variables
  305. WHERE app_id = :app_id
  306. LIMIT :batch_size
  307. """
  308. result = conn.execute(sa.text(query_sql), {"app_id": app_id, "batch_size": batch_size})
  309. rows = list(result)
  310. if not rows:
  311. break
  312. draft_var_ids = [row[0] for row in rows]
  313. file_ids = [row[1] for row in rows if row[1] is not None]
  314. # Clean up associated Offload data first
  315. if file_ids:
  316. files_deleted = _delete_draft_variable_offload_data(conn, file_ids)
  317. total_files_deleted += files_deleted
  318. # Delete the draft variables
  319. delete_sql = """
  320. DELETE FROM workflow_draft_variables
  321. WHERE id IN :ids
  322. """
  323. deleted_result = conn.execute(sa.text(delete_sql), {"ids": tuple(draft_var_ids)})
  324. batch_deleted = deleted_result.rowcount
  325. total_deleted += batch_deleted
  326. logger.info(click.style(f"Deleted {batch_deleted} draft variables (batch) for app {app_id}", fg="green"))
  327. logger.info(
  328. click.style(
  329. f"Deleted {total_deleted} total draft variables for app {app_id}. "
  330. f"Cleaned up {total_files_deleted} total associated files.",
  331. fg="green",
  332. )
  333. )
  334. return total_deleted
  335. def _delete_draft_variable_offload_data(conn, file_ids: list[str]) -> int:
  336. """
  337. Delete Offload data associated with WorkflowDraftVariable file_ids.
  338. This function:
  339. 1. Finds WorkflowDraftVariableFile records by file_ids
  340. 2. Deletes associated files from object storage
  341. 3. Deletes UploadFile records
  342. 4. Deletes WorkflowDraftVariableFile records
  343. Args:
  344. conn: Database connection
  345. file_ids: List of WorkflowDraftVariableFile IDs
  346. Returns:
  347. Number of files cleaned up
  348. """
  349. from extensions.ext_storage import storage
  350. if not file_ids:
  351. return 0
  352. files_deleted = 0
  353. try:
  354. # Get WorkflowDraftVariableFile records and their associated UploadFile keys
  355. query_sql = """
  356. SELECT wdvf.id, uf.key, uf.id as upload_file_id
  357. FROM workflow_draft_variable_files wdvf
  358. JOIN upload_files uf ON wdvf.upload_file_id = uf.id
  359. WHERE wdvf.id IN :file_ids
  360. """
  361. result = conn.execute(sa.text(query_sql), {"file_ids": tuple(file_ids)})
  362. file_records = list(result)
  363. # Delete from object storage and collect upload file IDs
  364. upload_file_ids = []
  365. for variable_file_id, storage_key, upload_file_id in file_records:
  366. try:
  367. storage.delete(storage_key)
  368. upload_file_ids.append(upload_file_id)
  369. files_deleted += 1
  370. except Exception as e:
  371. logging.exception("Failed to delete storage object %s", storage_key)
  372. # Continue with database cleanup even if storage deletion fails
  373. upload_file_ids.append(upload_file_id)
  374. # Delete UploadFile records
  375. if upload_file_ids:
  376. delete_upload_files_sql = """
  377. DELETE FROM upload_files
  378. WHERE id IN :upload_file_ids
  379. """
  380. conn.execute(sa.text(delete_upload_files_sql), {"upload_file_ids": tuple(upload_file_ids)})
  381. # Delete WorkflowDraftVariableFile records
  382. delete_variable_files_sql = """
  383. DELETE FROM workflow_draft_variable_files
  384. WHERE id IN :file_ids
  385. """
  386. conn.execute(sa.text(delete_variable_files_sql), {"file_ids": tuple(file_ids)})
  387. except Exception:
  388. logging.exception("Error deleting draft variable offload data:")
  389. # Don't raise, as we want to continue with the main deletion process
  390. return files_deleted
  391. def _delete_records(query_sql: str, params: dict, delete_func: Callable, name: str) -> None:
  392. while True:
  393. with db.engine.begin() as conn:
  394. rs = conn.execute(sa.text(query_sql), params)
  395. if rs.rowcount == 0:
  396. break
  397. for i in rs:
  398. record_id = str(i.id)
  399. try:
  400. delete_func(record_id)
  401. db.session.commit()
  402. logger.info(click.style(f"Deleted {name} {record_id}", fg="green"))
  403. except Exception:
  404. logger.exception("Error occurred while deleting %s %s", name, record_id)
  405. continue
  406. rs.close()