|  |  | @@ -17,6 +17,7 @@ from core.rag.models.document import Document | 
		
	
		
			
			|  |  |  | from events.app_event import app_was_created | 
		
	
		
			
			|  |  |  | from extensions.ext_database import db | 
		
	
		
			
			|  |  |  | from extensions.ext_redis import redis_client | 
		
	
		
			
			|  |  |  | from extensions.ext_storage import storage | 
		
	
		
			
			|  |  |  | from libs.helper import email as email_validate | 
		
	
		
			
			|  |  |  | from libs.password import hash_password, password_pattern, valid_password | 
		
	
		
			
			|  |  |  | from libs.rsa import generate_key_pair | 
		
	
	
		
			
			|  |  | @@ -815,3 +816,256 @@ def clear_free_plan_tenant_expired_logs(days: int, batch: int, tenant_ids: list[ | 
		
	
		
			
			|  |  |  | ClearFreePlanTenantExpiredLogs.process(days, batch, tenant_ids) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | click.echo(click.style("Clear free plan tenant expired logs completed.", fg="green")) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @click.command("clear-orphaned-file-records", help="Clear orphaned file records.") | 
		
	
		
			
			|  |  |  | def clear_orphaned_file_records(): | 
		
	
		
			
			|  |  |  | """ | 
		
	
		
			
			|  |  |  | Clear orphaned file records in the database. | 
		
	
		
			
			|  |  |  | """ | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | # define tables and columns to process | 
		
	
		
			
			|  |  |  | files_tables = [ | 
		
	
		
			
			|  |  |  | {"table": "upload_files", "id_column": "id", "key_column": "key"}, | 
		
	
		
			
			|  |  |  | {"table": "tool_files", "id_column": "id", "key_column": "file_key"}, | 
		
	
		
			
			|  |  |  | ] | 
		
	
		
			
			|  |  |  | ids_tables = [ | 
		
	
		
			
			|  |  |  | {"type": "uuid", "table": "message_files", "column": "upload_file_id"}, | 
		
	
		
			
			|  |  |  | {"type": "text", "table": "documents", "column": "data_source_info"}, | 
		
	
		
			
			|  |  |  | {"type": "text", "table": "document_segments", "column": "content"}, | 
		
	
		
			
			|  |  |  | {"type": "text", "table": "messages", "column": "answer"}, | 
		
	
		
			
			|  |  |  | {"type": "text", "table": "workflow_node_executions", "column": "inputs"}, | 
		
	
		
			
			|  |  |  | {"type": "text", "table": "workflow_node_executions", "column": "process_data"}, | 
		
	
		
			
			|  |  |  | {"type": "text", "table": "workflow_node_executions", "column": "outputs"}, | 
		
	
		
			
			|  |  |  | {"type": "text", "table": "conversations", "column": "introduction"}, | 
		
	
		
			
			|  |  |  | {"type": "text", "table": "conversations", "column": "system_instruction"}, | 
		
	
		
			
			|  |  |  | {"type": "json", "table": "messages", "column": "inputs"}, | 
		
	
		
			
			|  |  |  | {"type": "json", "table": "messages", "column": "message"}, | 
		
	
		
			
			|  |  |  | ] | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | # notify user and ask for confirmation | 
		
	
		
			
			|  |  |  | click.echo( | 
		
	
		
			
			|  |  |  | click.style("This command will find and delete orphaned file records in the following tables:", fg="yellow") | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | for files_table in files_tables: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"- {files_table['table']}", fg="yellow")) | 
		
	
		
			
			|  |  |  | click.echo( | 
		
	
		
			
			|  |  |  | click.style("The following tables and columns will be scanned to find orphaned file records:", fg="yellow") | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | for ids_table in ids_tables: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"- {ids_table['table']} ({ids_table['column']})", fg="yellow")) | 
		
	
		
			
			|  |  |  | click.echo("") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | click.echo(click.style("!!! USE WITH CAUTION !!!", fg="red")) | 
		
	
		
			
			|  |  |  | click.echo( | 
		
	
		
			
			|  |  |  | click.style( | 
		
	
		
			
			|  |  |  | ( | 
		
	
		
			
			|  |  |  | "Since not all patterns have been fully tested, " | 
		
	
		
			
			|  |  |  | "please note that this command may delete unintended file records." | 
		
	
		
			
			|  |  |  | ), | 
		
	
		
			
			|  |  |  | fg="yellow", | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | click.echo( | 
		
	
		
			
			|  |  |  | click.style("This cannot be undone. Please make sure to back up your database before proceeding.", fg="yellow") | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | click.confirm("Do you want to proceed?", abort=True) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | # start the cleanup process | 
		
	
		
			
			|  |  |  | click.echo(click.style("Starting orphaned file records cleanup.", fg="white")) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | # fetch file id and keys from each table | 
		
	
		
			
			|  |  |  | all_files_in_tables = [] | 
		
	
		
			
			|  |  |  | for files_table in files_tables: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"- Listing file records in table {files_table['table']}", fg="white")) | 
		
	
		
			
			|  |  |  | query = f"SELECT {files_table['id_column']}, {files_table['key_column']} FROM {files_table['table']}" | 
		
	
		
			
			|  |  |  | with db.engine.begin() as conn: | 
		
	
		
			
			|  |  |  | rs = conn.execute(db.text(query)) | 
		
	
		
			
			|  |  |  | for i in rs: | 
		
	
		
			
			|  |  |  | all_files_in_tables.append({"table": files_table["table"], "id": str(i[0]), "key": i[1]}) | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white")) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | # fetch referred table and columns | 
		
	
		
			
			|  |  |  | guid_regexp = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" | 
		
	
		
			
			|  |  |  | all_ids_in_tables = [] | 
		
	
		
			
			|  |  |  | for ids_table in ids_tables: | 
		
	
		
			
			|  |  |  | query = "" | 
		
	
		
			
			|  |  |  | if ids_table["type"] == "uuid": | 
		
	
		
			
			|  |  |  | click.echo( | 
		
	
		
			
			|  |  |  | click.style( | 
		
	
		
			
			|  |  |  | f"- Listing file ids in column {ids_table['column']} in table {ids_table['table']}", fg="white" | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | query = ( | 
		
	
		
			
			|  |  |  | f"SELECT {ids_table['column']} FROM {ids_table['table']} WHERE {ids_table['column']} IS NOT NULL" | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | with db.engine.begin() as conn: | 
		
	
		
			
			|  |  |  | rs = conn.execute(db.text(query)) | 
		
	
		
			
			|  |  |  | for i in rs: | 
		
	
		
			
			|  |  |  | all_ids_in_tables.append({"table": ids_table["table"], "id": str(i[0])}) | 
		
	
		
			
			|  |  |  | elif ids_table["type"] == "text": | 
		
	
		
			
			|  |  |  | click.echo( | 
		
	
		
			
			|  |  |  | click.style( | 
		
	
		
			
			|  |  |  | f"- Listing file-id-like strings in column {ids_table['column']} in table {ids_table['table']}", | 
		
	
		
			
			|  |  |  | fg="white", | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | query = ( | 
		
	
		
			
			|  |  |  | f"SELECT regexp_matches({ids_table['column']}, '{guid_regexp}', 'g') AS extracted_id " | 
		
	
		
			
			|  |  |  | f"FROM {ids_table['table']}" | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | with db.engine.begin() as conn: | 
		
	
		
			
			|  |  |  | rs = conn.execute(db.text(query)) | 
		
	
		
			
			|  |  |  | for i in rs: | 
		
	
		
			
			|  |  |  | for j in i[0]: | 
		
	
		
			
			|  |  |  | all_ids_in_tables.append({"table": ids_table["table"], "id": j}) | 
		
	
		
			
			|  |  |  | elif ids_table["type"] == "json": | 
		
	
		
			
			|  |  |  | click.echo( | 
		
	
		
			
			|  |  |  | click.style( | 
		
	
		
			
			|  |  |  | ( | 
		
	
		
			
			|  |  |  | f"- Listing file-id-like JSON string in column {ids_table['column']} " | 
		
	
		
			
			|  |  |  | f"in table {ids_table['table']}" | 
		
	
		
			
			|  |  |  | ), | 
		
	
		
			
			|  |  |  | fg="white", | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | query = ( | 
		
	
		
			
			|  |  |  | f"SELECT regexp_matches({ids_table['column']}::text, '{guid_regexp}', 'g') AS extracted_id " | 
		
	
		
			
			|  |  |  | f"FROM {ids_table['table']}" | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | with db.engine.begin() as conn: | 
		
	
		
			
			|  |  |  | rs = conn.execute(db.text(query)) | 
		
	
		
			
			|  |  |  | for i in rs: | 
		
	
		
			
			|  |  |  | for j in i[0]: | 
		
	
		
			
			|  |  |  | all_ids_in_tables.append({"table": ids_table["table"], "id": j}) | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"Found {len(all_ids_in_tables)} file ids in tables.", fg="white")) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"Error fetching keys: {str(e)}", fg="red")) | 
		
	
		
			
			|  |  |  | return | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | # find orphaned files | 
		
	
		
			
			|  |  |  | all_files = [file["id"] for file in all_files_in_tables] | 
		
	
		
			
			|  |  |  | all_ids = [file["id"] for file in all_ids_in_tables] | 
		
	
		
			
			|  |  |  | orphaned_files = list(set(all_files) - set(all_ids)) | 
		
	
		
			
			|  |  |  | if not orphaned_files: | 
		
	
		
			
			|  |  |  | click.echo(click.style("No orphaned file records found. There is nothing to delete.", fg="green")) | 
		
	
		
			
			|  |  |  | return | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"Found {len(orphaned_files)} orphaned file records.", fg="white")) | 
		
	
		
			
			|  |  |  | for file in orphaned_files: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"- orphaned file id: {file}", fg="black")) | 
		
	
		
			
			|  |  |  | click.confirm(f"Do you want to proceed to delete all {len(orphaned_files)} orphaned file records?", abort=True) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | # delete orphaned records for each file | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | for files_table in files_tables: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"- Deleting orphaned file records in table {files_table['table']}", fg="white")) | 
		
	
		
			
			|  |  |  | query = f"DELETE FROM {files_table['table']} WHERE {files_table['id_column']} IN :ids" | 
		
	
		
			
			|  |  |  | with db.engine.begin() as conn: | 
		
	
		
			
			|  |  |  | conn.execute(db.text(query), {"ids": tuple(orphaned_files)}) | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"Error deleting orphaned file records: {str(e)}", fg="red")) | 
		
	
		
			
			|  |  |  | return | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"Removed {len(orphaned_files)} orphaned file records.", fg="green")) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @click.command("remove-orphaned-files-on-storage", help="Remove orphaned files on the storage.") | 
		
	
		
			
			|  |  |  | def remove_orphaned_files_on_storage(): | 
		
	
		
			
			|  |  |  | """ | 
		
	
		
			
			|  |  |  | Remove orphaned files on the storage. | 
		
	
		
			
			|  |  |  | """ | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | # define tables and columns to process | 
		
	
		
			
			|  |  |  | files_tables = [ | 
		
	
		
			
			|  |  |  | {"table": "upload_files", "key_column": "key"}, | 
		
	
		
			
			|  |  |  | {"table": "tool_files", "key_column": "file_key"}, | 
		
	
		
			
			|  |  |  | ] | 
		
	
		
			
			|  |  |  | storage_paths = ["image_files", "tools", "upload_files"] | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | # notify user and ask for confirmation | 
		
	
		
			
			|  |  |  | click.echo(click.style("This command will find and remove orphaned files on the storage,", fg="yellow")) | 
		
	
		
			
			|  |  |  | click.echo( | 
		
	
		
			
			|  |  |  | click.style("by comparing the files on the storage with the records in the following tables:", fg="yellow") | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | for files_table in files_tables: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"- {files_table['table']}", fg="yellow")) | 
		
	
		
			
			|  |  |  | click.echo(click.style("The following paths on the storage will be scanned to find orphaned files:", fg="yellow")) | 
		
	
		
			
			|  |  |  | for storage_path in storage_paths: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"- {storage_path}", fg="yellow")) | 
		
	
		
			
			|  |  |  | click.echo("") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | click.echo(click.style("!!! USE WITH CAUTION !!!", fg="red")) | 
		
	
		
			
			|  |  |  | click.echo( | 
		
	
		
			
			|  |  |  | click.style( | 
		
	
		
			
			|  |  |  | "Currently, this command will work only for opendal based storage (STORAGE_TYPE=opendal).", fg="yellow" | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | click.echo( | 
		
	
		
			
			|  |  |  | click.style( | 
		
	
		
			
			|  |  |  | "Since not all patterns have been fully tested, please note that this command may delete unintended files.", | 
		
	
		
			
			|  |  |  | fg="yellow", | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | click.echo( | 
		
	
		
			
			|  |  |  | click.style("This cannot be undone. Please make sure to back up your database before proceeding.", fg="yellow") | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | click.confirm("Do you want to proceed?", abort=True) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | # start the cleanup process | 
		
	
		
			
			|  |  |  | click.echo(click.style("Starting orphaned files cleanup.", fg="white")) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | # fetch file id and keys from each table | 
		
	
		
			
			|  |  |  | all_files_in_tables = [] | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | for files_table in files_tables: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"- Listing files from table {files_table['table']}", fg="white")) | 
		
	
		
			
			|  |  |  | query = f"SELECT {files_table['key_column']} FROM {files_table['table']}" | 
		
	
		
			
			|  |  |  | with db.engine.begin() as conn: | 
		
	
		
			
			|  |  |  | rs = conn.execute(db.text(query)) | 
		
	
		
			
			|  |  |  | for i in rs: | 
		
	
		
			
			|  |  |  | all_files_in_tables.append(str(i[0])) | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white")) | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"Error fetching keys: {str(e)}", fg="red")) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | all_files_on_storage = [] | 
		
	
		
			
			|  |  |  | for storage_path in storage_paths: | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"- Scanning files on storage path {storage_path}", fg="white")) | 
		
	
		
			
			|  |  |  | files = storage.scan(path=storage_path, files=True, directories=False) | 
		
	
		
			
			|  |  |  | all_files_on_storage.extend(files) | 
		
	
		
			
			|  |  |  | except FileNotFoundError as e: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"  -> Skipping path {storage_path} as it does not exist.", fg="yellow")) | 
		
	
		
			
			|  |  |  | continue | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"  -> Error scanning files on storage path {storage_path}: {str(e)}", fg="red")) | 
		
	
		
			
			|  |  |  | continue | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"Found {len(all_files_on_storage)} files on storage.", fg="white")) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | # find orphaned files | 
		
	
		
			
			|  |  |  | orphaned_files = list(set(all_files_on_storage) - set(all_files_in_tables)) | 
		
	
		
			
			|  |  |  | if not orphaned_files: | 
		
	
		
			
			|  |  |  | click.echo(click.style("No orphaned files found. There is nothing to remove.", fg="green")) | 
		
	
		
			
			|  |  |  | return | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"Found {len(orphaned_files)} orphaned files.", fg="white")) | 
		
	
		
			
			|  |  |  | for file in orphaned_files: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"- orphaned file: {file}", fg="black")) | 
		
	
		
			
			|  |  |  | click.confirm(f"Do you want to proceed to remove all {len(orphaned_files)} orphaned files?", abort=True) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | # delete orphaned files | 
		
	
		
			
			|  |  |  | removed_files = 0 | 
		
	
		
			
			|  |  |  | error_files = 0 | 
		
	
		
			
			|  |  |  | for file in orphaned_files: | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | storage.delete(file) | 
		
	
		
			
			|  |  |  | removed_files += 1 | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"- Removing orphaned file: {file}", fg="white")) | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
		
			
			|  |  |  | error_files += 1 | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"- Error deleting orphaned file {file}: {str(e)}", fg="red")) | 
		
	
		
			
			|  |  |  | continue | 
		
	
		
			
			|  |  |  | if error_files == 0: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"Removed {removed_files} orphaned files without errors.", fg="green")) | 
		
	
		
			
			|  |  |  | else: | 
		
	
		
			
			|  |  |  | click.echo(click.style(f"Removed {removed_files} orphaned files, with {error_files} errors.", fg="yellow")) |