| documents = db.session.query(Document).where(Document.dataset_id == dataset_id).all() | documents = db.session.query(Document).where(Document.dataset_id == dataset_id).all() | ||||
| segments = db.session.query(DocumentSegment).where(DocumentSegment.dataset_id == dataset_id).all() | segments = db.session.query(DocumentSegment).where(DocumentSegment.dataset_id == dataset_id).all() | ||||
| # Fix: Always clean vector database resources regardless of document existence | |||||
| # This ensures all 33 vector databases properly drop tables/collections/indices | |||||
| if doc_form is None: | |||||
| # Use default paragraph index type for empty datasets to enable vector database cleanup | |||||
| # Enhanced validation: Check if doc_form is None, empty string, or contains only whitespace | |||||
| # This ensures all invalid doc_form values are properly handled | |||||
| if doc_form is None or (isinstance(doc_form, str) and not doc_form.strip()): | |||||
| # Use default paragraph index type for empty/invalid datasets to enable vector database cleanup | |||||
| from core.rag.index_processor.constant.index_type import IndexType | from core.rag.index_processor.constant.index_type import IndexType | ||||
| doc_form = IndexType.PARAGRAPH_INDEX | doc_form = IndexType.PARAGRAPH_INDEX | ||||
| logging.info( | logging.info( | ||||
| click.style(f"No documents found, using default index type for cleanup: {doc_form}", fg="yellow") | |||||
| click.style(f"Invalid doc_form detected, using default index type for cleanup: {doc_form}", fg="yellow") | |||||
| ) | ) | ||||
| index_processor = IndexProcessorFactory(doc_form).init_index_processor() | |||||
| index_processor.clean(dataset, None, with_keywords=True, delete_child_chunks=True) | |||||
| # Add exception handling around IndexProcessorFactory.clean() to prevent single point of failure | |||||
| # This ensures Document/Segment deletion can continue even if vector database cleanup fails | |||||
| try: | |||||
| index_processor = IndexProcessorFactory(doc_form).init_index_processor() | |||||
| index_processor.clean(dataset, None, with_keywords=True, delete_child_chunks=True) | |||||
| logging.info(click.style(f"Successfully cleaned vector database for dataset: {dataset_id}", fg="green")) | |||||
| except Exception as index_cleanup_error: | |||||
| logging.exception(click.style(f"Failed to clean vector database for dataset {dataset_id}", fg="red")) | |||||
| # Continue with document and segment deletion even if vector cleanup fails | |||||
| logging.info( | |||||
| click.style(f"Continuing with document and segment deletion for dataset: {dataset_id}", fg="yellow") | |||||
| ) | |||||
| if documents is None or len(documents) == 0: | if documents is None or len(documents) == 0: | ||||
| logging.info(click.style(f"No documents found for dataset: {dataset_id}", fg="green")) | logging.info(click.style(f"No documents found for dataset: {dataset_id}", fg="green")) | ||||
| click.style(f"Cleaned dataset when dataset deleted: {dataset_id} latency: {end_at - start_at}", fg="green") | click.style(f"Cleaned dataset when dataset deleted: {dataset_id} latency: {end_at - start_at}", fg="green") | ||||
| ) | ) | ||||
| except Exception: | except Exception: | ||||
| # Add rollback to prevent dirty session state in case of exceptions | |||||
| # This ensures the database session is properly cleaned up | |||||
| try: | |||||
| db.session.rollback() | |||||
| logging.info(click.style(f"Rolled back database session for dataset: {dataset_id}", fg="yellow")) | |||||
| except Exception as rollback_error: | |||||
| logging.exception("Failed to rollback database session") | |||||
| logging.exception("Cleaned dataset when dataset deleted failed") | logging.exception("Cleaned dataset when dataset deleted failed") | ||||
| finally: | finally: | ||||
| db.session.close() | db.session.close() |