     ).all()
     if dataset_documents:
-        documents = []
-        for dataset_document in dataset_documents:
-            # delete from vector index
-            segments = db.session.query(DocumentSegment).filter(
-                DocumentSegment.document_id == dataset_document.id,
-                DocumentSegment.enabled == True
-            ).order_by(DocumentSegment.position.asc()).all()
-            for segment in segments:
-                document = Document(
-                    page_content=segment.content,
-                    metadata={
-                        "doc_id": segment.index_node_id,
-                        "doc_hash": segment.index_node_hash,
-                        "document_id": segment.document_id,
-                        "dataset_id": segment.dataset_id,
-                    }
-                )
-                documents.append(document)
-        # save vector index
-        index_processor.load(dataset, documents, with_keywords=False)
+        dataset_documents_ids = [doc.id for doc in dataset_documents]
+        db.session.query(DatasetDocument).filter(DatasetDocument.id.in_(dataset_documents_ids)) \
+            .update({"indexing_status": "indexing"}, synchronize_session=False)
+        db.session.commit()
+        for dataset_document in dataset_documents:
+            try:
+                # add from vector index
+                segments = db.session.query(DocumentSegment).filter(
+                    DocumentSegment.document_id == dataset_document.id,
+                    DocumentSegment.enabled == True
+                ).order_by(DocumentSegment.position.asc()).all()
+                if segments:
+                    documents = []
+                    for segment in segments:
+                        document = Document(
+                            page_content=segment.content,
+                            metadata={
+                                "doc_id": segment.index_node_id,
+                                "doc_hash": segment.index_node_hash,
+                                "document_id": segment.document_id,
+                                "dataset_id": segment.dataset_id,
+                            }
+                        )
+                        documents.append(document)
+                    # save vector index
+                    index_processor.load(dataset, documents, with_keywords=False)
+                db.session.query(DatasetDocument).filter(DatasetDocument.id == dataset_document.id) \
+                    .update({"indexing_status": "completed"}, synchronize_session=False)
+                db.session.commit()
+            except Exception as e:
+                db.session.query(DatasetDocument).filter(DatasetDocument.id == dataset_document.id) \
+                    .update({"indexing_status": "error", "error": str(e)}, synchronize_session=False)
+                db.session.commit()
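Both the 'add' hunk above and the 'update' hunk below apply the same pattern: the whole batch of `DatasetDocument` rows is first flagged as `indexing`, then each document is rebuilt inside its own `try`/`except` and flipped to `completed` or `error` individually, so a single failing document no longer aborts the whole task. The sketch below only illustrates that pattern under assumed names; `Doc`, `build_segments`, and `load_into_index` are hypothetical stand-ins, not part of this patch or of the surrounding codebase.

```python
# Minimal, self-contained sketch of the status-transition pattern used above.
# Doc, build_segments, and load_into_index are illustrative stand-ins only.
from dataclasses import dataclass


@dataclass
class Doc:
    id: str
    indexing_status: str = "completed"
    error: str | None = None


def build_segments(doc: Doc) -> list[str]:
    # Stand-in for querying enabled DocumentSegment rows ordered by position.
    return [f"{doc.id}-segment-0", f"{doc.id}-segment-1"]


def load_into_index(segments: list[str]) -> None:
    # Stand-in for index_processor.load(dataset, documents, with_keywords=False).
    if not segments:
        raise ValueError("nothing to index")


def reindex(docs: list[Doc]) -> None:
    # 1. Mark the whole batch as "indexing" before any work starts.
    for doc in docs:
        doc.indexing_status = "indexing"
    # 2. Process each document in isolation so one failure cannot abort the batch.
    for doc in docs:
        try:
            segments = build_segments(doc)
            if segments:
                load_into_index(segments)
            doc.indexing_status = "completed"
        except Exception as e:
            doc.indexing_status = "error"
            doc.error = str(e)


if __name__ == "__main__":
    batch = [Doc("a"), Doc("b")]
    reindex(batch)
    print([(d.id, d.indexing_status) for d in batch])
```

Committing the status per document, as the patch does with `db.session.commit()` inside the loop, keeps progress visible while the task runs and leaves failures attributable to specific documents.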
 elif action == 'update':
-    # clean index
-    index_processor.clean(dataset, None, with_keywords=False)
     dataset_documents = db.session.query(DatasetDocument).filter(
         DatasetDocument.dataset_id == dataset_id,
         DatasetDocument.indexing_status == 'completed',
     ).all()
     # add new index
     if dataset_documents:
-        documents = []
+        # update document status
+        dataset_documents_ids = [doc.id for doc in dataset_documents]
+        db.session.query(DatasetDocument).filter(DatasetDocument.id.in_(dataset_documents_ids)) \
+            .update({"indexing_status": "indexing"}, synchronize_session=False)
+        db.session.commit()
+        # clean index
+        index_processor.clean(dataset, None, with_keywords=False)
         for dataset_document in dataset_documents:
-            # delete from vector index
-            segments = db.session.query(DocumentSegment).filter(
-                DocumentSegment.document_id == dataset_document.id,
-                DocumentSegment.enabled == True
-            ).order_by(DocumentSegment.position.asc()).all()
-            for segment in segments:
-                document = Document(
-                    page_content=segment.content,
-                    metadata={
-                        "doc_id": segment.index_node_id,
-                        "doc_hash": segment.index_node_hash,
-                        "document_id": segment.document_id,
-                        "dataset_id": segment.dataset_id,
-                    }
-                )
-                documents.append(document)
-        # save vector index
-        index_processor.load(dataset, documents, with_keywords=False)
+            # update from vector index
+            try:
+                segments = db.session.query(DocumentSegment).filter(
+                    DocumentSegment.document_id == dataset_document.id,
+                    DocumentSegment.enabled == True
+                ).order_by(DocumentSegment.position.asc()).all()
+                if segments:
+                    documents = []
+                    for segment in segments:
+                        document = Document(
+                            page_content=segment.content,
+                            metadata={
+                                "doc_id": segment.index_node_id,
+                                "doc_hash": segment.index_node_hash,
+                                "document_id": segment.document_id,
+                                "dataset_id": segment.dataset_id,
+                            }
+                        )
+                        documents.append(document)
+                    # save vector index
+                    index_processor.load(dataset, documents, with_keywords=False)
+                db.session.query(DatasetDocument).filter(DatasetDocument.id == dataset_document.id) \
+                    .update({"indexing_status": "completed"}, synchronize_session=False)
+                db.session.commit()
+            except Exception as e:
+                db.session.query(DatasetDocument).filter(DatasetDocument.id == dataset_document.id) \
+                    .update({"indexing_status": "error", "error": str(e)}, synchronize_session=False)
+                db.session.commit()
 end_at = time.perf_counter()
 logging.info(