|
|
|
@@ -661,7 +661,7 @@ class IndexingRunner: |
|
|
|
# create keyword index |
|
|
|
create_keyword_thread = threading.Thread(target=self._process_keyword_index, |
|
|
|
args=(current_app._get_current_object(), |
|
|
|
dataset, dataset_document, documents)) |
|
|
|
dataset.id, dataset_document.id, documents)) |
|
|
|
create_keyword_thread.start() |
|
|
|
if dataset.indexing_technique == 'high_quality': |
|
|
|
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: |
|
|
|
@@ -690,14 +690,17 @@ class IndexingRunner: |
|
|
|
} |
|
|
|
) |
|
|
|
|
|
|
|
def _process_keyword_index(self, flask_app, dataset, dataset_document, documents): |
|
|
|
def _process_keyword_index(self, flask_app, dataset_id, document_id, documents): |
|
|
|
with flask_app.app_context(): |
|
|
|
dataset = Dataset.query.filter_by(id=dataset_id).first() |
|
|
|
if not dataset: |
|
|
|
raise ValueError("no dataset found") |
|
|
|
keyword = Keyword(dataset) |
|
|
|
keyword.create(documents) |
|
|
|
if dataset.indexing_technique != 'high_quality': |
|
|
|
document_ids = [document.metadata['doc_id'] for document in documents] |
|
|
|
db.session.query(DocumentSegment).filter( |
|
|
|
DocumentSegment.document_id == dataset_document.id, |
|
|
|
DocumentSegment.document_id == document_id, |
|
|
|
DocumentSegment.index_node_id.in_(document_ids), |
|
|
|
DocumentSegment.status == "indexing" |
|
|
|
).update({ |