|
|
|
@@ -1,5 +1,7 @@ |
|
|
|
from typing import Any, cast |
|
|
|
|
|
|
|
from sqlalchemy import func |
|
|
|
|
|
|
|
from core.app.app_config.entities import DatasetRetrieveConfigEntity |
|
|
|
from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity |
|
|
|
from core.entities.agent_entities import PlanningStrategy |
|
|
|
@@ -73,30 +75,33 @@ class KnowledgeRetrievalNode(BaseNode): |
|
|
|
|
|
|
|
def _fetch_dataset_retriever(self, node_data: KnowledgeRetrievalNodeData, query: str) -> list[ |
|
|
|
dict[str, Any]]: |
|
|
|
""" |
|
|
|
A dataset tool is a tool that can be used to retrieve information from a dataset |
|
|
|
:param node_data: node data |
|
|
|
:param query: query |
|
|
|
""" |
|
|
|
tools = [] |
|
|
|
available_datasets = [] |
|
|
|
dataset_ids = node_data.dataset_ids |
|
|
|
for dataset_id in dataset_ids: |
|
|
|
# get dataset from dataset id |
|
|
|
dataset = db.session.query(Dataset).filter( |
|
|
|
Dataset.tenant_id == self.tenant_id, |
|
|
|
Dataset.id == dataset_id |
|
|
|
).first() |
|
|
|
|
|
|
|
# pass if dataset is not available |
|
|
|
if not dataset: |
|
|
|
continue |
|
|
|
# Subquery: Count the number of available documents for each dataset |
|
|
|
subquery = db.session.query( |
|
|
|
Document.dataset_id, |
|
|
|
func.count(Document.id).label('available_document_count') |
|
|
|
).filter( |
|
|
|
Document.indexing_status == 'completed', |
|
|
|
Document.enabled == True, |
|
|
|
Document.archived == False, |
|
|
|
Document.dataset_id.in_(dataset_ids) |
|
|
|
).group_by(Document.dataset_id).having( |
|
|
|
func.count(Document.id) > 0 |
|
|
|
).subquery() |
|
|
|
|
|
|
|
results = db.session.query(Dataset).join( |
|
|
|
subquery, Dataset.id == subquery.c.dataset_id |
|
|
|
).filter( |
|
|
|
Dataset.tenant_id == self.tenant_id, |
|
|
|
Dataset.id.in_(dataset_ids) |
|
|
|
).all() |
|
|
|
|
|
|
|
for dataset in results: |
|
|
|
# pass if dataset is not available |
|
|
|
if (dataset and dataset.available_document_count == 0 |
|
|
|
and dataset.available_document_count == 0): |
|
|
|
if not dataset: |
|
|
|
continue |
|
|
|
|
|
|
|
available_datasets.append(dataset) |
|
|
|
all_documents = [] |
|
|
|
dataset_retrieval = DatasetRetrieval() |