| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 | 
							- from typing import Optional
 - 
 - from langchain.callbacks import CallbackManager
 - from llama_index.langchain_helpers.agents import IndexToolConfig
 - 
 - from core.callback_handler.dataset_tool_callback_handler import DatasetToolCallbackHandler
 - from core.callback_handler.index_tool_callback_handler import DatasetIndexToolCallbackHandler
 - from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
 - from core.index.keyword_table_index import KeywordTableIndex
 - from core.index.vector_index import VectorIndex
 - from core.prompt.prompts import QUERY_KEYWORD_EXTRACT_TEMPLATE
 - from core.tool.llama_index_tool import EnhanceLlamaIndexTool
 - from extensions.ext_database import db
 - from models.dataset import Dataset
 - 
 - 
 - class DatasetToolBuilder:
 -     @classmethod
 -     def build_dataset_tool(cls, tenant_id: str, dataset_id: str,
 -                            response_mode: str = "no_synthesizer",
 -                            callback_handler: Optional[DatasetToolCallbackHandler] = None):
 -         # get dataset from dataset id
 -         dataset = db.session.query(Dataset).filter(
 -             Dataset.tenant_id == tenant_id,
 -             Dataset.id == dataset_id
 -         ).first()
 - 
 -         if not dataset:
 -             return None
 - 
 -         if dataset.indexing_technique == "economy":
 -             # use keyword table query
 -             index = KeywordTableIndex(dataset=dataset).query_index
 - 
 -             if not index:
 -                 return None
 - 
 -             query_kwargs = {
 -                 "mode": "default",
 -                 "response_mode": response_mode,
 -                 "query_keyword_extract_template": QUERY_KEYWORD_EXTRACT_TEMPLATE,
 -                 "max_keywords_per_query": 5,
 -                 # If num_chunks_per_query is too large,
 -                 # it will slow down the synthesis process due to multiple iterations of refinement.
 -                 "num_chunks_per_query": 2
 -             }
 -         else:
 -             index = VectorIndex(dataset=dataset).query_index
 - 
 -             if not index:
 -                 return None
 - 
 -             query_kwargs = {
 -                 "mode": "default",
 -                 "response_mode": response_mode,
 -                 # If top_k is too large,
 -                 # it will slow down the synthesis process due to multiple iterations of refinement.
 -                 "similarity_top_k": 2
 -             }
 - 
 -         # fulfill description when it is empty
 -         description = dataset.description
 -         if not description:
 -             description = 'useful for when you want to answer queries about the ' + dataset.name
 - 
 -         index_tool_config = IndexToolConfig(
 -             index=index,
 -             name=f"dataset-{dataset_id}",
 -             description=description,
 -             index_query_kwargs=query_kwargs,
 -             tool_kwargs={
 -                 "callback_manager": CallbackManager([callback_handler, DifyStdOutCallbackHandler()])
 -             },
 -             # tool_kwargs={"return_direct": True},
 -             # return_direct: Whether to return LLM results directly or process the output data with an Output Parser
 -         )
 - 
 -         index_callback_handler = DatasetIndexToolCallbackHandler(dataset_id=dataset_id)
 - 
 -         return EnhanceLlamaIndexTool.from_tool_config(
 -             tool_config=index_tool_config,
 -             callback_handler=index_callback_handler
 -         )
 
 
  |