
Feat: added functionality to choose all datasets if no id is provided (#9184)

### What problem does this PR solve?

Using the MCP server in n8n sometimes (with smaller models) results in
errors because the LLM drops or adds a character in the list of
dataset_ids it provides. It first asks for the list of datasets, and
with a larger list it makes mistakes when recalling it in full. Adding
the option to simply search through all available datasets solves this
and makes retrieval more stable. The ability to query specific datasets
by ID is unchanged; the dataset_ids argument is just no longer required
(only the "question" is). As before, you can provide a list of dataset
IDs, an empty list, or no list at all.
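
Concretely, all three of the following argument shapes are now accepted by the tool. This is a sketch with made-up question text and dataset IDs; the exact call syntax depends on your MCP client:

```python
# Illustrative ragflow_retrieval arguments; IDs and question are hypothetical.

# As before: search only the listed datasets.
args_explicit = {
    "question": "What is the refund policy?",
    "dataset_ids": ["dataset-id-1", "dataset-id-2"],
}

# Empty list: falls back to searching all available datasets.
args_empty = {"question": "What is the refund policy?", "dataset_ids": []}

# Omitted entirely: also searches all available datasets.
args_omitted = {"question": "What is the refund policy?"}
```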

### Type of change

- [X] New Feature (non-breaking change which adds functionality)
![mcp error dataset id](https://github.com/user-attachments/assets/71076d24-f875-4663-a69a-60839fc7a545)
Caspar Armster committed 9433f64fe2, 2 months ago (tag: v0.20.2)

1 changed file with 24 additions and 4 deletions: mcp/server/server.py

```diff
@@ -180,7 +180,7 @@ async def list_tools(*, connector) -> list[types.Tool]:
     return [
         types.Tool(
             name="ragflow_retrieval",
-            description="Retrieve relevant chunks from the RAGFlow retrieve interface based on the question, using the specified dataset_ids and optionally document_ids. Below is the list of all available datasets, including their descriptions and IDs. If you're unsure which datasets are relevant to the question, simply pass all dataset IDs to the function."
+            description="Retrieve relevant chunks from the RAGFlow retrieve interface based on the question. You can optionally specify dataset_ids to search only specific datasets, or omit dataset_ids entirely to search across ALL available datasets. You can also optionally specify document_ids to search within specific documents. When dataset_ids is not provided or is empty, the system will automatically search across all available datasets. Below is the list of all available datasets, including their descriptions and IDs:"
             + dataset_description,
             inputSchema={
                 "type": "object",
@@ -188,14 +188,16 @@ async def list_tools(*, connector) -> list[types.Tool]:
                     "dataset_ids": {
                         "type": "array",
                         "items": {"type": "string"},
+                        "description": "Optional array of dataset IDs to search. If not provided or empty, all datasets will be searched."
                     },
                     "document_ids": {
                         "type": "array",
                         "items": {"type": "string"},
+                        "description": "Optional array of document IDs to search within."
                     },
-                    "question": {"type": "string"},
+                    "question": {"type": "string", "description": "The question or query to search for."},
                 },
-                "required": ["dataset_ids", "question"],
+                "required": ["question"],
             },
         ),
     ]
```
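
To see the effect of the relaxed schema, here is a minimal sketch (assuming the third-party `jsonschema` package, which is not part of this PR) showing that only "question" is now required:

```python
from jsonschema import ValidationError, validate

# The new inputSchema, reduced to its validation-relevant parts.
schema = {
    "type": "object",
    "properties": {
        "dataset_ids": {"type": "array", "items": {"type": "string"}},
        "document_ids": {"type": "array", "items": {"type": "string"}},
        "question": {"type": "string"},
    },
    "required": ["question"],
}

validate({"question": "hello"}, schema)                     # now passes
validate({"question": "hello", "dataset_ids": []}, schema)  # passes
try:
    validate({"dataset_ids": ["x"]}, schema)                # still fails: no question
except ValidationError as e:
    print(e.message)  # "'question' is a required property"
```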
```diff
@@ -206,8 +208,26 @@
 async def call_tool(name: str, arguments: dict, *, connector) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
     if name == "ragflow_retrieval":
         document_ids = arguments.get("document_ids", [])
+        dataset_ids = arguments.get("dataset_ids", [])
+        # If no dataset_ids provided or empty list, get all available dataset IDs
+        if not dataset_ids:
+            dataset_list_str = connector.list_datasets()
+            dataset_ids = []
+            # Parse the dataset list to extract IDs
+            if dataset_list_str:
+                for line in dataset_list_str.strip().split('\n'):
+                    if line.strip():
+                        try:
+                            dataset_info = json.loads(line.strip())
+                            dataset_ids.append(dataset_info["id"])
+                        except (json.JSONDecodeError, KeyError):
+                            # Skip malformed lines
+                            continue
         return connector.retrieval(
-            dataset_ids=arguments["dataset_ids"],
+            dataset_ids=dataset_ids,
             document_ids=document_ids,
             question=arguments["question"],
         )
```
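
The fallback above assumes `connector.list_datasets()` returns one JSON object per line, each carrying an "id" field. A self-contained sketch of the parsing behavior, with a mocked listing and hypothetical dataset records:

```python
import json

# Mocked output in the assumed newline-delimited JSON format.
dataset_list_str = "\n".join([
    '{"id": "ds-1", "name": "manuals", "description": "Product manuals"}',
    "not valid json",                    # malformed line: skipped
    '{"name": "missing the id field"}',  # no "id" key: skipped
    '{"id": "ds-2", "name": "faq", "description": "FAQ articles"}',
])

dataset_ids = []
for line in dataset_list_str.strip().split("\n"):
    if line.strip():
        try:
            dataset_ids.append(json.loads(line.strip())["id"])
        except (json.JSONDecodeError, KeyError):
            continue  # skip malformed lines, as the handler does

print(dataset_ids)  # ['ds-1', 'ds-2']
```

Lines that are not valid JSON or lack an "id" key are silently skipped, so a partially malformed listing still yields whatever dataset IDs can be recovered.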
