| @@ -1,14 +1,12 @@ | |||
| identity: | |||
| author: Yash Parmar | |||
| author: Yash Parmar, Kalo Chin | |||
| name: tavily | |||
| label: | |||
| en_US: Tavily | |||
| zh_Hans: Tavily | |||
| pt_BR: Tavily | |||
| en_US: Tavily Search & Extract | |||
| zh_Hans: Tavily 搜索和提取 | |||
| description: | |||
| en_US: Tavily | |||
| zh_Hans: Tavily | |||
| pt_BR: Tavily | |||
| en_US: A powerful AI-native search engine and web content extraction tool that provides highly relevant search results and raw content extraction from web pages. | |||
| zh_Hans: 一个强大的原生AI搜索引擎和网页内容提取工具,提供高度相关的搜索结果和网页原始内容提取。 | |||
| icon: icon.png | |||
| tags: | |||
| - search | |||
| @@ -19,13 +17,10 @@ credentials_for_provider: | |||
| label: | |||
| en_US: Tavily API key | |||
| zh_Hans: Tavily API key | |||
| pt_BR: Tavily API key | |||
| placeholder: | |||
| en_US: Please input your Tavily API key | |||
| zh_Hans: 请输入你的 Tavily API key | |||
| pt_BR: Please input your Tavily API key | |||
| help: | |||
| en_US: Get your Tavily API key from Tavily | |||
| zh_Hans: 从 TavilyApi 获取您的 Tavily API key | |||
| pt_BR: Get your Tavily API key from Tavily | |||
| url: https://docs.tavily.com/docs/welcome | |||
| url: https://app.tavily.com/home | |||
| @@ -0,0 +1,145 @@ | |||
| from typing import Any | |||
| import requests | |||
| from core.tools.entities.tool_entities import ToolInvokeMessage | |||
| from core.tools.tool.builtin_tool import BuiltinTool | |||
| TAVILY_API_URL = "https://api.tavily.com" | |||
| class TavilyExtract: | |||
| """ | |||
| A class for extracting content from web pages using the Tavily Extract API. | |||
| Args: | |||
| api_key (str): The API key for accessing the Tavily Extract API. | |||
| Methods: | |||
| extract_content: Retrieves extracted content from the Tavily Extract API. | |||
| """ | |||
| def __init__(self, api_key: str) -> None: | |||
| self.api_key = api_key | |||
| def extract_content(self, params: dict[str, Any]) -> dict: | |||
| """ | |||
| Retrieves extracted content from the Tavily Extract API. | |||
| Args: | |||
| params (Dict[str, Any]): The extraction parameters. | |||
| Returns: | |||
| dict: The extracted content. | |||
| """ | |||
| # Ensure required parameters are set | |||
| if "api_key" not in params: | |||
| params["api_key"] = self.api_key | |||
| # Process parameters | |||
| processed_params = self._process_params(params) | |||
| response = requests.post(f"{TAVILY_API_URL}/extract", json=processed_params) | |||
| response.raise_for_status() | |||
| return response.json() | |||
| def _process_params(self, params: dict[str, Any]) -> dict: | |||
| """ | |||
| Processes and validates the extraction parameters. | |||
| Args: | |||
| params (Dict[str, Any]): The extraction parameters. | |||
| Returns: | |||
| dict: The processed parameters. | |||
| """ | |||
| processed_params = {} | |||
| # Process 'urls' | |||
| if "urls" in params: | |||
| urls = params["urls"] | |||
| if isinstance(urls, str): | |||
| processed_params["urls"] = [url.strip() for url in urls.replace(",", " ").split()] | |||
| elif isinstance(urls, list): | |||
| processed_params["urls"] = urls | |||
| else: | |||
| raise ValueError("The 'urls' parameter is required.") | |||
| # Only include 'api_key' | |||
| processed_params["api_key"] = params.get("api_key", self.api_key) | |||
| return processed_params | |||
class TavilyExtractTool(BuiltinTool):
    """
    A tool for extracting content from web pages using Tavily Extract.
    """

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]:
        """
        Invokes the Tavily Extract tool with the given user ID and tool parameters.

        Args:
            user_id (str): The ID of the user invoking the tool.
            tool_parameters (Dict[str, Any]): The parameters for the Tavily Extract tool.
                Only 'urls' is read here; the dict is not modified.

        Returns:
            ToolInvokeMessage | list[ToolInvokeMessage]: A single text message when
            something is missing or fails, otherwise a JSON message with the raw
            API response followed by a markdown text message.
        """
        urls = tool_parameters.get("urls", "")
        api_key = self.runtime.credentials.get("tavily_api_key")
        if not api_key:
            return self.create_text_message(
                "Tavily API key is missing. Please set the 'tavily_api_key' in credentials."
            )
        if not urls:
            return self.create_text_message("Please input at least one URL to extract.")

        tavily_extract = TavilyExtract(api_key)
        try:
            # Hand over a shallow copy so the API key can never be written back
            # into the caller's tool_parameters.
            raw_results = tavily_extract.extract_content(dict(tool_parameters))
        except (requests.RequestException, ValueError) as e:
            # RequestException covers HTTP error statuses as well as connection
            # failures and timeouts; ValueError covers rejected 'urls' input.
            return self.create_text_message(f"Error occurred while extracting content: {str(e)}")

        if not raw_results.get("results"):
            return self.create_text_message("No content could be extracted from the provided URLs.")

        # Always return the JSON message with all data, plus a readable rendering.
        json_message = self.create_json_message(raw_results)
        text_message = self.create_text_message(text=self._format_results_as_text(raw_results))
        return [json_message, text_message]

    def _format_results_as_text(self, raw_results: dict) -> str:
        """
        Formats the raw extraction results into markdown text.

        Args:
            raw_results (dict): The raw extraction results. Reads the 'results'
                list ('url', 'raw_content') and the optional 'failed_results'
                list ('url', 'error').

        Returns:
            str: The formatted markdown text.
        """
        output_lines = []

        for idx, result in enumerate(raw_results.get("results", []), 1):
            url = result.get("url", "")
            raw_content = result.get("raw_content", "")
            output_lines.append(f"## Extracted Content {idx}: {url}\n")
            output_lines.append(f"**Raw Content:**\n{raw_content}\n")
            output_lines.append("---\n")

        # List URLs the API reported as failed, each with its reported error.
        if raw_results.get("failed_results"):
            output_lines.append("## Failed URLs:\n")
            for failed in raw_results["failed_results"]:
                url = failed.get("url", "")
                error = failed.get("error", "Unknown error")
                output_lines.append(f"- {url}: {error}\n")

        return "\n".join(output_lines)
| @@ -0,0 +1,23 @@ | |||
| identity: | |||
| name: tavily_extract | |||
| author: Kalo Chin | |||
| label: | |||
| en_US: Tavily Extract | |||
| zh_Hans: Tavily Extract | |||
| description: | |||
| human: | |||
| en_US: A web extraction tool built specifically for AI agents (LLMs), delivering raw content from web pages. | |||
| zh_Hans: 专为人工智能代理 (LLM) 构建的网页提取工具,提供网页的原始内容。 | |||
| llm: A tool for extracting raw content from web pages, designed for AI agents (LLMs). | |||
| parameters: | |||
| - name: urls | |||
| type: string | |||
| required: true | |||
| label: | |||
| en_US: URLs | |||
| zh_Hans: URLs | |||
| human_description: | |||
| en_US: A comma-separated list of URLs to extract content from. | |||
| zh_Hans: 要从中提取内容的 URL 的逗号分隔列表。 | |||
| llm_description: A comma-separated list of URLs to extract content from. | |||
| form: llm | |||
| @@ -17,8 +17,6 @@ class TavilySearch: | |||
| Methods: | |||
| raw_results: Retrieves raw search results from the Tavily Search API. | |||
| results: Retrieves cleaned search results from the Tavily Search API. | |||
| clean_results: Cleans the raw search results. | |||
| """ | |||
| def __init__(self, api_key: str) -> None: | |||
| @@ -35,63 +33,62 @@ class TavilySearch: | |||
| dict: The raw search results. | |||
| """ | |||
| # Ensure required parameters are set | |||
| params["api_key"] = self.api_key | |||
| if ( | |||
| "exclude_domains" in params | |||
| and isinstance(params["exclude_domains"], str) | |||
| and params["exclude_domains"] != "None" | |||
| ): | |||
| params["exclude_domains"] = params["exclude_domains"].split() | |||
| else: | |||
| params["exclude_domains"] = [] | |||
| if ( | |||
| "include_domains" in params | |||
| and isinstance(params["include_domains"], str) | |||
| and params["include_domains"] != "None" | |||
| ): | |||
| params["include_domains"] = params["include_domains"].split() | |||
| else: | |||
| params["include_domains"] = [] | |||
| response = requests.post(f"{TAVILY_API_URL}/search", json=params) | |||
| # Process parameters to ensure correct types | |||
| processed_params = self._process_params(params) | |||
| response = requests.post(f"{TAVILY_API_URL}/search", json=processed_params) | |||
| response.raise_for_status() | |||
| return response.json() | |||
| def results(self, params: dict[str, Any]) -> list[dict]: | |||
| def _process_params(self, params: dict[str, Any]) -> dict: | |||
| """ | |||
| Retrieves cleaned search results from the Tavily Search API. | |||
| Processes and validates the search parameters. | |||
| Args: | |||
| params (Dict[str, Any]): The search parameters. | |||
| Returns: | |||
| list: The cleaned search results. | |||
| dict: The processed parameters. | |||
| """ | |||
| raw_search_results = self.raw_results(params) | |||
| return self.clean_results(raw_search_results["results"]) | |||
| def clean_results(self, results: list[dict]) -> list[dict]: | |||
| """ | |||
| Cleans the raw search results. | |||
| Args: | |||
| results (list): The raw search results. | |||
| Returns: | |||
| list: The cleaned search results. | |||
| """ | |||
| clean_results = [] | |||
| for result in results: | |||
| clean_results.append( | |||
| { | |||
| "url": result["url"], | |||
| "content": result["content"], | |||
| } | |||
| ) | |||
| # return clean results as a string | |||
| return "\n".join([f"{res['url']}\n{res['content']}" for res in clean_results]) | |||
| processed_params = {} | |||
| for key, value in params.items(): | |||
| if value is None or value == "None": | |||
| continue | |||
| if key in ["include_domains", "exclude_domains"]: | |||
| if isinstance(value, str): | |||
| # Split the string by commas or spaces and strip whitespace | |||
| processed_params[key] = [domain.strip() for domain in value.replace(",", " ").split()] | |||
| elif key in ["include_images", "include_image_descriptions", "include_answer", "include_raw_content"]: | |||
| # Ensure boolean type | |||
| if isinstance(value, str): | |||
| processed_params[key] = value.lower() == "true" | |||
| else: | |||
| processed_params[key] = bool(value) | |||
| elif key in ["max_results", "days"]: | |||
| if isinstance(value, str): | |||
| processed_params[key] = int(value) | |||
| else: | |||
| processed_params[key] = value | |||
| elif key in ["search_depth", "topic", "query", "api_key"]: | |||
| processed_params[key] = value | |||
| else: | |||
| # Unrecognized parameter | |||
| pass | |||
| # Set defaults if not present | |||
| processed_params.setdefault("search_depth", "basic") | |||
| processed_params.setdefault("topic", "general") | |||
| processed_params.setdefault("max_results", 5) | |||
| # If topic is 'news', ensure 'days' is set | |||
| if processed_params.get("topic") == "news": | |||
| processed_params.setdefault("days", 3) | |||
| return processed_params | |||
| class TavilySearchTool(BuiltinTool): | |||
| @@ -111,14 +108,88 @@ class TavilySearchTool(BuiltinTool): | |||
| ToolInvokeMessage | list[ToolInvokeMessage]: The result of the Tavily search tool invocation. | |||
| """ | |||
| query = tool_parameters.get("query", "") | |||
| api_key = self.runtime.credentials["tavily_api_key"] | |||
| api_key = self.runtime.credentials.get("tavily_api_key") | |||
| if not api_key: | |||
| return self.create_text_message( | |||
| "Tavily API key is missing. Please set the 'tavily_api_key' in credentials." | |||
| ) | |||
| if not query: | |||
| return self.create_text_message("Please input query") | |||
| return self.create_text_message("Please input a query.") | |||
| tavily_search = TavilySearch(api_key) | |||
| results = tavily_search.results(tool_parameters) | |||
| print(results) | |||
| if not results: | |||
| return self.create_text_message(f"No results found for '{query}' in Tavily") | |||
| try: | |||
| raw_results = tavily_search.raw_results(tool_parameters) | |||
| except requests.HTTPError as e: | |||
| return self.create_text_message(f"Error occurred while searching: {str(e)}") | |||
| if not raw_results.get("results"): | |||
| return self.create_text_message(f"No results found for '{query}' in Tavily.") | |||
| else: | |||
| return self.create_text_message(text=results) | |||
| # Always return JSON message with all data | |||
| json_message = self.create_json_message(raw_results) | |||
| # Create text message based on user-selected parameters | |||
| text_message_content = self._format_results_as_text(raw_results, tool_parameters) | |||
| text_message = self.create_text_message(text=text_message_content) | |||
| return [json_message, text_message] | |||
| def _format_results_as_text(self, raw_results: dict, tool_parameters: dict[str, Any]) -> str: | |||
| """ | |||
| Formats the raw results into a markdown text based on user-selected parameters. | |||
| Args: | |||
| raw_results (dict): The raw search results. | |||
| tool_parameters (dict): The tool parameters selected by the user. | |||
| Returns: | |||
| str: The formatted markdown text. | |||
| """ | |||
| output_lines = [] | |||
| # Include answer if requested | |||
| if tool_parameters.get("include_answer", False) and raw_results.get("answer"): | |||
| output_lines.append(f"**Answer:** {raw_results['answer']}\n") | |||
| # Include images if requested | |||
| if tool_parameters.get("include_images", False) and raw_results.get("images"): | |||
| output_lines.append("**Images:**\n") | |||
| for image in raw_results["images"]: | |||
| if tool_parameters.get("include_image_descriptions", False) and "description" in image: | |||
| output_lines.append(f"![{image['description']}]({image['url']})\n") | |||
| else: | |||
| output_lines.append(f"\n") | |||
| # Process each result | |||
| if "results" in raw_results: | |||
| for idx, result in enumerate(raw_results["results"], 1): | |||
| title = result.get("title", "No Title") | |||
| url = result.get("url", "") | |||
| content = result.get("content", "") | |||
| published_date = result.get("published_date", "") | |||
| score = result.get("score", "") | |||
| output_lines.append(f"### Result {idx}: [{title}]({url})\n") | |||
| # Include published date if available and topic is 'news' | |||
| if tool_parameters.get("topic") == "news" and published_date: | |||
| output_lines.append(f"**Published Date:** {published_date}\n") | |||
| output_lines.append(f"**URL:** {url}\n") | |||
| # Include score (relevance) | |||
| if score: | |||
| output_lines.append(f"**Relevance Score:** {score}\n") | |||
| # Include content | |||
| if content: | |||
| output_lines.append(f"**Content:**\n{content}\n") | |||
| # Include raw content if requested | |||
| if tool_parameters.get("include_raw_content", False) and result.get("raw_content"): | |||
| output_lines.append(f"**Raw Content:**\n{result['raw_content']}\n") | |||
| # Add a separator | |||
| output_lines.append("---\n") | |||
| return "\n".join(output_lines) | |||
| @@ -2,28 +2,24 @@ identity: | |||
| name: tavily_search | |||
| author: Yash Parmar | |||
| label: | |||
| en_US: TavilySearch | |||
| zh_Hans: TavilySearch | |||
| pt_BR: TavilySearch | |||
| en_US: Tavily Search | |||
| zh_Hans: Tavily Search | |||
| description: | |||
| human: | |||
| en_US: A tool for search engine built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed. | |||
| en_US: A search engine tool built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed. | |||
| zh_Hans: 专为人工智能代理 (LLM) 构建的搜索引擎工具,可快速提供实时、准确和真实的结果。 | |||
| pt_BR: A tool for search engine built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed. | |||
| llm: A search engine tool built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed. | |||
| parameters: | |||
| - name: query | |||
| type: string | |||
| required: true | |||
| label: | |||
| en_US: Query string | |||
| zh_Hans: 查询语句 | |||
| pt_BR: Query string | |||
| en_US: Query | |||
| zh_Hans: 查询 | |||
| human_description: | |||
| en_US: used for searching | |||
| zh_Hans: 用于搜索网页内容 | |||
| pt_BR: used for searching | |||
| llm_description: key words for searching | |||
| en_US: The search query you want to execute with Tavily. | |||
| zh_Hans: 您想用 Tavily 执行的搜索查询。 | |||
| llm_description: The search query. | |||
| form: llm | |||
| - name: search_depth | |||
| type: select | |||
| @@ -31,122 +27,118 @@ parameters: | |||
| label: | |||
| en_US: Search Depth | |||
| zh_Hans: 搜索深度 | |||
| pt_BR: Search Depth | |||
| human_description: | |||
| en_US: The depth of search results | |||
| zh_Hans: 搜索结果的深度 | |||
| pt_BR: The depth of search results | |||
| en_US: The depth of the search. | |||
| zh_Hans: 搜索的深度。 | |||
| form: form | |||
| options: | |||
| - value: basic | |||
| label: | |||
| en_US: Basic | |||
| zh_Hans: 基本 | |||
| pt_BR: Basic | |||
| - value: advanced | |||
| label: | |||
| en_US: Advanced | |||
| zh_Hans: 高级 | |||
| pt_BR: Advanced | |||
| default: basic | |||
| - name: topic | |||
| type: select | |||
| required: false | |||
| label: | |||
| en_US: Topic | |||
| zh_Hans: 主题 | |||
| human_description: | |||
| en_US: The category of the search. | |||
| zh_Hans: 搜索的类别。 | |||
| form: form | |||
| options: | |||
| - value: general | |||
| label: | |||
| en_US: General | |||
| zh_Hans: 一般 | |||
| - value: news | |||
| label: | |||
| en_US: News | |||
| zh_Hans: 新闻 | |||
| default: general | |||
| - name: days | |||
| type: number | |||
| required: false | |||
| label: | |||
| en_US: Days | |||
| zh_Hans: 天数 | |||
| human_description: | |||
| en_US: The number of days back from the current date to include in the search results (only applicable when "topic" is "news"). | |||
| zh_Hans: 从当前日期起向前追溯的天数,以包含在搜索结果中(仅当“topic”为“news”时适用)。 | |||
| form: form | |||
| min: 1 | |||
| default: 3 | |||
| - name: max_results | |||
| type: number | |||
| required: false | |||
| label: | |||
| en_US: Max Results | |||
| zh_Hans: 最大结果数 | |||
| human_description: | |||
| en_US: The maximum number of search results to return. | |||
| zh_Hans: 要返回的最大搜索结果数。 | |||
| form: form | |||
| min: 1 | |||
| max: 20 | |||
| default: 5 | |||
| - name: include_images | |||
| type: boolean | |||
| required: false | |||
| label: | |||
| en_US: Include Images | |||
| zh_Hans: 包含图片 | |||
| pt_BR: Include Images | |||
| human_description: | |||
| en_US: Include images in the search results | |||
| zh_Hans: 在搜索结果中包含图片 | |||
| pt_BR: Include images in the search results | |||
| en_US: Include a list of query-related images in the response. | |||
| zh_Hans: 在响应中包含与查询相关的图片列表。 | |||
| form: form | |||
| options: | |||
| - value: 'true' | |||
| label: | |||
| en_US: 'Yes' | |||
| zh_Hans: 是 | |||
| pt_BR: 'Yes' | |||
| - value: 'false' | |||
| label: | |||
| en_US: 'No' | |||
| zh_Hans: 否 | |||
| pt_BR: 'No' | |||
| default: 'false' | |||
| default: false | |||
| - name: include_image_descriptions | |||
| type: boolean | |||
| required: false | |||
| label: | |||
| en_US: Include Image Descriptions | |||
| zh_Hans: 包含图片描述 | |||
| human_description: | |||
| en_US: When include_images is True, adds descriptive text for each image. | |||
| zh_Hans: 当 include_images 为 True 时,为每个图像添加描述文本。 | |||
| form: form | |||
| default: false | |||
| - name: include_answer | |||
| type: boolean | |||
| required: false | |||
| label: | |||
| en_US: Include Answer | |||
| zh_Hans: 包含答案 | |||
| pt_BR: Include Answer | |||
| human_description: | |||
| en_US: Include answers in the search results | |||
| zh_Hans: 在搜索结果中包含答案 | |||
| pt_BR: Include answers in the search results | |||
| en_US: Include a short answer to the original query in the response. | |||
| zh_Hans: 在响应中包含对原始查询的简短回答。 | |||
| form: form | |||
| options: | |||
| - value: 'true' | |||
| label: | |||
| en_US: 'Yes' | |||
| zh_Hans: 是 | |||
| pt_BR: 'Yes' | |||
| - value: 'false' | |||
| label: | |||
| en_US: 'No' | |||
| zh_Hans: 否 | |||
| pt_BR: 'No' | |||
| default: 'false' | |||
| default: false | |||
| - name: include_raw_content | |||
| type: boolean | |||
| required: false | |||
| label: | |||
| en_US: Include Raw Content | |||
| zh_Hans: 包含原始内容 | |||
| pt_BR: Include Raw Content | |||
| human_description: | |||
| en_US: Include raw content in the search results | |||
| zh_Hans: 在搜索结果中包含原始内容 | |||
| pt_BR: Include raw content in the search results | |||
| form: form | |||
| options: | |||
| - value: 'true' | |||
| label: | |||
| en_US: 'Yes' | |||
| zh_Hans: 是 | |||
| pt_BR: 'Yes' | |||
| - value: 'false' | |||
| label: | |||
| en_US: 'No' | |||
| zh_Hans: 否 | |||
| pt_BR: 'No' | |||
| default: 'false' | |||
| - name: max_results | |||
| type: number | |||
| required: false | |||
| label: | |||
| en_US: Max Results | |||
| zh_Hans: 最大结果 | |||
| pt_BR: Max Results | |||
| human_description: | |||
| en_US: The number of maximum search results to return | |||
| zh_Hans: 返回的最大搜索结果数 | |||
| pt_BR: The number of maximum search results to return | |||
| en_US: Include the cleaned and parsed HTML content of each search result. | |||
| zh_Hans: 包含每个搜索结果的已清理和解析的HTML内容。 | |||
| form: form | |||
| min: 1 | |||
| max: 20 | |||
| default: 5 | |||
| default: false | |||
| - name: include_domains | |||
| type: string | |||
| required: false | |||
| label: | |||
| en_US: Include Domains | |||
| zh_Hans: 包含域 | |||
| pt_BR: Include Domains | |||
| human_description: | |||
| en_US: A list of domains to specifically include in the search results | |||
| zh_Hans: 在搜索结果中特别包含的域名列表 | |||
| pt_BR: A list of domains to specifically include in the search results | |||
| en_US: A comma-separated list of domains to specifically include in the search results. | |||
| zh_Hans: 要在搜索结果中特别包含的域的逗号分隔列表。 | |||
| form: form | |||
| - name: exclude_domains | |||
| type: string | |||
| @@ -154,9 +146,7 @@ parameters: | |||
| label: | |||
| en_US: Exclude Domains | |||
| zh_Hans: 排除域 | |||
| pt_BR: Exclude Domains | |||
| human_description: | |||
| en_US: A list of domains to specifically exclude from the search results | |||
| zh_Hans: 从搜索结果中特别排除的域名列表 | |||
| pt_BR: A list of domains to specifically exclude from the search results | |||
| en_US: A comma-separated list of domains to specifically exclude from the search results. | |||
| zh_Hans: 要从搜索结果中特别排除的域的逗号分隔列表。 | |||
| form: form | |||