Co-authored-by: crazywoola <427733928@qq.com>tags/0.11.2
| @@ -0,0 +1,11 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | |||
| <svg width="800px" height="800px" viewBox="0 -38 256 256" version="1.1" xmlns="http://www.w3.org/2000/svg" | |||
| xmlns:xlink="http://www.w3.org/1999/xlink" preserveAspectRatio="xMidYMid"> | |||
| <g> | |||
| <path d="M250.346231,28.0746923 C247.358133,17.0320558 238.732098,8.40602109 227.689461,5.41792308 C207.823743,0 127.868333,0 127.868333,0 C127.868333,0 47.9129229,0.164179487 28.0472049,5.58210256 C17.0045684,8.57020058 8.37853373,17.1962353 5.39043571,28.2388718 C-0.618533519,63.5374615 -2.94988224,117.322662 5.5546152,151.209308 C8.54271322,162.251944 17.1687479,170.877979 28.2113844,173.866077 C48.0771024,179.284 128.032513,179.284 128.032513,179.284 C128.032513,179.284 207.987923,179.284 227.853641,173.866077 C238.896277,170.877979 247.522312,162.251944 250.51041,151.209308 C256.847738,115.861464 258.801474,62.1091 250.346231,28.0746923 Z" | |||
| fill="#FF0000"> | |||
| </path> | |||
| <polygon fill="#FFFFFF" points="102.420513 128.06 168.749025 89.642 102.420513 51.224"> | |||
| </polygon> | |||
| </g> | |||
| </svg> | |||
| @@ -0,0 +1,81 @@ | |||
| from typing import Any, Union | |||
| from urllib.parse import parse_qs, urlparse | |||
| from youtube_transcript_api import YouTubeTranscriptApi | |||
| from core.tools.entities.tool_entities import ToolInvokeMessage | |||
| from core.tools.tool.builtin_tool import BuiltinTool | |||
class YouTubeTranscriptTool(BuiltinTool):
    """Built-in tool that retrieves the transcript of a YouTube video."""

    def _invoke(
        self, user_id: str, tool_parameters: dict[str, Any]
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        Invoke the YouTube transcript tool.

        Reads ``video_id`` (ID or URL), ``language``, ``format``,
        ``preserve_formatting``, ``proxy`` and ``cookies`` from
        ``tool_parameters``, fetches the transcript and returns it as a
        single text message. Failures are reported as a text message
        rather than raised.
        """
        try:
            # Extract parameters with defaults
            video_input = tool_parameters.get("video_id")
            if not video_input:
                raise ValueError("video_id is required")
            language = tool_parameters.get("language")
            output_format = tool_parameters.get("format", "text")
            preserve_formatting = tool_parameters.get("preserve_formatting", False)
            proxy = tool_parameters.get("proxy")
            cookies = tool_parameters.get("cookies")

            # Extract video ID from URL if needed
            video_id = self._extract_video_id(video_input)

            # Common kwargs for API calls
            # NOTE(review): `cookies` is a server-side file path supplied through
            # the tool form; confirm callers are trusted to choose that path.
            kwargs = {"proxies": {"https": proxy} if proxy else None, "cookies": cookies}

            try:
                transcript_data = self._fetch_transcript(video_id, language, preserve_formatting, kwargs)
                formatted_transcript = self._format_transcript(transcript_data, output_format)
                return self.create_text_message(text=formatted_transcript)
            except Exception as e:
                return self.create_text_message(text=f"Error getting transcript: {str(e)}")

        except Exception as e:
            return self.create_text_message(text=f"Error processing request: {str(e)}")

    def _fetch_transcript(
        self, video_id: str, language: Any, preserve_formatting: bool, kwargs: dict[str, Any]
    ) -> Any:
        """
        Fetch the raw transcript entries for ``video_id``.

        When ``language`` is given, that language is looked up first and, if
        it is not available, the English transcript is translated into it.
        ``preserve_formatting`` is honoured on both code paths.
        """
        if not language:
            return YouTubeTranscriptApi.get_transcript(
                video_id, preserve_formatting=preserve_formatting, **kwargs
            )

        # Imported locally so the module-level import block stays untouched.
        from youtube_transcript_api import NoTranscriptFound

        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, **kwargs)
        try:
            transcript = transcript_list.find_transcript([language])
        except NoTranscriptFound:
            # If requested language not found, try translating from English
            transcript = transcript_list.find_transcript(["en"]).translate(language)
        return transcript.fetch(preserve_formatting=preserve_formatting)

    def _format_transcript(self, transcript_data: Any, output_format: Any) -> str:
        """
        Render transcript entries in ``output_format``.

        Known formats are ``json``, ``pretty``, ``srt`` and ``vtt``; any other
        value (including ``text``) yields the entry texts joined by spaces.
        """
        formatter_class = {
            "json": "JSONFormatter",
            "pretty": "PrettyPrintFormatter",
            "srt": "SRTFormatter",
            "vtt": "WebVTTFormatter",
        }.get(output_format)

        if formatter_class:
            from youtube_transcript_api import formatters

            formatter = getattr(formatters, formatter_class)()
            return formatter.format_transcript(transcript_data)
        return " ".join(entry["text"] for entry in transcript_data)

    def _extract_video_id(self, video_input: str) -> str:
        """
        Extract video ID from URL or return as-is if already an ID.

        Handles ``watch?v=<id>``, ``/embed/<id>``, ``/shorts/<id>``,
        ``/live/<id>``, ``/v/<id>`` and ``youtu.be/<id>`` links, with or
        without a URL scheme.

        Raises:
            ValueError: if the input looks like a YouTube URL but no video ID
                can be found in it.
        """
        candidate = video_input.strip()
        if "youtube.com" not in candidate and "youtu.be" not in candidate:
            return candidate  # Assume it's already a video ID

        # urlparse only recognises the host when a scheme (or leading "//") is
        # present; without it "youtube.com/watch" would land in `path`.
        if "://" not in candidate:
            candidate = "https://" + candidate.lstrip("/")

        parsed_url = urlparse(candidate)
        segments = [segment for segment in parsed_url.path.split("/") if segment]

        if "youtube.com" in parsed_url.netloc:
            ids = parse_qs(parsed_url.query).get("v")
            if ids:
                return ids[0]
            if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
                return segments[1]
        elif segments:  # youtu.be/<id>
            return segments[0]

        raise ValueError(f"Could not extract a video ID from: {video_input}")
| @@ -0,0 +1,101 @@ | |||
| identity: | |||
| name: free_youtube_transcript | |||
| author: Tao Wang | |||
| label: | |||
| en_US: Free YouTube Transcript API | |||
| zh_Hans: 免费获取 YouTube 转录 | |||
| description: | |||
| human: | |||
| en_US: Get transcript from a YouTube video for free. | |||
| zh_Hans: 免费获取 YouTube 视频的转录文案。 | |||
| llm: A tool for retrieving transcript from YouTube videos. | |||
| parameters: | |||
| - name: video_id | |||
| type: string | |||
| required: true | |||
| label: | |||
| en_US: Video ID/URL | |||
| zh_Hans: 视频ID/URL | |||
| human_description: | |||
| en_US: Used to define the video from which the transcript will be fetched. You can find the id in the video url. For example - https://www.youtube.com/watch?v=video_id. | |||
| zh_Hans: 您要哪条视频的转录文案?您可以在视频链接中找到id。例如 - https://www.youtube.com/watch?v=video_id。 | |||
| llm_description: Used to define the video from which the transcript will be fetched. For example - https://www.youtube.com/watch?v=video_id. | |||
| form: llm | |||
| - name: language | |||
| type: string | |||
| required: false | |||
| label: | |||
| en_US: Language Code | |||
| zh_Hans: 语言 | |||
| human_description: | |||
| en_US: Language code (e.g. 'en', 'zh') for the transcript. Defaults to English when left empty. | |||
| zh_Hans: 字幕语言代码(如'en'、'zh')。留空则默认使用英语。 | |||
| llm_description: Used to set the language for transcripts. | |||
| form: form | |||
| - name: format | |||
| type: select | |||
| required: false | |||
| default: text | |||
| options: | |||
| - value: text | |||
| label: | |||
| en_US: Plain Text | |||
| zh_Hans: 纯文本 | |||
| - value: json | |||
| label: | |||
| en_US: JSON Format | |||
| zh_Hans: JSON 格式 | |||
| - value: pretty | |||
| label: | |||
| en_US: Pretty Print Format | |||
| zh_Hans: 美化格式 | |||
| - value: srt | |||
| label: | |||
| en_US: SRT Format | |||
| zh_Hans: SRT 格式 | |||
| - value: vtt | |||
| label: | |||
| en_US: WebVTT Format | |||
| zh_Hans: WebVTT 格式 | |||
| label: | |||
| en_US: Output Format | |||
| zh_Hans: 输出格式 | |||
| human_description: | |||
| en_US: Format of the transcript output | |||
| zh_Hans: 字幕输出格式 | |||
| llm_description: The format to output the transcript in. Options are text (plain text), json (raw transcript data), pretty (pretty-printed transcript data), srt (SubRip format), or vtt (WebVTT format) | |||
| form: form | |||
| - name: preserve_formatting | |||
| type: boolean | |||
| required: false | |||
| default: false | |||
| label: | |||
| en_US: Preserve Formatting | |||
| zh_Hans: 保留格式 | |||
| human_description: | |||
| en_US: Keep HTML formatting elements like <i> (italics) and <b> (bold) | |||
| zh_Hans: 保留HTML格式元素,如<i>(斜体)和<b>(粗体) | |||
| llm_description: Whether to preserve HTML formatting elements in the transcript text | |||
| form: form | |||
| - name: proxy | |||
| type: string | |||
| required: false | |||
| label: | |||
| en_US: HTTPS Proxy | |||
| zh_Hans: HTTPS 代理 | |||
| human_description: | |||
| en_US: HTTPS proxy URL (e.g. https://user:pass@domain:port) | |||
| zh_Hans: HTTPS 代理地址(如 https://user:pass@domain:port) | |||
| llm_description: HTTPS proxy to use for the request. Format should be https://user:pass@domain:port | |||
| form: form | |||
| - name: cookies | |||
| type: string | |||
| required: false | |||
| label: | |||
| en_US: Cookies File Path | |||
| zh_Hans: Cookies 文件路径 | |||
| human_description: | |||
| en_US: Path to cookies.txt file for accessing age-restricted videos | |||
| zh_Hans: 用于访问年龄限制视频的 cookies.txt 文件路径 | |||
| llm_description: Path to a cookies.txt file containing YouTube cookies, needed for accessing age-restricted videos | |||
| form: form | |||
| @@ -0,0 +1,11 @@ | |||
| from typing import Any | |||
| from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController | |||
class YouTubeTranscriptProvider(BuiltinToolProviderController):
    """Provider controller for the YouTube transcript tool."""

    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
        """
        Accept whatever is passed: the YouTube Transcript API needs no credentials.
        """
        return None
| @@ -0,0 +1,13 @@ | |||
| identity: | |||
| author: Tao Wang | |||
| name: transcript | |||
| label: | |||
| en_US: Transcript | |||
| zh_Hans: Transcript | |||
| description: | |||
| en_US: Get transcripts from YouTube videos | |||
| zh_Hans: 获取 YouTube 视频的字幕/转录文本 | |||
| icon: icon.svg | |||
| tags: | |||
| - videos | |||
| credentials_for_provider: | |||
| @@ -187,6 +187,7 @@ websocket-client = "~1.7.0" | |||
| werkzeug = "~3.0.1" | |||
| xinference-client = "0.15.2" | |||
| yarl = "~1.9.4" | |||
| youtube-transcript-api = "~0.6.2" | |||
| zhipuai = "~2.1.5" | |||
| # Before adding a new dependency, consider placing it in alphabetical order (a-z) and in a suitable group. | |||