Signed-off-by: xhe <xw897002528@gmail.com>
@@ -1,13 +1,10 @@
import json
from collections.abc import Generator
from typing import Optional, Union
import requests
from yarl import URL
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    PromptMessage,
    PromptMessageTool,
)
@@ -39,208 +36,3 @@ class DeepseekLargeLanguageModel(OAIAPICompatLargeLanguageModel):
        credentials["mode"] = LLMMode.CHAT.value
        credentials["function_calling_type"] = "tool_call"
        credentials["stream_function_calling"] = "support"

    def _handle_generate_stream_response(
        self, model: str, credentials: dict, response: requests.Response, prompt_messages: list[PromptMessage]
    ) -> Generator:
        """
        Handle llm stream response

        :param model: model name
        :param credentials: model credentials
        :param response: streamed response
        :param prompt_messages: prompt messages
        :return: llm response chunk generator
        """
        full_assistant_content = ""
        chunk_index = 0
        is_reasoning_started = False  # Add flag to track reasoning state

        def create_final_llm_result_chunk(
            id: Optional[str], index: int, message: AssistantPromptMessage, finish_reason: str, usage: dict
        ) -> LLMResultChunk:
            # calculate num tokens
            prompt_tokens = usage and usage.get("prompt_tokens")
            if prompt_tokens is None:
                prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
            completion_tokens = usage and usage.get("completion_tokens")
            if completion_tokens is None:
                completion_tokens = self._num_tokens_from_string(model, full_assistant_content)

            # transform usage
            usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)

            return LLMResultChunk(
                id=id,
                model=model,
                prompt_messages=prompt_messages,
                delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
            )

        # delimiter for stream response, need unicode_escape
        import codecs

        delimiter = credentials.get("stream_mode_delimiter", "\n\n")
        delimiter = codecs.decode(delimiter, "unicode_escape")

        tools_calls: list[AssistantPromptMessage.ToolCall] = []

        def increase_tool_call(new_tool_calls: list[AssistantPromptMessage.ToolCall]):
            def get_tool_call(tool_call_id: str):
                if not tool_call_id:
                    return tools_calls[-1]

                tool_call = next((tool_call for tool_call in tools_calls if tool_call.id == tool_call_id), None)
                if tool_call is None:
                    tool_call = AssistantPromptMessage.ToolCall(
                        id=tool_call_id,
                        type="function",
                        function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
                    )
                    tools_calls.append(tool_call)

                return tool_call

            for new_tool_call in new_tool_calls:
                # get tool call
                tool_call = get_tool_call(new_tool_call.function.name)
                # update tool call
                if new_tool_call.id:
                    tool_call.id = new_tool_call.id
                if new_tool_call.type:
                    tool_call.type = new_tool_call.type
                if new_tool_call.function.name:
                    tool_call.function.name = new_tool_call.function.name
                if new_tool_call.function.arguments:
                    tool_call.function.arguments += new_tool_call.function.arguments

        finish_reason = None  # The default value of finish_reason is None
        message_id, usage = None, None
        for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
            chunk = chunk.strip()
            if chunk:
                # ignore sse comments
                if chunk.startswith(":"):
                    continue
                decoded_chunk = chunk.strip().removeprefix("data:").lstrip()
                if decoded_chunk == "[DONE]":  # Some provider returns "data: [DONE]"
                    continue

                try:
                    chunk_json: dict = json.loads(decoded_chunk)
                # stream ended
                except json.JSONDecodeError as e:
                    yield create_final_llm_result_chunk(
                        id=message_id,
                        index=chunk_index + 1,
                        message=AssistantPromptMessage(content=""),
                        finish_reason="Non-JSON encountered.",
                        usage=usage,
                    )
                    break
                # handle the error here. for issue #11629
                if chunk_json.get("error") and chunk_json.get("choices") is None:
                    raise ValueError(chunk_json.get("error"))

                if chunk_json:
                    if u := chunk_json.get("usage"):
                        usage = u
                if not chunk_json or len(chunk_json["choices"]) == 0:
                    continue

                choice = chunk_json["choices"][0]
                finish_reason = chunk_json["choices"][0].get("finish_reason")
                message_id = chunk_json.get("id")
                chunk_index += 1

                if "delta" in choice:
                    delta = choice["delta"]
                    is_reasoning = delta.get("reasoning_content")
                    delta_content = delta.get("content") or delta.get("reasoning_content")

                    assistant_message_tool_calls = None
                    if "tool_calls" in delta and credentials.get("function_calling_type", "no_call") == "tool_call":
                        assistant_message_tool_calls = delta.get("tool_calls", None)
                    elif (
                        "function_call" in delta
                        and credentials.get("function_calling_type", "no_call") == "function_call"
                    ):
                        assistant_message_tool_calls = [
                            {"id": "tool_call_id", "type": "function", "function": delta.get("function_call", {})}
                        ]

                    # assistant_message_function_call = delta.delta.function_call

                    # extract tool calls from response
                    if assistant_message_tool_calls:
                        tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
                        increase_tool_call(tool_calls)

                    if delta_content is None or delta_content == "":
                        continue

                    # Add markdown quote markers for reasoning content
                    if is_reasoning:
                        if not is_reasoning_started:
                            delta_content = "> 💭 " + delta_content
                            is_reasoning_started = True
                        elif "\n\n" in delta_content:
                            delta_content = delta_content.replace("\n\n", "\n> ")
                        elif "\n" in delta_content:
                            delta_content = delta_content.replace("\n", "\n> ")
                    elif is_reasoning_started:
                        # If we were in reasoning mode but now getting regular content,
                        # add \n\n to close the reasoning block
                        delta_content = "\n\n" + delta_content
                        is_reasoning_started = False

                    # transform assistant message to prompt message
                    assistant_prompt_message = AssistantPromptMessage(
                        content=delta_content,
                    )

                    # reset tool calls
                    tool_calls = []
                    full_assistant_content += delta_content
                elif "text" in choice:
                    choice_text = choice.get("text", "")
                    if choice_text == "":
                        continue

                    # transform assistant message to prompt message
                    assistant_prompt_message = AssistantPromptMessage(content=choice_text)
                    full_assistant_content += choice_text
                else:
                    continue

                yield LLMResultChunk(
                    id=message_id,
                    model=model,
                    prompt_messages=prompt_messages,
                    delta=LLMResultChunkDelta(
                        index=chunk_index,
                        message=assistant_prompt_message,
                    ),
                )

            chunk_index += 1

        if tools_calls:
            yield LLMResultChunk(
                id=message_id,
                model=model,
                prompt_messages=prompt_messages,
                delta=LLMResultChunkDelta(
                    index=chunk_index,
                    message=AssistantPromptMessage(tool_calls=tools_calls, content=""),
                ),
            )

        yield create_final_llm_result_chunk(
            id=message_id,
            index=chunk_index,
            message=AssistantPromptMessage(content=""),
            finish_reason=finish_reason,
            usage=usage,
        )
@@ -1,5 +1,6 @@
import json
import logging
import re
from collections.abc import Generator
from decimal import Decimal
from typing import Optional, Union, cast

@@ -515,6 +516,8 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
                if "delta" in choice:
                    delta = choice["delta"]
                    delta_content = delta.get("content")
                    if not delta_content:
                        delta_content = ""

                    if not is_reasoning_started_tag and "<think>" in delta_content:
                        is_reasoning_started_tag = True
@@ -523,20 +526,21 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
                        delta_content = delta_content.replace("</think>", "") + "\n\n"
                        is_reasoning_started_tag = False
                    elif is_reasoning_started_tag:
                        if "\n\n" in delta_content:
                            delta_content = delta_content.replace("\n\n", "\n> ")
                        elif "\n" in delta_content:
                            delta_content = delta_content.replace("\n", "\n> ")
                        if "\n" in delta_content:
                            delta_content = re.sub(r"\n(?!(>|\n))", "\n> ", delta_content)

                    reasoning_content = delta.get("reasoning_content")
                    if reasoning_content:
                    if is_reasoning_started and not reasoning_content and not delta_content:
                        delta_content = ""
                    elif reasoning_content:
                        if not is_reasoning_started:
                            delta_content = "> 💭 " + reasoning_content
                            is_reasoning_started = True
                        elif "\n\n" in delta_content:
                            delta_content = reasoning_content.replace("\n\n", "\n> ")
                        elif "\n" in delta_content:
                            delta_content = reasoning_content.replace("\n", "\n> ")
                        else:
                            delta_content = reasoning_content
                        if "\n" in delta_content:
                            delta_content = re.sub(r"\n(?!(>|\n))", "\n> ", delta_content)
                    elif is_reasoning_started:
                        # If we were in reasoning mode but now getting regular content,
                        # add \n\n to close the reasoning block
@@ -1,13 +1,9 @@
import json
from collections.abc import Generator
from typing import Optional, Union
import requests
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    PromptMessage,
    PromptMessageTool,
)
@@ -96,208 +92,3 @@ class SiliconflowLargeLanguageModel(OAIAPICompatLargeLanguageModel):
                ),
            ],
        )

    def _handle_generate_stream_response(
        self, model: str, credentials: dict, response: requests.Response, prompt_messages: list[PromptMessage]
    ) -> Generator:
        """
        Handle llm stream response

        :param model: model name
        :param credentials: model credentials
        :param response: streamed response
        :param prompt_messages: prompt messages
        :return: llm response chunk generator
        """
        full_assistant_content = ""
        chunk_index = 0
        is_reasoning_started = False  # Add flag to track reasoning state

        def create_final_llm_result_chunk(
            id: Optional[str], index: int, message: AssistantPromptMessage, finish_reason: str, usage: dict
        ) -> LLMResultChunk:
            # calculate num tokens
            prompt_tokens = usage and usage.get("prompt_tokens")
            if prompt_tokens is None:
                prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
            completion_tokens = usage and usage.get("completion_tokens")
            if completion_tokens is None:
                completion_tokens = self._num_tokens_from_string(model, full_assistant_content)

            # transform usage
            usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)

            return LLMResultChunk(
                id=id,
                model=model,
                prompt_messages=prompt_messages,
                delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
            )

        # delimiter for stream response, need unicode_escape
        import codecs

        delimiter = credentials.get("stream_mode_delimiter", "\n\n")
        delimiter = codecs.decode(delimiter, "unicode_escape")

        tools_calls: list[AssistantPromptMessage.ToolCall] = []

        def increase_tool_call(new_tool_calls: list[AssistantPromptMessage.ToolCall]):
            def get_tool_call(tool_call_id: str):
                if not tool_call_id:
                    return tools_calls[-1]

                tool_call = next((tool_call for tool_call in tools_calls if tool_call.id == tool_call_id), None)
                if tool_call is None:
                    tool_call = AssistantPromptMessage.ToolCall(
                        id=tool_call_id,
                        type="function",
                        function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
                    )
                    tools_calls.append(tool_call)

                return tool_call

            for new_tool_call in new_tool_calls:
                # get tool call
                tool_call = get_tool_call(new_tool_call.function.name)
                # update tool call
                if new_tool_call.id:
                    tool_call.id = new_tool_call.id
                if new_tool_call.type:
                    tool_call.type = new_tool_call.type
                if new_tool_call.function.name:
                    tool_call.function.name = new_tool_call.function.name
                if new_tool_call.function.arguments:
                    tool_call.function.arguments += new_tool_call.function.arguments

        finish_reason = None  # The default value of finish_reason is None
        message_id, usage = None, None
        for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
            chunk = chunk.strip()
            if chunk:
                # ignore sse comments
                if chunk.startswith(":"):
                    continue
                decoded_chunk = chunk.strip().removeprefix("data:").lstrip()
                if decoded_chunk == "[DONE]":  # Some provider returns "data: [DONE]"
                    continue

                try:
                    chunk_json: dict = json.loads(decoded_chunk)
                # stream ended
                except json.JSONDecodeError as e:
                    yield create_final_llm_result_chunk(
                        id=message_id,
                        index=chunk_index + 1,
                        message=AssistantPromptMessage(content=""),
                        finish_reason="Non-JSON encountered.",
                        usage=usage,
                    )
                    break
                # handle the error here. for issue #11629
                if chunk_json.get("error") and chunk_json.get("choices") is None:
                    raise ValueError(chunk_json.get("error"))

                if chunk_json:
                    if u := chunk_json.get("usage"):
                        usage = u
                if not chunk_json or len(chunk_json["choices"]) == 0:
                    continue

                choice = chunk_json["choices"][0]
                finish_reason = chunk_json["choices"][0].get("finish_reason")
                message_id = chunk_json.get("id")
                chunk_index += 1

                if "delta" in choice:
                    delta = choice["delta"]
                    delta_content = delta.get("content")

                    assistant_message_tool_calls = None
                    if "tool_calls" in delta and credentials.get("function_calling_type", "no_call") == "tool_call":
                        assistant_message_tool_calls = delta.get("tool_calls", None)
                    elif (
                        "function_call" in delta
                        and credentials.get("function_calling_type", "no_call") == "function_call"
                    ):
                        assistant_message_tool_calls = [
                            {"id": "tool_call_id", "type": "function", "function": delta.get("function_call", {})}
                        ]

                    # assistant_message_function_call = delta.delta.function_call

                    # extract tool calls from response
                    if assistant_message_tool_calls:
                        tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
                        increase_tool_call(tool_calls)

                    if delta_content is None or delta_content == "":
                        continue

                    # Check for think tags
                    if "<think>" in delta_content:
                        is_reasoning_started = True
                        # Remove <think> tag and add markdown quote
                        delta_content = "> 💭 " + delta_content.replace("<think>", "")
                    elif "</think>" in delta_content:
                        # Remove </think> tag and add newlines to end quote block
                        delta_content = delta_content.replace("</think>", "") + "\n\n"
                        is_reasoning_started = False
                    elif is_reasoning_started:
                        # Add quote markers for content within thinking block
                        if "\n\n" in delta_content:
                            delta_content = delta_content.replace("\n\n", "\n> ")
                        elif "\n" in delta_content:
                            delta_content = delta_content.replace("\n", "\n> ")

                    # transform assistant message to prompt message
                    assistant_prompt_message = AssistantPromptMessage(
                        content=delta_content,
                    )

                    # reset tool calls
                    tool_calls = []
                    full_assistant_content += delta_content
                elif "text" in choice:
                    choice_text = choice.get("text", "")
                    if choice_text == "":
                        continue

                    # transform assistant message to prompt message
                    assistant_prompt_message = AssistantPromptMessage(content=choice_text)
                    full_assistant_content += choice_text
                else:
                    continue

                yield LLMResultChunk(
                    id=message_id,
                    model=model,
                    prompt_messages=prompt_messages,
                    delta=LLMResultChunkDelta(
                        index=chunk_index,
                        message=assistant_prompt_message,
                    ),
                )

            chunk_index += 1

        if tools_calls:
            yield LLMResultChunk(
                id=message_id,
                model=model,
                prompt_messages=prompt_messages,
                delta=LLMResultChunkDelta(
                    index=chunk_index,
                    message=AssistantPromptMessage(tool_calls=tools_calls, content=""),
                ),
            )

        yield create_final_llm_result_chunk(
            id=message_id,
            index=chunk_index,
            message=AssistantPromptMessage(content=""),
            finish_reason=finish_reason,
            usage=usage,
        )
@@ -1,4 +1,5 @@
import logging
import re
from collections.abc import Generator
from typing import Optional
@@ -251,25 +252,23 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
        for chunk in chunks:
            content = ""
            if chunk.choices:
                if hasattr(chunk.choices[0].delta, "reasoning_content"):
                    delta_content = ""
                delta = chunk.choices[0].delta
                if is_reasoning_started and not hasattr(delta, "reasoning_content") and not delta.content:
                    content = ""
                elif hasattr(delta, "reasoning_content"):
                    if not is_reasoning_started:
                        is_reasoning_started = True
                        delta_content = "> 💭 " + chunk.choices[0].delta.reasoning_content
                        content = "> 💭 " + delta.reasoning_content
                    else:
                        delta_content = chunk.choices[0].delta.reasoning_content
                        content = delta.reasoning_content

                    if "\n\n" in delta_content:
                        delta_content = delta_content.replace("\n\n", "\n> ")
                    elif "\n" in delta_content:
                        delta_content = delta_content.replace("\n", "\n> ")
                    content = delta_content
                    if "\n" in content:
                        content = re.sub(r"\n(?!(>|\n))", "\n> ", content)
                elif is_reasoning_started:
                    content = "\n\n" + chunk.choices[0].delta.content
                    content = "\n\n" + delta.content
                    is_reasoning_started = False
                else:
                    content = chunk.choices[0].delta.content
                    content = delta.content

            yield LLMResultChunk(
                model=model,