|
|
|
@@ -1,4 +1,3 @@ |
|
|
|
import re |
|
|
|
from collections.abc import Generator, Iterator |
|
|
|
from typing import Optional, cast |
|
|
|
|
|
|
|
@@ -636,16 +635,13 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel): |
|
|
|
handle stream chat generate response |
|
|
|
""" |
|
|
|
full_response = "" |
|
|
|
is_reasoning_started_tag = False |
|
|
|
for chunk in resp: |
|
|
|
if len(chunk.choices) == 0: |
|
|
|
continue |
|
|
|
delta = chunk.choices[0] |
|
|
|
if delta.finish_reason is None and (delta.delta.content is None or delta.delta.content == ""): |
|
|
|
continue |
|
|
|
delta_content = delta.delta.content |
|
|
|
if not delta_content: |
|
|
|
delta_content = "" |
|
|
|
delta_content = delta.delta.content or "" |
|
|
|
# check if there is a tool call in the response |
|
|
|
function_call = None |
|
|
|
tool_calls = [] |
|
|
|
@@ -658,15 +654,7 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel): |
|
|
|
if function_call: |
|
|
|
assistant_message_tool_calls += [self._extract_response_function_call(function_call)] |
|
|
|
|
|
|
|
if not is_reasoning_started_tag and "<think>" in delta_content: |
|
|
|
is_reasoning_started_tag = True |
|
|
|
delta_content = "> 💭 " + delta_content.replace("<think>", "") |
|
|
|
elif is_reasoning_started_tag and "</think>" in delta_content: |
|
|
|
delta_content = delta_content.replace("</think>", "") + "\n\n" |
|
|
|
is_reasoning_started_tag = False |
|
|
|
elif is_reasoning_started_tag: |
|
|
|
if "\n" in delta_content: |
|
|
|
delta_content = re.sub(r"\n(?!(>|\n))", "\n> ", delta_content) |
|
|
|
delta_content = self._wrap_thinking_by_tag(delta_content) |
|
|
|
# transform assistant message to prompt message |
|
|
|
assistant_prompt_message = AssistantPromptMessage( |
|
|
|
content=delta_content or "", tool_calls=assistant_message_tool_calls |