|
|
|
@@ -397,16 +397,21 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): |
|
|
|
chunk_index = 0 |
|
|
|
|
|
|
|
def create_final_llm_result_chunk( |
|
|
|
index: int, message: AssistantPromptMessage, finish_reason: str |
|
|
|
id: Optional[str], index: int, message: AssistantPromptMessage, finish_reason: str, usage: dict |
|
|
|
) -> LLMResultChunk: |
|
|
|
# calculate num tokens |
|
|
|
prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content) |
|
|
|
completion_tokens = self._num_tokens_from_string(model, full_assistant_content) |
|
|
|
prompt_tokens = usage and usage.get("prompt_tokens") |
|
|
|
if prompt_tokens is None: |
|
|
|
prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content) |
|
|
|
completion_tokens = usage and usage.get("completion_tokens") |
|
|
|
if completion_tokens is None: |
|
|
|
completion_tokens = self._num_tokens_from_string(model, full_assistant_content) |
|
|
|
|
|
|
|
# transform usage |
|
|
|
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) |
|
|
|
|
|
|
|
return LLMResultChunk( |
|
|
|
id=id, |
|
|
|
model=model, |
|
|
|
prompt_messages=prompt_messages, |
|
|
|
delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage), |
|
|
|
@@ -450,7 +455,7 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): |
|
|
|
tool_call.function.arguments += new_tool_call.function.arguments |
|
|
|
|
|
|
|
finish_reason = None # The default value of finish_reason is None |
|
|
|
|
|
|
|
message_id, usage = None, None |
|
|
|
for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter): |
|
|
|
chunk = chunk.strip() |
|
|
|
if chunk: |
|
|
|
@@ -462,20 +467,26 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): |
|
|
|
continue |
|
|
|
|
|
|
|
try: |
|
|
|
chunk_json = json.loads(decoded_chunk) |
|
|
|
chunk_json: dict = json.loads(decoded_chunk) |
|
|
|
# stream ended |
|
|
|
except json.JSONDecodeError as e: |
|
|
|
yield create_final_llm_result_chunk( |
|
|
|
id=message_id, |
|
|
|
index=chunk_index + 1, |
|
|
|
message=AssistantPromptMessage(content=""), |
|
|
|
finish_reason="Non-JSON encountered.", |
|
|
|
usage=usage, |
|
|
|
) |
|
|
|
break |
|
|
|
if chunk_json: |
|
|
|
if u := chunk_json.get("usage"): |
|
|
|
usage = u |
|
|
|
if not chunk_json or len(chunk_json["choices"]) == 0: |
|
|
|
continue |
|
|
|
|
|
|
|
choice = chunk_json["choices"][0] |
|
|
|
finish_reason = chunk_json["choices"][0].get("finish_reason") |
|
|
|
message_id = chunk_json.get("id") |
|
|
|
chunk_index += 1 |
|
|
|
|
|
|
|
if "delta" in choice: |
|
|
|
@@ -524,6 +535,7 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): |
|
|
|
continue |
|
|
|
|
|
|
|
yield LLMResultChunk( |
|
|
|
id=message_id, |
|
|
|
model=model, |
|
|
|
prompt_messages=prompt_messages, |
|
|
|
delta=LLMResultChunkDelta( |
|
|
|
@@ -536,6 +548,7 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): |
|
|
|
|
|
|
|
if tools_calls: |
|
|
|
yield LLMResultChunk( |
|
|
|
id=message_id, |
|
|
|
model=model, |
|
|
|
prompt_messages=prompt_messages, |
|
|
|
delta=LLMResultChunkDelta( |
|
|
|
@@ -545,17 +558,22 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): |
|
|
|
) |
|
|
|
|
|
|
|
yield create_final_llm_result_chunk( |
|
|
|
index=chunk_index, message=AssistantPromptMessage(content=""), finish_reason=finish_reason |
|
|
|
id=message_id, |
|
|
|
index=chunk_index, |
|
|
|
message=AssistantPromptMessage(content=""), |
|
|
|
finish_reason=finish_reason, |
|
|
|
usage=usage, |
|
|
|
) |
|
|
|
|
|
|
|
def _handle_generate_response( |
|
|
|
self, model: str, credentials: dict, response: requests.Response, prompt_messages: list[PromptMessage] |
|
|
|
) -> LLMResult: |
|
|
|
response_json = response.json() |
|
|
|
response_json: dict = response.json() |
|
|
|
|
|
|
|
completion_type = LLMMode.value_of(credentials["mode"]) |
|
|
|
|
|
|
|
output = response_json["choices"][0] |
|
|
|
message_id = response_json.get("id") |
|
|
|
|
|
|
|
response_content = "" |
|
|
|
tool_calls = None |
|
|
|
@@ -593,6 +611,7 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): |
|
|
|
|
|
|
|
# transform response |
|
|
|
result = LLMResult( |
|
|
|
id=message_id, |
|
|
|
model=response_json["model"], |
|
|
|
prompt_messages=prompt_messages, |
|
|
|
message=assistant_message, |