```diff
@@ -84,7 +84,7 @@ class AppRunner:
         return rest_tokens

-    def recale_llm_max_tokens(self, model_config: ModelConfigEntity,
+    def recalc_llm_max_tokens(self, model_config: ModelConfigEntity,
                                prompt_messages: list[PromptMessage]):
         # recalc max_tokens if sum(prompt_token + max_tokens) over model token limit
         model_type_instance = model_config.provider_model_bundle.model_type_instance
```
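The helper's comment describes the intent: if the prompt's token count plus the requested `max_tokens` would exceed the model's token limit, `max_tokens` is shrunk to fit. Below is a minimal sketch of that clamping idea; the function name, the 16-token floor, and the example numbers are illustrative assumptions, not the project's actual implementation.

```python
# Minimal sketch of the clamping behavior behind recalc_llm_max_tokens.
# clamp_max_tokens, the 16-token floor, and the sample numbers are placeholders.
def clamp_max_tokens(prompt_tokens: int, max_tokens: int, context_size: int) -> int:
    """Shrink max_tokens so prompt_tokens + max_tokens fits in the model's context window."""
    if prompt_tokens + max_tokens <= context_size:
        return max_tokens
    # Leave whatever room remains after the prompt, but never drop below a small floor.
    return max(context_size - prompt_tokens, 16)


# e.g. a 4096-token model, a 3900-token prompt, and a requested 512-token completion
print(clamp_max_tokens(prompt_tokens=3900, max_tokens=512, context_size=4096))  # -> 196
```

The rename itself only fixes the `recale` typo in the method name; the clamping behavior is unchanged, and the call sites below are updated to match.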
```diff
@@ -181,7 +181,7 @@ class BasicApplicationRunner(AppRunner):
             return

         # Re-calculate the max tokens if sum(prompt_token + max_tokens) over model token limit
-        self.recale_llm_max_tokens(
+        self.recalc_llm_max_tokens(
             model_config=app_orchestration_config.model_config,
             prompt_messages=prompt_messages
         )
```
```diff
@@ -131,7 +131,7 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
             )

             # recale llm max tokens
-            self.recale_llm_max_tokens(self.model_config, prompt_messages)
+            self.recalc_llm_max_tokens(self.model_config, prompt_messages)

             # invoke model
             chunks: Generator[LLMResultChunk, None, None] = model_instance.invoke_llm(
                 prompt_messages=prompt_messages,
```
```diff
@@ -106,7 +106,7 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
             )

             # recale llm max tokens
-            self.recale_llm_max_tokens(self.model_config, prompt_messages)
+            self.recalc_llm_max_tokens(self.model_config, prompt_messages)

             # invoke model
             chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = model_instance.invoke_llm(
                 prompt_messages=prompt_messages,
```
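The last hunk also annotates `chunks` as `Union[Generator[LLMResultChunk, None, None], LLMResult]`, presumably because `invoke_llm` can either stream chunks or return a single blocking result. A rough sketch of consuming such a union, using simplified stand-in classes rather than the project's real ones:

```python
# Rough sketch of handling a stream-or-blocking return value.
# LLMResultChunk and LLMResult here are simplified stand-ins, not the real classes.
from dataclasses import dataclass
from typing import Generator, Union


@dataclass
class LLMResultChunk:
    delta: str


@dataclass
class LLMResult:
    text: str


def collect_text(response: Union[Generator[LLMResultChunk, None, None], LLMResult]) -> str:
    """Return the full text, whether the call streamed chunks or returned one result."""
    if isinstance(response, LLMResult):
        return response.text
    return "".join(chunk.delta for chunk in response)


print(collect_text(LLMResult(text="hello")))                                   # blocking path
print(collect_text(c for c in [LLMResultChunk("he"), LLMResultChunk("llo")]))  # streaming path
```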