```diff
     return rest_tokens

-    def recale_llm_max_tokens(self, model_config: ModelConfigEntity,
+    def recalc_llm_max_tokens(self, model_config: ModelConfigEntity,
                               prompt_messages: list[PromptMessage]):
         # recalc max_tokens if sum(prompt_token + max_tokens) over model token limit
         model_type_instance = model_config.provider_model_bundle.model_type_instance
```
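The renamed method clamps `max_tokens` so that the prompt and the completion together fit within the model's context window. A minimal sketch of that recalculation, assuming a simple clamp with a hypothetical floor of 16 tokens (the diff does not show the method body beyond its first line):

```python
# Minimal sketch of the recalculation, NOT Dify's actual implementation:
# clamp max_tokens so prompt_tokens + max_tokens stays within the limit.
def recalc_max_tokens_sketch(context_limit: int,
                             prompt_tokens: int,
                             max_tokens: int) -> int:
    if prompt_tokens + max_tokens > context_limit:
        # Keep whatever room the prompt leaves; the floor of 16 is an
        # assumed safety margin, not a value taken from the diff.
        max_tokens = max(context_limit - prompt_tokens, 16)
    return max_tokens

# e.g. a 4096-token model with a 4000-token prompt leaves room for 96:
assert recalc_max_tokens_sketch(4096, 4000, 512) == 96
```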
```diff
             return

         # Re-calculate the max tokens if sum(prompt_token + max_tokens) over model token limit
-        self.recale_llm_max_tokens(
+        self.recalc_llm_max_tokens(
             model_config=app_orchestration_config.model_config,
             prompt_messages=prompt_messages
         )
```
```diff
         )

         # recale llm max tokens
-        self.recale_llm_max_tokens(self.model_config, prompt_messages)
+        self.recalc_llm_max_tokens(self.model_config, prompt_messages)

         # invoke model
         chunks: Generator[LLMResultChunk, None, None] = model_instance.invoke_llm(
             prompt_messages=prompt_messages,
```
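For this streaming call site, `invoke_llm` is annotated to yield `LLMResultChunk` objects. A hedged sketch of a consumer, assuming each chunk carries its text at `chunk.delta.message.content` (an attribute path not shown in this diff):

```python
from collections.abc import Generator

# Sketch of draining the stream into one string; the chunk attribute path
# below is an assumption about LLMResultChunk, not confirmed by the diff.
def collect_stream_text(chunks: Generator) -> str:
    text = ""
    for chunk in chunks:
        text += chunk.delta.message.content or ""
    return text
```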
```diff
         )

         # recale llm max tokens
-        self.recale_llm_max_tokens(self.model_config, prompt_messages)
+        self.recalc_llm_max_tokens(self.model_config, prompt_messages)

         # invoke model
         chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = model_instance.invoke_llm(
             prompt_messages=prompt_messages,
```
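This second call site is annotated with a `Union`: a generator when streaming is enabled, a single `LLMResult` otherwise. A small sketch of dispatching on that union, assuming nothing about the result objects beyond the annotation itself:

```python
from collections.abc import Generator
from typing import Any

# Sketch of branching on the Union return type from invoke_llm.
def handle_invoke_result(result: Any) -> None:
    if isinstance(result, Generator):
        for chunk in result:  # stream=True: chunks arrive incrementally
            print(chunk)
    else:
        print(result)         # stream=False: one complete LLMResult
```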