 - import time
 - from typing import cast, Optional, List, Tuple, Generator, Union
 - 
 - from core.application_queue_manager import ApplicationQueueManager
 - from core.entities.application_entities import ModelConfigEntity, PromptTemplateEntity, AppOrchestrationConfigEntity
 - from core.file.file_obj import FileObj
 - from core.memory.token_buffer_memory import TokenBufferMemory
 - from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
 - from core.model_runtime.entities.message_entities import PromptMessage, AssistantPromptMessage
 - from core.model_runtime.entities.model_entities import ModelPropertyKey
 - from core.model_runtime.errors.invoke import InvokeBadRequestError
 - from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
 - from core.prompt.prompt_transform import PromptTransform
 - from models.model import App
 - 
 - 
 - class AppRunner:
 -     def get_pre_calculate_rest_tokens(self, app_record: App,
 -                                       model_config: ModelConfigEntity,
 -                                       prompt_template_entity: PromptTemplateEntity,
 -                                       inputs: dict[str, str],
 -                                       files: list[FileObj],
 -                                       query: Optional[str] = None) -> int:
 -         """
 -         Get pre calculate rest tokens
 -         :param app_record: app record
 -         :param model_config: model config entity
 -         :param prompt_template_entity: prompt template entity
 -         :param inputs: inputs
 -         :param files: files
 -         :param query: query
 -         :return:
 -         """
 -         model_type_instance = model_config.provider_model_bundle.model_type_instance
 -         model_type_instance = cast(LargeLanguageModel, model_type_instance)
 - 
 -         model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
 - 
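 -         # Look up the configured max_tokens; providers may expose it under their own parameter name via the 'max_tokens' use_template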
 -         max_tokens = 0
 -         for parameter_rule in model_config.model_schema.parameter_rules:
 -             if (parameter_rule.name == 'max_tokens'
 -                     or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
 -                 max_tokens = (model_config.parameters.get(parameter_rule.name)
 -                               or model_config.parameters.get(parameter_rule.use_template)) or 0
 - 
 -         if model_context_tokens is None:
 -             return -1
 - 
 -         if max_tokens is None:
 -             max_tokens = 0
 - 
 -         # get prompt messages without memory and context
 -         prompt_messages, stop = self.organize_prompt_messages(
 -             app_record=app_record,
 -             model_config=model_config,
 -             prompt_template_entity=prompt_template_entity,
 -             inputs=inputs,
 -             files=files,
 -             query=query
 -         )
 - 
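 -         # Count the tokens the assembled prompt will consume for this model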
 -         prompt_tokens = model_type_instance.get_num_tokens(
 -             model_config.model,
 -             model_config.credentials,
 -             prompt_messages
 -         )
 - 
 -         rest_tokens = model_context_tokens - max_tokens - prompt_tokens
 -         if rest_tokens < 0:
 -             raise InvokeBadRequestError("Query or prefix prompt is too long, you can reduce the prefix prompt, "
 -                                         "lower the max_tokens parameter, or switch to an LLM with a larger context size.")
 - 
 -         return rest_tokens
 - 
 -     def recale_llm_max_tokens(self, model_config: ModelConfigEntity,
 -                               prompt_messages: List[PromptMessage]):
 -         # Recalculate max_tokens if prompt_tokens + max_tokens exceeds the model context limit
 -         model_type_instance = model_config.provider_model_bundle.model_type_instance
 -         model_type_instance = cast(LargeLanguageModel, model_type_instance)
 - 
 -         model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
 - 
 -         max_tokens = 0
 -         for parameter_rule in model_config.model_schema.parameter_rules:
 -             if (parameter_rule.name == 'max_tokens'
 -                     or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
 -                 max_tokens = (model_config.parameters.get(parameter_rule.name)
 -                               or model_config.parameters.get(parameter_rule.use_template)) or 0
 - 
 -         if model_context_tokens is None:
 -             return -1
 - 
 -         if max_tokens is None:
 -             max_tokens = 0
 - 
 -         prompt_tokens = model_type_instance.get_num_tokens(
 -             model_config.model,
 -             model_config.credentials,
 -             prompt_messages
 -         )
 - 
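 -         # If prompt + completion would overflow the context window, shrink max_tokens (keeping at least 16 tokens for output)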
 -         if prompt_tokens + max_tokens > model_context_tokens:
 -             max_tokens = max(model_context_tokens - prompt_tokens, 16)
 - 
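 -             # Write the adjusted value back under whichever parameter name the provider uses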
 -             for parameter_rule in model_config.model_schema.parameter_rules:
 -                 if (parameter_rule.name == 'max_tokens'
 -                         or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
 -                     model_config.parameters[parameter_rule.name] = max_tokens
 - 
 -     def organize_prompt_messages(self, app_record: App,
 -                                  model_config: ModelConfigEntity,
 -                                  prompt_template_entity: PromptTemplateEntity,
 -                                  inputs: dict[str, str],
 -                                  files: list[FileObj],
 -                                  query: Optional[str] = None,
 -                                  context: Optional[str] = None,
 -                                  memory: Optional[TokenBufferMemory] = None) \
 -             -> Tuple[List[PromptMessage], Optional[List[str]]]:
 -         """
 -         Organize prompt messages
 -         :param context:
 -         :param app_record: app record
 -         :param model_config: model config entity
 -         :param prompt_template_entity: prompt template entity
 -         :param inputs: inputs
 -         :param files: files
 -         :param query: query
 -         :param memory: memory
 -         :return:
 -         """
 -         prompt_transform = PromptTransform()
 - 
 -         # get prompt without memory and context
 -         if prompt_template_entity.prompt_type == PromptTemplateEntity.PromptType.SIMPLE:
 -             prompt_messages, stop = prompt_transform.get_prompt(
 -                 app_mode=app_record.mode,
 -                 prompt_template_entity=prompt_template_entity,
 -                 inputs=inputs,
 -                 query=query if query else '',
 -                 files=files,
 -                 context=context,
 -                 memory=memory,
 -                 model_config=model_config
 -             )
 -         else:
 -             prompt_messages = prompt_transform.get_advanced_prompt(
 -                 app_mode=app_record.mode,
 -                 prompt_template_entity=prompt_template_entity,
 -                 inputs=inputs,
 -                 query=query,
 -                 files=files,
 -                 context=context,
 -                 memory=memory,
 -                 model_config=model_config
 -             )
 -             stop = model_config.stop
 - 
 -         return prompt_messages, stop
 - 
 -     def direct_output(self, queue_manager: ApplicationQueueManager,
 -                       app_orchestration_config: AppOrchestrationConfigEntity,
 -                       prompt_messages: list,
 -                       text: str,
 -                       stream: bool,
 -                       usage: Optional[LLMUsage] = None) -> None:
 -         """
 -         Output the given text directly to the queue without invoking the model.
 -         :param queue_manager: application queue manager
 -         :param app_orchestration_config: app orchestration config
 -         :param prompt_messages: prompt messages
 -         :param text: text to output
 -         :param stream: whether to emit the text as streamed chunks
 -         :param usage: token usage, if already known
 -         :return:
 -         """
 -         if stream:
 -             index = 0
 -             for token in text:
 -                 queue_manager.publish_chunk_message(LLMResultChunk(
 -                     model=app_orchestration_config.model_config.model,
 -                     prompt_messages=prompt_messages,
 -                     delta=LLMResultChunkDelta(
 -                         index=index,
 -                         message=AssistantPromptMessage(content=token)
 -                     )
 -                 ))
 -                 index += 1
 -                 time.sleep(0.01)
 - 
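 -         # Always publish the final complete message, whether or not chunks were streamed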
 -         queue_manager.publish_message_end(
 -             llm_result=LLMResult(
 -                 model=app_orchestration_config.model_config.model,
 -                 prompt_messages=prompt_messages,
 -                 message=AssistantPromptMessage(content=text),
 -                 usage=usage if usage else LLMUsage.empty_usage()
 -             )
 -         )
 - 
 -     def _handle_invoke_result(self, invoke_result: Union[LLMResult, Generator],
 -                               queue_manager: ApplicationQueueManager,
 -                               stream: bool) -> None:
 -         """
 -         Handle invoke result
 -         :param invoke_result: invoke result
 -         :param queue_manager: application queue manager
 -         :param stream: stream
 -         :return:
 -         """
 -         if not stream:
 -             self._handle_invoke_result_direct(
 -                 invoke_result=invoke_result,
 -                 queue_manager=queue_manager
 -             )
 -         else:
 -             self._handle_invoke_result_stream(
 -                 invoke_result=invoke_result,
 -                 queue_manager=queue_manager
 -             )
 - 
 -     def _handle_invoke_result_direct(self, invoke_result: LLMResult,
 -                                      queue_manager: ApplicationQueueManager) -> None:
 -         """
 -         Handle a blocking (non-streamed) invoke result
 -         :param invoke_result: invoke result
 -         :param queue_manager: application queue manager
 -         :return:
 -         """
 -         queue_manager.publish_message_end(
 -             llm_result=invoke_result
 -         )
 - 
 -     def _handle_invoke_result_stream(self, invoke_result: Generator,
 -                                      queue_manager: ApplicationQueueManager) -> None:
 -         """
 -         Handle a streamed invoke result
 -         :param invoke_result: invoke result
 -         :param queue_manager: application queue manager
 -         :return:
 -         """
 -         model = None
 -         prompt_messages = []
 -         text = ''
 -         usage = None
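 -         # Forward each chunk to the queue while accumulating text, model, prompt messages and usage for the final result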
 -         for result in invoke_result:
 -             queue_manager.publish_chunk_message(result)
 - 
 -             text += result.delta.message.content
 - 
 -             if not model:
 -                 model = result.model
 - 
 -             if not prompt_messages:
 -                 prompt_messages = result.prompt_messages
 - 
 -             if not usage and result.delta.usage:
 -                 usage = result.delta.usage
 - 
 -         # usage may never arrive in the stream; fall back to an empty usage object
 -         # rather than passing None downstream
 -         if not usage:
 -             usage = LLMUsage.empty_usage()
 - 
 -         llm_result = LLMResult(
 -             model=model,
 -             prompt_messages=prompt_messages,
 -             message=AssistantPromptMessage(content=text),
 -             usage=usage
 -         )
 - 
 -         queue_manager.publish_message_end(
 -             llm_result=llm_result
 -         )
 
 