Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: Yeuoly <admin@srmxy.cn>
tags/1.3.0
 MULTIMODAL_SEND_FORMAT=base64
 PROMPT_GENERATION_MAX_TOKENS=512
 CODE_GENERATION_MAX_TOKENS=1024
+PLUGIN_BASED_TOKEN_COUNTING_ENABLED=false

 # Mail configuration, support: resend, smtp
 MAIL_TYPE=
 class ModelLoadBalanceConfig(BaseSettings):
     """
-    Configuration for model load balancing
+    Configuration for model load balancing and token counting
     """

     MODEL_LB_ENABLED: bool = Field(
         default=False,
     )

+    PLUGIN_BASED_TOKEN_COUNTING_ENABLED: bool = Field(
+        description="Enable or disable plugin based token counting. If disabled, token counting will return 0.",
+        default=False,
+    )

 class BillingConfig(BaseSettings):
     """
         query = application_generate_entity.query
         files = application_generate_entity.files

-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=dict(inputs),
-            files=list(files),
-            query=query,
-        )

         memory = None
         if application_generate_entity.conversation_id:
             # get memory of conversation (read-only)
         )

         image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW

-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=inputs,
-            files=files,
-            query=query,
-        )

         memory = None
         if application_generate_entity.conversation_id:
             # get memory of conversation (read-only)
         )

         image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW

-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=inputs,
-            files=files,
-            query=query,
-        )

         # organize all inputs and template to prompt messages
         # Include: prompt template, inputs, query(optional), files(optional)
         prompt_messages, stop = self.organize_prompt_messages(
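A note on why these three near-identical blocks are deleted rather than kept behind the flag: the pre-calculation itself requires a token count, and once counting may legitimately report 0 the computed "rest tokens" no longer guard anything. A toy sketch of the arithmetic, with hypothetical names rather than Dify's actual helper:

```python
# Hypothetical illustration: the "rest tokens" check degenerates when the
# reported prompt token count is 0 (the default once counting is disabled).
def rest_tokens(context_size: int, max_tokens: int, prompt_tokens: int) -> int:
    # Tokens left for the prompt after reserving the completion budget.
    return context_size - max_tokens - prompt_tokens


# With real counting, an oversized prompt is caught:
assert rest_tokens(context_size=8192, max_tokens=1024, prompt_tokens=9000) < 0

# With counting disabled (prompt_tokens reported as 0), the check always
# passes, so keeping the pre-calculation would only add a plugin round-trip.
assert rest_tokens(context_size=8192, max_tokens=1024, prompt_tokens=0) > 0
```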
 ```
-Sometimes, you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens. This method is provided by the `AIModel` base class, and it uses GPT2's Tokenizer for calculation. However, it should be noted that this is only a substitute and may not be fully accurate.
+Sometimes, you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens; make sure the environment variable `PLUGIN_BASED_TOKEN_COUNTING_ENABLED` is set to `true`. This method is provided by the `AIModel` base class, and it uses GPT2's Tokenizer for calculation. However, it should be noted that this is only a substitute and may not be fully accurate.

 - Model Credentials Validation

 """
 ```
-Sometimes you may not want to return 0 directly, so you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens. This method lives in the `AIModel` base class; it uses GPT2's Tokenizer for the calculation, but it is only a substitute and not fully accurate.
+Sometimes you may not want to return 0 directly, so you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens; make sure the environment variable `PLUGIN_BASED_TOKEN_COUNTING_ENABLED` is set to `true`. This method lives in the `AIModel` base class; it uses GPT2's Tokenizer for the calculation, but it is only a substitute and not fully accurate.

 - Model Credentials Validation
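To make the fallback the docs describe concrete, here is a self-contained sketch of the pattern. The `AIModel` stub below stands in for the real plugin-SDK base class; only the `_get_num_tokens_by_gpt2` name is taken from the docs, and loading the tokenizer via `transformers` is an assumption about how such a helper is typically built.

```python
from transformers import GPT2TokenizerFast  # pip install transformers


class AIModel:
    """Stub standing in for the plugin SDK base class."""

    _tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

    def _get_num_tokens_by_gpt2(self, text: str) -> int:
        # GPT2's vocabulary differs from the target model's, so this is
        # only an approximation, as the docs above caution.
        return len(self._tokenizer.encode(text))


class MyCustomLLM(AIModel):
    def get_num_tokens(self, prompt_text: str) -> int:
        # Fall back to the GPT2 estimate instead of returning 0 outright.
        return self._get_num_tokens_by_gpt2(prompt_text)


print(MyCustomLLM().get_num_tokens("Hello, world!"))  # approximate count
```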
         :param tools: tools for tool calling
         :return:
         """
-        plugin_model_manager = PluginModelManager()
-        return plugin_model_manager.get_llm_num_tokens(
-            tenant_id=self.tenant_id,
-            user_id="unknown",
-            plugin_id=self.plugin_id,
-            provider=self.provider_name,
-            model_type=self.model_type.value,
-            model=model,
-            credentials=credentials,
-            prompt_messages=prompt_messages,
-            tools=tools,
-        )
+        if dify_config.PLUGIN_BASED_TOKEN_COUNTING_ENABLED:
+            plugin_model_manager = PluginModelManager()
+            return plugin_model_manager.get_llm_num_tokens(
+                tenant_id=self.tenant_id,
+                user_id="unknown",
+                plugin_id=self.plugin_id,
+                provider=self.provider_name,
+                model_type=self.model_type.value,
+                model=model,
+                credentials=credentials,
+                prompt_messages=prompt_messages,
+                tools=tools,
+            )
+        return 0

     def _calc_response_usage(
         self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int
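The shape of the change above is a plain feature-flag short-circuit. Below is a self-contained sketch of the same pattern with a stand-in for the plugin RPC; everything here is illustrative, whereas in Dify the expensive call is `PluginModelManager.get_llm_num_tokens`.

```python
from dataclasses import dataclass


@dataclass
class Config:
    PLUGIN_BASED_TOKEN_COUNTING_ENABLED: bool = False


dify_config = Config()


def expensive_plugin_count(text: str) -> int:
    # Stand-in for the plugin round-trip; the real count goes over RPC.
    return len(text.split())


def get_num_tokens(text: str) -> int:
    # Mirrors the guard above: skip the plugin call entirely when disabled.
    if dify_config.PLUGIN_BASED_TOKEN_COUNTING_ENABLED:
        return expensive_plugin_count(text)
    return 0


print(get_num_tokens("hello plugin world"))  # 0 (flag off by default)
dify_config.PLUGIN_BASED_TOKEN_COUNTING_ENABLED = True
print(get_num_tokens("hello plugin world"))  # 3 once enabled
```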
 # Password for admin user initialization.
 # If left unset, admin user will not be prompted for a password
 # when creating the initial admin account.
 # The length of the password cannot exceed 30 characters.
 INIT_PASSWORD=
 # ------------------------------

 # The maximum number of tokens allowed for prompt generation.
 # This setting controls the upper limit of tokens that can be used by the LLM
 # when generating a prompt in the prompt generation tool.
 # Default: 512 tokens.
 PROMPT_GENERATION_MAX_TOKENS=512

 # The maximum number of tokens allowed for code generation.
 # This setting controls the upper limit of tokens that can be used by the LLM
 # when generating code in the code generation tool.
 # Default: 1024 tokens.
 CODE_GENERATION_MAX_TOKENS=1024

+# Enable or disable plugin based token counting. If disabled, token counting will return 0.
+# This can improve performance by skipping token counting operations.
+# Default: false (disabled).
+PLUGIN_BASED_TOKEN_COUNTING_ENABLED=false

 # ------------------------------
 # Multi-modal Configuration
 # ------------------------------
   SCARF_NO_ANALYTICS: ${SCARF_NO_ANALYTICS:-true}
   PROMPT_GENERATION_MAX_TOKENS: ${PROMPT_GENERATION_MAX_TOKENS:-512}
   CODE_GENERATION_MAX_TOKENS: ${CODE_GENERATION_MAX_TOKENS:-1024}
+  PLUGIN_BASED_TOKEN_COUNTING_ENABLED: ${PLUGIN_BASED_TOKEN_COUNTING_ENABLED:-false}
   MULTIMODAL_SEND_FORMAT: ${MULTIMODAL_SEND_FORMAT:-base64}
   UPLOAD_IMAGE_FILE_SIZE_LIMIT: ${UPLOAD_IMAGE_FILE_SIZE_LIMIT:-10}
   UPLOAD_VIDEO_FILE_SIZE_LIMIT: ${UPLOAD_VIDEO_FILE_SIZE_LIMIT:-100}