
feat(large_language_model): Adds plugin-based token counting configuration option (#17706)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: Yeuoly <admin@srmxy.cn>
tags/1.3.0
-LAN- committed 6 months ago · commit d3157b46ee

+1 -0  api/.env.example

 MULTIMODAL_SEND_FORMAT=base64
 PROMPT_GENERATION_MAX_TOKENS=512
 CODE_GENERATION_MAX_TOKENS=1024
+PLUGIN_BASED_TOKEN_COUNTING_ENABLED=false

 # Mail configuration, support: resend, smtp
 MAIL_TYPE=

+6 -1  api/configs/feature/__init__.py

 class ModelLoadBalanceConfig(BaseSettings):
     """
-    Configuration for model load balancing
+    Configuration for model load balancing and token counting
     """

     MODEL_LB_ENABLED: bool = Field(
         default=False,
     )

+    PLUGIN_BASED_TOKEN_COUNTING_ENABLED: bool = Field(
+        description="Enable or disable plugin based token counting. If disabled, token counting will return 0.",
+        default=False,
+    )
+

 class BillingConfig(BaseSettings):
     """

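For context: the new setting is a plain pydantic-settings boolean, so it is populated from an environment variable of the same name. A minimal stand-alone sketch of that behavior (the class below is illustrative, not Dify's actual `ModelLoadBalanceConfig` wiring; assumes `pydantic-settings` v2 is installed):

```python
import os

from pydantic import Field
from pydantic_settings import BaseSettings


class TokenCountingConfig(BaseSettings):
    # Illustrative stand-in for the field added to ModelLoadBalanceConfig above.
    PLUGIN_BASED_TOKEN_COUNTING_ENABLED: bool = Field(
        description="Enable or disable plugin based token counting. If disabled, token counting will return 0.",
        default=False,
    )


print(TokenCountingConfig().PLUGIN_BASED_TOKEN_COUNTING_ENABLED)  # False when the env var is unset

os.environ["PLUGIN_BASED_TOKEN_COUNTING_ENABLED"] = "true"
print(TokenCountingConfig().PLUGIN_BASED_TOKEN_COUNTING_ENABLED)  # True
```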
+0 -14  api/core/app/apps/agent_chat/app_runner.py

 query = application_generate_entity.query
 files = application_generate_entity.files

-# Pre-calculate the number of tokens of the prompt messages,
-# and return the rest number of tokens by model context token size limit and max token size limit.
-# If the rest number of tokens is not enough, raise exception.
-# Include: prompt template, inputs, query(optional), files(optional)
-# Not Include: memory, external data, dataset context
-self.get_pre_calculate_rest_tokens(
-    app_record=app_record,
-    model_config=application_generate_entity.model_conf,
-    prompt_template_entity=app_config.prompt_template,
-    inputs=dict(inputs),
-    files=list(files),
-    query=query,
-)
-
 memory = None
 if application_generate_entity.conversation_id:
     # get memory of conversation (read-only)

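The same pre-flight call is also removed from the chat and completion runners below. A plausible reading, not stated in the commit itself: once `get_num_tokens` may legitimately return 0 (see the `large_language_model.py` hunk further down), a remaining-token pre-check derived from it would silently pass for any prompt, so it is dropped rather than left misleading. A toy sketch of that arithmetic, with hypothetical numbers:

```python
# Toy illustration only; the numbers and the helper are hypothetical.
def rest_tokens(context_size: int, max_completion_tokens: int, prompt_tokens: int) -> int:
    # Remaining budget = model context window minus reserved completion tokens
    # minus the tokens already spent on the prompt.
    return context_size - max_completion_tokens - prompt_tokens


print(rest_tokens(8192, 1024, 3500))  # 3668 with a real prompt count
print(rest_tokens(8192, 1024, 0))     # 7168 when counting returns 0, whatever the prompt
```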
+0 -14  api/core/app/apps/chat/app_runner.py

 )
 image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW

-# Pre-calculate the number of tokens of the prompt messages,
-# and return the rest number of tokens by model context token size limit and max token size limit.
-# If the rest number of tokens is not enough, raise exception.
-# Include: prompt template, inputs, query(optional), files(optional)
-# Not Include: memory, external data, dataset context
-self.get_pre_calculate_rest_tokens(
-    app_record=app_record,
-    model_config=application_generate_entity.model_conf,
-    prompt_template_entity=app_config.prompt_template,
-    inputs=inputs,
-    files=files,
-    query=query,
-)
-
 memory = None
 if application_generate_entity.conversation_id:
     # get memory of conversation (read-only)

+0 -14  api/core/app/apps/completion/app_runner.py

 )
 image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW

-# Pre-calculate the number of tokens of the prompt messages,
-# and return the rest number of tokens by model context token size limit and max token size limit.
-# If the rest number of tokens is not enough, raise exception.
-# Include: prompt template, inputs, query(optional), files(optional)
-# Not Include: memory, external data, dataset context
-self.get_pre_calculate_rest_tokens(
-    app_record=app_record,
-    model_config=application_generate_entity.model_conf,
-    prompt_template_entity=app_config.prompt_template,
-    inputs=inputs,
-    files=files,
-    query=query,
-)
-
 # organize all inputs and template to prompt messages
 # Include: prompt template, inputs, query(optional), files(optional)
 prompt_messages, stop = self.organize_prompt_messages(

+1 -1  api/core/model_runtime/docs/en_US/customizable_model_scale_out.md

 ```

-Sometimes, you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens. This method is provided by the `AIModel` base class, and it uses GPT2's Tokenizer for calculation. However, it should be noted that this is only a substitute and may not be fully accurate.
+Sometimes, you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens and ensure the environment variable `PLUGIN_BASED_TOKEN_COUNTING_ENABLED` is set to `true`. This method is provided by the `AIModel` base class, and it uses GPT2's Tokenizer for calculation. However, it should be noted that this is only a substitute and may not be fully accurate.

 - Model Credentials Validation


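As a concrete illustration of the fallback described above: a free-standing approximation of what `_get_num_tokens_by_gpt2` is documented to do, implemented here with Hugging Face `transformers` as an assumption (the real helper lives on the `AIModel` base class and may differ in detail):

```python
from transformers import GPT2TokenizerFast  # assumption: transformers is installed

_gpt2 = GPT2TokenizerFast.from_pretrained("gpt2")


def get_num_tokens_by_gpt2(text: str) -> int:
    """Approximate token count using GPT-2's tokenizer; a substitute, not exact."""
    return len(_gpt2.encode(text))


print(get_num_tokens_by_gpt2("Hello, world!"))  # 4 with the GPT-2 vocabulary
```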

+1 -1  api/core/model_runtime/docs/zh_Hans/customizable_model_scale_out.md

 """
 ```

-Sometimes you may not want to return 0 directly, so you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens. This method lives in the `AIModel` base class; it uses GPT2's Tokenizer for the calculation, but only as a substitute, and it is not fully accurate.
+Sometimes you may not want to return 0 directly, so you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens, making sure the environment variable `PLUGIN_BASED_TOKEN_COUNTING_ENABLED` is set to `true`. This method lives in the `AIModel` base class; it uses GPT2's Tokenizer for the calculation, but only as a substitute, and it is not fully accurate.

 - Model Credentials Validation



+14 -12  api/core/model_runtime/model_providers/__base/large_language_model.py

         :param tools: tools for tool calling
         :return:
         """
-        plugin_model_manager = PluginModelManager()
-        return plugin_model_manager.get_llm_num_tokens(
-            tenant_id=self.tenant_id,
-            user_id="unknown",
-            plugin_id=self.plugin_id,
-            provider=self.provider_name,
-            model_type=self.model_type.value,
-            model=model,
-            credentials=credentials,
-            prompt_messages=prompt_messages,
-            tools=tools,
-        )
+        if dify_config.PLUGIN_BASED_TOKEN_COUNTING_ENABLED:
+            plugin_model_manager = PluginModelManager()
+            return plugin_model_manager.get_llm_num_tokens(
+                tenant_id=self.tenant_id,
+                user_id="unknown",
+                plugin_id=self.plugin_id,
+                provider=self.provider_name,
+                model_type=self.model_type.value,
+                model=model,
+                credentials=credentials,
+                prompt_messages=prompt_messages,
+                tools=tools,
+            )
+        return 0

     def _calc_response_usage(
         self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int

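The control flow introduced above, reduced to a runnable toy. `FakeConfig` and `FakePluginModelManager` are stand-ins for Dify's `dify_config` and `PluginModelManager`; only the gating logic mirrors the diff:

```python
class FakeConfig:
    PLUGIN_BASED_TOKEN_COUNTING_ENABLED = False  # mirrors the new default


class FakePluginModelManager:
    def get_llm_num_tokens(self, **kwargs) -> int:
        return 42  # pretend the plugin daemon performed a real count


dify_config = FakeConfig()


def get_num_tokens(model: str, prompt: str) -> int:
    if dify_config.PLUGIN_BASED_TOKEN_COUNTING_ENABLED:
        return FakePluginModelManager().get_llm_num_tokens(model=model, prompt=prompt)
    return 0  # counting is skipped entirely when the flag is off


print(get_num_tokens("some-model", "hello"))  # 0 under the default configuration
```

The trade-off is explicit: with the flag off (the default), anything derived from these counts reads 0, in exchange for skipping the plugin call.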
+8 -3  docker/.env.example

 # Password for admin user initialization.
 # If left unset, admin user will not be prompted for a password
 # when creating the initial admin account.
 # The length of the password cannot exceed 30 characters.
 INIT_PASSWORD=

 # ------------------------------

 # The maximum number of tokens allowed for prompt generation.
 # This setting controls the upper limit of tokens that can be used by the LLM
 # when generating a prompt in the prompt generation tool.
 # Default: 512 tokens.
 PROMPT_GENERATION_MAX_TOKENS=512

 # The maximum number of tokens allowed for code generation.
 # This setting controls the upper limit of tokens that can be used by the LLM
 # when generating code in the code generation tool.
 # Default: 1024 tokens.
 CODE_GENERATION_MAX_TOKENS=1024

+# Enable or disable plugin based token counting. If disabled, token counting will return 0.
+# This can improve performance by skipping token counting operations.
+# Default: false (disabled).
+PLUGIN_BASED_TOKEN_COUNTING_ENABLED=false
+
 # ------------------------------
 # Multi-modal Configuration
 # ------------------------------

+1 -0  docker/docker-compose.yaml

   SCARF_NO_ANALYTICS: ${SCARF_NO_ANALYTICS:-true}
   PROMPT_GENERATION_MAX_TOKENS: ${PROMPT_GENERATION_MAX_TOKENS:-512}
   CODE_GENERATION_MAX_TOKENS: ${CODE_GENERATION_MAX_TOKENS:-1024}
+  PLUGIN_BASED_TOKEN_COUNTING_ENABLED: ${PLUGIN_BASED_TOKEN_COUNTING_ENABLED:-false}
   MULTIMODAL_SEND_FORMAT: ${MULTIMODAL_SEND_FORMAT:-base64}
   UPLOAD_IMAGE_FILE_SIZE_LIMIT: ${UPLOAD_IMAGE_FILE_SIZE_LIMIT:-10}
   UPLOAD_VIDEO_FILE_SIZE_LIMIT: ${UPLOAD_VIDEO_FILE_SIZE_LIMIT:-100}

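The `${VAR:-default}` interpolation above makes the container fall back to `false` whenever the variable is unset or empty; docker compose implements this natively, but the semantics, sketched in Python for clarity:

```python
import os


def compose_default(name: str, default: str) -> str:
    # Mirrors compose's ${NAME:-default}: use the env value only if set and non-empty.
    value = os.environ.get(name, "")
    return value if value else default


print(compose_default("PLUGIN_BASED_TOKEN_COUNTING_ENABLED", "false"))  # "false" unless exported
```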