瀏覽代碼

refactor: replace compact response generation with length-prefixed response for backwards invocation api (#20903)

tags/1.4.2
Yeuoly 4 月之前
父節點
當前提交
d6d8cca053
沒有連結到貢獻者的電子郵件帳戶。
共有 3 個檔案被更改,包括 64 行新增 和 10 行删除
  1. 5
    5
      api/controllers/inner_api/plugin/plugin.py
  2. 3
    5
      api/core/plugin/backwards_invocation/base.py
  3. 56
    0
      api/libs/helper.py

+ 5
- 5
api/controllers/inner_api/plugin/plugin.py 查看文件

RequestRequestUploadFile, RequestRequestUploadFile,
) )
from core.tools.entities.tool_entities import ToolProviderType from core.tools.entities.tool_entities import ToolProviderType
from libs.helper import compact_generate_response
from libs.helper import length_prefixed_response
from models.account import Account, Tenant from models.account import Account, Tenant
from models.model import EndUser from models.model import EndUser


response = PluginModelBackwardsInvocation.invoke_llm(user_model.id, tenant_model, payload) response = PluginModelBackwardsInvocation.invoke_llm(user_model.id, tenant_model, payload)
return PluginModelBackwardsInvocation.convert_to_event_stream(response) return PluginModelBackwardsInvocation.convert_to_event_stream(response)


return compact_generate_response(generator())
return length_prefixed_response(0xF, generator())




class PluginInvokeTextEmbeddingApi(Resource): class PluginInvokeTextEmbeddingApi(Resource):
) )
return PluginModelBackwardsInvocation.convert_to_event_stream(response) return PluginModelBackwardsInvocation.convert_to_event_stream(response)


return compact_generate_response(generator())
return length_prefixed_response(0xF, generator())




class PluginInvokeSpeech2TextApi(Resource): class PluginInvokeSpeech2TextApi(Resource):
), ),
) )


return compact_generate_response(generator())
return length_prefixed_response(0xF, generator())




class PluginInvokeParameterExtractorNodeApi(Resource): class PluginInvokeParameterExtractorNodeApi(Resource):
files=payload.files, files=payload.files,
) )


return compact_generate_response(PluginAppBackwardsInvocation.convert_to_event_stream(response))
return length_prefixed_response(0xF, PluginAppBackwardsInvocation.convert_to_event_stream(response))




class PluginInvokeEncryptApi(Resource): class PluginInvokeEncryptApi(Resource):

+ 3
- 5
api/core/plugin/backwards_invocation/base.py 查看文件

try: try:
for chunk in response: for chunk in response:
if isinstance(chunk, BaseModel | dict): if isinstance(chunk, BaseModel | dict):
yield BaseBackwardsInvocationResponse(data=chunk).model_dump_json().encode() + b"\n\n"
elif isinstance(chunk, str):
yield f"event: {chunk}\n\n".encode()
yield BaseBackwardsInvocationResponse(data=chunk).model_dump_json().encode()
except Exception as e: except Exception as e:
error_message = BaseBackwardsInvocationResponse(error=str(e)).model_dump_json() error_message = BaseBackwardsInvocationResponse(error=str(e)).model_dump_json()
yield f"{error_message}\n\n".encode()
yield error_message.encode()
else: else:
yield BaseBackwardsInvocationResponse(data=response).model_dump_json().encode() + b"\n\n"
yield BaseBackwardsInvocationResponse(data=response).model_dump_json().encode()




T = TypeVar("T", bound=dict | Mapping | str | bool | int | BaseModel) T = TypeVar("T", bound=dict | Mapping | str | bool | int | BaseModel)

+ 56
- 0
api/libs/helper.py 查看文件

import re import re
import secrets import secrets
import string import string
import struct
import subprocess import subprocess
import time import time
import uuid import uuid


from flask import Response, stream_with_context from flask import Response, stream_with_context
from flask_restful import fields from flask_restful import fields
from pydantic import BaseModel


from configs import dify_config from configs import dify_config
from core.app.features.rate_limiting.rate_limit import RateLimitGenerator from core.app.features.rate_limiting.rate_limit import RateLimitGenerator
return Response(stream_with_context(generate()), status=200, mimetype="text/event-stream") return Response(stream_with_context(generate()), status=200, mimetype="text/event-stream")




def length_prefixed_response(magic_number: int, response: Union[Mapping, Generator, RateLimitGenerator]) -> Response:
    """
    Build a Flask response whose payload is wrapped in length-prefixed binary frames.

    Added for compatibility with the latest plugin daemon
    (https://github.com/langgenius/dify-plugin-daemon/pull/341). A line-based
    response is avoided because it leads to a memory issue.

    Each frame uses the following layout (all integers are little-endian):

    | Field         | Size     | Description                              |
    |---------------|----------|------------------------------------------|
    | Magic Number  | 1 byte   | Magic number identifier                  |
    | Reserved      | 1 byte   | Reserved field (written as 0)            |
    | Header Length | 2 bytes  | Header length (always 0xA in this code)  |
    | Data Length   | 4 bytes  | Length of the data in bytes              |
    | Reserved      | 6 bytes  | Reserved fields (written as zeros)       |
    | Data          | Variable | Actual data content                      |

    Viewed as sections:

    | Reserved Fields | Header   | Data     |
    |-----------------|----------|----------|
    | 4 bytes total   | Variable | Variable |

    NOTE(review): 0xA presumably counts the bytes after the first four
    (4-byte data length + 6 reserved bytes) -- confirm against the daemon.

    :param magic_number: one-byte number indicating the type of the response;
        it is packed as an unsigned byte.
    :param response: a ``dict`` or pydantic ``BaseModel`` is serialized to JSON
        and sent as one frame with mimetype ``application/json``; any other
        value is iterated and every chunk is sent as its own frame
        (``str`` chunks are UTF-8 encoded first) with mimetype
        ``text/event-stream``.
    :return: a ``Response`` with status 200.
    """

    def frame(payload: bytes) -> bytes:
        # "<BBHI6x": magic (1) | reserved (1) | header length (2) | data length (4) | 6 zero pad bytes
        return struct.pack("<BBHI6x", magic_number, 0, 0xA, len(payload)) + payload

    # Non-streaming payloads: serialize once and return a single frame.
    if isinstance(response, dict):
        encoded = json.dumps(jsonable_encoder(response)).encode("utf-8")
        return Response(response=frame(encoded), status=200, mimetype="application/json")

    if isinstance(response, BaseModel):
        encoded = response.model_dump_json().encode("utf-8")
        return Response(response=frame(encoded), status=200, mimetype="application/json")

    def stream() -> Generator:
        # Frame lazily so chunks are emitted as the underlying iterable produces them.
        for piece in response:
            raw = piece.encode("utf-8") if isinstance(piece, str) else piece
            yield frame(raw)

    return Response(stream_with_context(stream()), status=200, mimetype="text/event-stream")


class TokenManager: class TokenManager:
@classmethod @classmethod
def generate_token( def generate_token(

Loading…
取消
儲存