Sfoglia il codice sorgente

improve: pooling httpx clients for requests to code sandbox and ssrf (#26052)

tags/1.9.1
Blackoutta 1 mese fa
parent
commit
e937c8c72e
Nessun account collegato all'indirizzo email del committer

+ 7
- 0
api/.env.example Vedi File

SSRF_DEFAULT_CONNECT_TIME_OUT=5 SSRF_DEFAULT_CONNECT_TIME_OUT=5
SSRF_DEFAULT_READ_TIME_OUT=5 SSRF_DEFAULT_READ_TIME_OUT=5
SSRF_DEFAULT_WRITE_TIME_OUT=5 SSRF_DEFAULT_WRITE_TIME_OUT=5
SSRF_POOL_MAX_CONNECTIONS=100
SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS=20
SSRF_POOL_KEEPALIVE_EXPIRY=5.0


BATCH_UPLOAD_LIMIT=10 BATCH_UPLOAD_LIMIT=10
KEYWORD_DATA_SOURCE_TYPE=database KEYWORD_DATA_SOURCE_TYPE=database
# CODE EXECUTION CONFIGURATION # CODE EXECUTION CONFIGURATION
CODE_EXECUTION_ENDPOINT=http://127.0.0.1:8194 CODE_EXECUTION_ENDPOINT=http://127.0.0.1:8194
CODE_EXECUTION_API_KEY=dify-sandbox CODE_EXECUTION_API_KEY=dify-sandbox
CODE_EXECUTION_SSL_VERIFY=True
CODE_EXECUTION_POOL_MAX_CONNECTIONS=100
CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS=20
CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY=5.0
CODE_MAX_NUMBER=9223372036854775807 CODE_MAX_NUMBER=9223372036854775807
CODE_MIN_NUMBER=-9223372036854775808 CODE_MIN_NUMBER=-9223372036854775808
CODE_MAX_STRING_LENGTH=80000 CODE_MAX_STRING_LENGTH=80000

+ 35
- 0
api/configs/feature/__init__.py Vedi File

default=10.0, default=10.0,
) )


CODE_EXECUTION_POOL_MAX_CONNECTIONS: PositiveInt = Field(
description="Maximum number of concurrent connections for the code execution HTTP client",
default=100,
)

CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS: PositiveInt = Field(
description="Maximum number of persistent keep-alive connections for the code execution HTTP client",
default=20,
)

CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY: PositiveFloat | None = Field(
description="Keep-alive expiry in seconds for idle connections (set to None to disable)",
default=5.0,
)

CODE_MAX_NUMBER: PositiveInt = Field( CODE_MAX_NUMBER: PositiveInt = Field(
description="Maximum allowed numeric value in code execution", description="Maximum allowed numeric value in code execution",
default=9223372036854775807, default=9223372036854775807,
default=1000, default=1000,
) )


CODE_EXECUTION_SSL_VERIFY: bool = Field(
description="Enable or disable SSL verification for code execution requests",
default=True,
)



class PluginConfig(BaseSettings): class PluginConfig(BaseSettings):
""" """
default=5, default=5,
) )


SSRF_POOL_MAX_CONNECTIONS: PositiveInt = Field(
description="Maximum number of concurrent connections for the SSRF HTTP client",
default=100,
)

SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS: PositiveInt = Field(
description="Maximum number of persistent keep-alive connections for the SSRF HTTP client",
default=20,
)

SSRF_POOL_KEEPALIVE_EXPIRY: PositiveFloat | None = Field(
description="Keep-alive expiry in seconds for idle SSRF connections (set to None to disable)",
default=5.0,
)

RESPECT_XFORWARD_HEADERS_ENABLED: bool = Field( RESPECT_XFORWARD_HEADERS_ENABLED: bool = Field(
description="Enable handling of X-Forwarded-For, X-Forwarded-Proto, and X-Forwarded-Port headers" description="Enable handling of X-Forwarded-For, X-Forwarded-Proto, and X-Forwarded-Port headers"
" when the app is behind a single trusted reverse proxy.", " when the app is behind a single trusted reverse proxy.",

+ 29
- 10
api/core/helper/code_executor/code_executor.py Vedi File

from threading import Lock from threading import Lock
from typing import Any from typing import Any


from httpx import Timeout, post
import httpx
from pydantic import BaseModel from pydantic import BaseModel
from yarl import URL from yarl import URL


from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
from core.helper.code_executor.template_transformer import TemplateTransformer from core.helper.code_executor.template_transformer import TemplateTransformer
from core.helper.http_client_pooling import get_pooled_http_client


logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
code_execution_endpoint_url = URL(str(dify_config.CODE_EXECUTION_ENDPOINT)) code_execution_endpoint_url = URL(str(dify_config.CODE_EXECUTION_ENDPOINT))
CODE_EXECUTION_SSL_VERIFY = dify_config.CODE_EXECUTION_SSL_VERIFY
_CODE_EXECUTOR_CLIENT_LIMITS = httpx.Limits(
max_connections=dify_config.CODE_EXECUTION_POOL_MAX_CONNECTIONS,
max_keepalive_connections=dify_config.CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS,
keepalive_expiry=dify_config.CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY,
)
_CODE_EXECUTOR_CLIENT_KEY = "code_executor:http_client"




class CodeExecutionError(Exception): class CodeExecutionError(Exception):
JAVASCRIPT = "javascript" JAVASCRIPT = "javascript"




def _build_code_executor_client() -> httpx.Client:
return httpx.Client(
verify=CODE_EXECUTION_SSL_VERIFY,
limits=_CODE_EXECUTOR_CLIENT_LIMITS,
)


class CodeExecutor: class CodeExecutor:
dependencies_cache: dict[str, str] = {} dependencies_cache: dict[str, str] = {}
dependencies_cache_lock = Lock() dependencies_cache_lock = Lock()
"enable_network": True, "enable_network": True,
} }


timeout = httpx.Timeout(
connect=dify_config.CODE_EXECUTION_CONNECT_TIMEOUT,
read=dify_config.CODE_EXECUTION_READ_TIMEOUT,
write=dify_config.CODE_EXECUTION_WRITE_TIMEOUT,
pool=None,
)

client = get_pooled_http_client(_CODE_EXECUTOR_CLIENT_KEY, _build_code_executor_client)

try: try:
response = post(
response = client.post(
str(url), str(url),
json=data, json=data,
headers=headers, headers=headers,
timeout=Timeout(
connect=dify_config.CODE_EXECUTION_CONNECT_TIMEOUT,
read=dify_config.CODE_EXECUTION_READ_TIMEOUT,
write=dify_config.CODE_EXECUTION_WRITE_TIMEOUT,
pool=None,
),
timeout=timeout,
) )
if response.status_code == 503: if response.status_code == 503:
raise CodeExecutionError("Code execution service is unavailable") raise CodeExecutionError("Code execution service is unavailable")


try: try:
response_data = response.json() response_data = response.json()
except:
raise CodeExecutionError("Failed to parse response")
except Exception as e:
raise CodeExecutionError("Failed to parse response") from e


if (code := response_data.get("code")) != 0: if (code := response_data.get("code")) != 0:
raise CodeExecutionError(f"Got error code: {code}. Got error msg: {response_data.get('message')}") raise CodeExecutionError(f"Got error code: {code}. Got error msg: {response_data.get('message')}")

+ 59
- 0
api/core/helper/http_client_pooling.py Vedi File

"""HTTP client pooling utilities."""

from __future__ import annotations

import atexit
import threading
from collections.abc import Callable

import httpx

ClientBuilder = Callable[[], httpx.Client]


class HttpClientPoolFactory:
"""Thread-safe factory that maintains reusable HTTP client instances."""

def __init__(self) -> None:
self._clients: dict[str, httpx.Client] = {}
self._lock = threading.Lock()

def get_or_create(self, key: str, builder: ClientBuilder) -> httpx.Client:
"""Return a pooled client associated with ``key`` creating it on demand."""
client = self._clients.get(key)
if client is not None:
return client

with self._lock:
client = self._clients.get(key)
if client is None:
client = builder()
self._clients[key] = client
return client

def close_all(self) -> None:
"""Close all pooled clients and clear the pool."""
with self._lock:
for client in self._clients.values():
client.close()
self._clients.clear()


_factory = HttpClientPoolFactory()


def get_pooled_http_client(key: str, builder: ClientBuilder) -> httpx.Client:
"""Return a pooled client for the given ``key`` using ``builder`` when missing."""
return _factory.get_or_create(key, builder)


def close_all_pooled_clients() -> None:
"""Close every client created through the pooling factory."""
_factory.close_all()


def _register_shutdown_hook() -> None:
atexit.register(close_all_pooled_clients)


_register_shutdown_hook()

+ 55
- 31
api/core/helper/ssrf_proxy.py Vedi File

import httpx import httpx


from configs import dify_config from configs import dify_config
from core.helper.http_client_pooling import get_pooled_http_client


logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)


SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES


http_request_node_ssl_verify = True # Default value for http_request_node_ssl_verify is True
try:
config_value = dify_config.HTTP_REQUEST_NODE_SSL_VERIFY
http_request_node_ssl_verify_lower = str(config_value).lower()
if http_request_node_ssl_verify_lower == "true":
http_request_node_ssl_verify = True
elif http_request_node_ssl_verify_lower == "false":
http_request_node_ssl_verify = False
else:
raise ValueError("Invalid value. HTTP_REQUEST_NODE_SSL_VERIFY should be 'True' or 'False'")
except NameError:
http_request_node_ssl_verify = True

BACKOFF_FACTOR = 0.5 BACKOFF_FACTOR = 0.5
STATUS_FORCELIST = [429, 500, 502, 503, 504] STATUS_FORCELIST = [429, 500, 502, 503, 504]


_SSL_VERIFIED_POOL_KEY = "ssrf:verified"
_SSL_UNVERIFIED_POOL_KEY = "ssrf:unverified"
_SSRF_CLIENT_LIMITS = httpx.Limits(
max_connections=dify_config.SSRF_POOL_MAX_CONNECTIONS,
max_keepalive_connections=dify_config.SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS,
keepalive_expiry=dify_config.SSRF_POOL_KEEPALIVE_EXPIRY,
)



class MaxRetriesExceededError(ValueError): class MaxRetriesExceededError(ValueError):
"""Raised when the maximum number of retries is exceeded.""" """Raised when the maximum number of retries is exceeded."""
pass pass




def _create_proxy_mounts() -> dict[str, httpx.HTTPTransport]:
return {
"http://": httpx.HTTPTransport(
proxy=dify_config.SSRF_PROXY_HTTP_URL,
),
"https://": httpx.HTTPTransport(
proxy=dify_config.SSRF_PROXY_HTTPS_URL,
),
}


def _build_ssrf_client(verify: bool) -> httpx.Client:
if dify_config.SSRF_PROXY_ALL_URL:
return httpx.Client(
proxy=dify_config.SSRF_PROXY_ALL_URL,
verify=verify,
limits=_SSRF_CLIENT_LIMITS,
)

if dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
return httpx.Client(
mounts=_create_proxy_mounts(),
verify=verify,
limits=_SSRF_CLIENT_LIMITS,
)

return httpx.Client(verify=verify, limits=_SSRF_CLIENT_LIMITS)


def _get_ssrf_client(ssl_verify_enabled: bool) -> httpx.Client:
if not isinstance(ssl_verify_enabled, bool):
raise ValueError("SSRF client verify flag must be a boolean")

return get_pooled_http_client(
_SSL_VERIFIED_POOL_KEY if ssl_verify_enabled else _SSL_UNVERIFIED_POOL_KEY,
lambda: _build_ssrf_client(verify=ssl_verify_enabled),
)


def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
if "allow_redirects" in kwargs: if "allow_redirects" in kwargs:
allow_redirects = kwargs.pop("allow_redirects") allow_redirects = kwargs.pop("allow_redirects")
write=dify_config.SSRF_DEFAULT_WRITE_TIME_OUT, write=dify_config.SSRF_DEFAULT_WRITE_TIME_OUT,
) )


if "ssl_verify" not in kwargs:
kwargs["ssl_verify"] = http_request_node_ssl_verify

ssl_verify = kwargs.pop("ssl_verify")
# prioritize per-call option, which can be switched on and off inside the HTTP node on the web UI
verify_option = kwargs.pop("ssl_verify", dify_config.HTTP_REQUEST_NODE_SSL_VERIFY)
client = _get_ssrf_client(verify_option)


retries = 0 retries = 0
while retries <= max_retries: while retries <= max_retries:
try: try:
if dify_config.SSRF_PROXY_ALL_URL:
with httpx.Client(proxy=dify_config.SSRF_PROXY_ALL_URL, verify=ssl_verify) as client:
response = client.request(method=method, url=url, **kwargs)
elif dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
proxy_mounts = {
"http://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTP_URL, verify=ssl_verify),
"https://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTPS_URL, verify=ssl_verify),
}
with httpx.Client(mounts=proxy_mounts, verify=ssl_verify) as client:
response = client.request(method=method, url=url, **kwargs)
else:
with httpx.Client(verify=ssl_verify) as client:
response = client.request(method=method, url=url, **kwargs)
response = client.request(method=method, url=url, **kwargs)


if response.status_code not in STATUS_FORCELIST: if response.status_code not in STATUS_FORCELIST:
return response return response
else: else:
logger.warning( logger.warning(
"Received status code %s for URL %s which is in the force list", response.status_code, url
"Received status code %s for URL %s which is in the force list",
response.status_code,
url,
) )


except httpx.RequestError as e: except httpx.RequestError as e:

+ 7
- 0
docker/.env.example Vedi File

# The sandbox service endpoint. # The sandbox service endpoint.
CODE_EXECUTION_ENDPOINT=http://sandbox:8194 CODE_EXECUTION_ENDPOINT=http://sandbox:8194
CODE_EXECUTION_API_KEY=dify-sandbox CODE_EXECUTION_API_KEY=dify-sandbox
CODE_EXECUTION_SSL_VERIFY=True
CODE_EXECUTION_POOL_MAX_CONNECTIONS=100
CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS=20
CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY=5.0
CODE_MAX_NUMBER=9223372036854775807 CODE_MAX_NUMBER=9223372036854775807
CODE_MIN_NUMBER=-9223372036854775808 CODE_MIN_NUMBER=-9223372036854775808
CODE_MAX_DEPTH=5 CODE_MAX_DEPTH=5
SSRF_DEFAULT_CONNECT_TIME_OUT=5 SSRF_DEFAULT_CONNECT_TIME_OUT=5
SSRF_DEFAULT_READ_TIME_OUT=5 SSRF_DEFAULT_READ_TIME_OUT=5
SSRF_DEFAULT_WRITE_TIME_OUT=5 SSRF_DEFAULT_WRITE_TIME_OUT=5
SSRF_POOL_MAX_CONNECTIONS=100
SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS=20
SSRF_POOL_KEEPALIVE_EXPIRY=5.0


# ------------------------------ # ------------------------------
# docker env var for specifying vector db type at startup # docker env var for specifying vector db type at startup

+ 7
- 0
docker/docker-compose.yaml Vedi File

OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES: ${OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES:-5} OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES: ${OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES:-5}
CODE_EXECUTION_ENDPOINT: ${CODE_EXECUTION_ENDPOINT:-http://sandbox:8194} CODE_EXECUTION_ENDPOINT: ${CODE_EXECUTION_ENDPOINT:-http://sandbox:8194}
CODE_EXECUTION_API_KEY: ${CODE_EXECUTION_API_KEY:-dify-sandbox} CODE_EXECUTION_API_KEY: ${CODE_EXECUTION_API_KEY:-dify-sandbox}
CODE_EXECUTION_SSL_VERIFY: ${CODE_EXECUTION_SSL_VERIFY:-True}
CODE_EXECUTION_POOL_MAX_CONNECTIONS: ${CODE_EXECUTION_POOL_MAX_CONNECTIONS:-100}
CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS: ${CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS:-20}
CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY: ${CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY:-5.0}
CODE_MAX_NUMBER: ${CODE_MAX_NUMBER:-9223372036854775807} CODE_MAX_NUMBER: ${CODE_MAX_NUMBER:-9223372036854775807}
CODE_MIN_NUMBER: ${CODE_MIN_NUMBER:--9223372036854775808} CODE_MIN_NUMBER: ${CODE_MIN_NUMBER:--9223372036854775808}
CODE_MAX_DEPTH: ${CODE_MAX_DEPTH:-5} CODE_MAX_DEPTH: ${CODE_MAX_DEPTH:-5}
SSRF_DEFAULT_CONNECT_TIME_OUT: ${SSRF_DEFAULT_CONNECT_TIME_OUT:-5} SSRF_DEFAULT_CONNECT_TIME_OUT: ${SSRF_DEFAULT_CONNECT_TIME_OUT:-5}
SSRF_DEFAULT_READ_TIME_OUT: ${SSRF_DEFAULT_READ_TIME_OUT:-5} SSRF_DEFAULT_READ_TIME_OUT: ${SSRF_DEFAULT_READ_TIME_OUT:-5}
SSRF_DEFAULT_WRITE_TIME_OUT: ${SSRF_DEFAULT_WRITE_TIME_OUT:-5} SSRF_DEFAULT_WRITE_TIME_OUT: ${SSRF_DEFAULT_WRITE_TIME_OUT:-5}
SSRF_POOL_MAX_CONNECTIONS: ${SSRF_POOL_MAX_CONNECTIONS:-100}
SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS: ${SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS:-20}
SSRF_POOL_KEEPALIVE_EXPIRY: ${SSRF_POOL_KEEPALIVE_EXPIRY:-5.0}
EXPOSE_NGINX_PORT: ${EXPOSE_NGINX_PORT:-80} EXPOSE_NGINX_PORT: ${EXPOSE_NGINX_PORT:-80}
EXPOSE_NGINX_SSL_PORT: ${EXPOSE_NGINX_SSL_PORT:-443} EXPOSE_NGINX_SSL_PORT: ${EXPOSE_NGINX_SSL_PORT:-443}
POSITION_TOOL_PINS: ${POSITION_TOOL_PINS:-} POSITION_TOOL_PINS: ${POSITION_TOOL_PINS:-}

Loading…
Annulla
Salva