| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138 |
- """
- Proxy requests to avoid SSRF
- """
-
- import logging
- import time
-
- import httpx
-
- from configs import dify_config
- from core.helper.http_client_pooling import get_pooled_http_client
-
- logger = logging.getLogger(__name__)
-
- SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES
-
- BACKOFF_FACTOR = 0.5
- STATUS_FORCELIST = [429, 500, 502, 503, 504]
-
- _SSL_VERIFIED_POOL_KEY = "ssrf:verified"
- _SSL_UNVERIFIED_POOL_KEY = "ssrf:unverified"
- _SSRF_CLIENT_LIMITS = httpx.Limits(
- max_connections=dify_config.SSRF_POOL_MAX_CONNECTIONS,
- max_keepalive_connections=dify_config.SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS,
- keepalive_expiry=dify_config.SSRF_POOL_KEEPALIVE_EXPIRY,
- )
-
-
- class MaxRetriesExceededError(ValueError):
- """Raised when the maximum number of retries is exceeded."""
-
- pass
-
-
- def _create_proxy_mounts() -> dict[str, httpx.HTTPTransport]:
- return {
- "http://": httpx.HTTPTransport(
- proxy=dify_config.SSRF_PROXY_HTTP_URL,
- ),
- "https://": httpx.HTTPTransport(
- proxy=dify_config.SSRF_PROXY_HTTPS_URL,
- ),
- }
-
-
- def _build_ssrf_client(verify: bool) -> httpx.Client:
- if dify_config.SSRF_PROXY_ALL_URL:
- return httpx.Client(
- proxy=dify_config.SSRF_PROXY_ALL_URL,
- verify=verify,
- limits=_SSRF_CLIENT_LIMITS,
- )
-
- if dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
- return httpx.Client(
- mounts=_create_proxy_mounts(),
- verify=verify,
- limits=_SSRF_CLIENT_LIMITS,
- )
-
- return httpx.Client(verify=verify, limits=_SSRF_CLIENT_LIMITS)
-
-
- def _get_ssrf_client(ssl_verify_enabled: bool) -> httpx.Client:
- if not isinstance(ssl_verify_enabled, bool):
- raise ValueError("SSRF client verify flag must be a boolean")
-
- return get_pooled_http_client(
- _SSL_VERIFIED_POOL_KEY if ssl_verify_enabled else _SSL_UNVERIFIED_POOL_KEY,
- lambda: _build_ssrf_client(verify=ssl_verify_enabled),
- )
-
-
- def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
- if "allow_redirects" in kwargs:
- allow_redirects = kwargs.pop("allow_redirects")
- if "follow_redirects" not in kwargs:
- kwargs["follow_redirects"] = allow_redirects
-
- if "timeout" not in kwargs:
- kwargs["timeout"] = httpx.Timeout(
- timeout=dify_config.SSRF_DEFAULT_TIME_OUT,
- connect=dify_config.SSRF_DEFAULT_CONNECT_TIME_OUT,
- read=dify_config.SSRF_DEFAULT_READ_TIME_OUT,
- write=dify_config.SSRF_DEFAULT_WRITE_TIME_OUT,
- )
-
- # prioritize per-call option, which can be switched on and off inside the HTTP node on the web UI
- verify_option = kwargs.pop("ssl_verify", dify_config.HTTP_REQUEST_NODE_SSL_VERIFY)
- client = _get_ssrf_client(verify_option)
-
- retries = 0
- while retries <= max_retries:
- try:
- response = client.request(method=method, url=url, **kwargs)
-
- if response.status_code not in STATUS_FORCELIST:
- return response
- else:
- logger.warning(
- "Received status code %s for URL %s which is in the force list",
- response.status_code,
- url,
- )
-
- except httpx.RequestError as e:
- logger.warning("Request to URL %s failed on attempt %s: %s", url, retries + 1, e)
- if max_retries == 0:
- raise
-
- retries += 1
- if retries <= max_retries:
- time.sleep(BACKOFF_FACTOR * (2 ** (retries - 1)))
- raise MaxRetriesExceededError(f"Reached maximum retries ({max_retries}) for URL {url}")
-
-
- def get(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
- return make_request("GET", url, max_retries=max_retries, **kwargs)
-
-
- def post(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
- return make_request("POST", url, max_retries=max_retries, **kwargs)
-
-
- def put(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
- return make_request("PUT", url, max_retries=max_retries, **kwargs)
-
-
- def patch(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
- return make_request("PATCH", url, max_retries=max_retries, **kwargs)
-
-
- def delete(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
- return make_request("DELETE", url, max_retries=max_retries, **kwargs)
-
-
- def head(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
- return make_request("HEAD", url, max_retries=max_retries, **kwargs)
|