You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ssrf_proxy.py 4.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. """
  2. Proxy requests to avoid SSRF
  3. """
  4. import logging
  5. import time
  6. import httpx
  7. from configs import dify_config
  8. from core.helper.http_client_pooling import get_pooled_http_client
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Default retry budget used by make_request and the verb helpers, read from config.
SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES

# Base delay in seconds for the backoff sleep between attempts; make_request
# sleeps BACKOFF_FACTOR * 2 ** (attempt - 1) before the next try.
BACKOFF_FACTOR = 0.5

# Response status codes that make_request treats as retryable instead of returning.
STATUS_FORCELIST = [429, 500, 502, 503, 504]

# Pool keys passed to get_pooled_http_client: one for clients built with
# verify=True and one for clients built with verify=False.
_SSL_VERIFIED_POOL_KEY = "ssrf:verified"
_SSL_UNVERIFIED_POOL_KEY = "ssrf:unverified"

# Connection limits handed to every httpx.Client built by _build_ssrf_client;
# all three values come from application config.
_SSRF_CLIENT_LIMITS = httpx.Limits(
    max_connections=dify_config.SSRF_POOL_MAX_CONNECTIONS,
    max_keepalive_connections=dify_config.SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS,
    keepalive_expiry=dify_config.SSRF_POOL_KEEPALIVE_EXPIRY,
)
  20. class MaxRetriesExceededError(ValueError):
  21. """Raised when the maximum number of retries is exceeded."""
  22. pass
  23. def _create_proxy_mounts() -> dict[str, httpx.HTTPTransport]:
  24. return {
  25. "http://": httpx.HTTPTransport(
  26. proxy=dify_config.SSRF_PROXY_HTTP_URL,
  27. ),
  28. "https://": httpx.HTTPTransport(
  29. proxy=dify_config.SSRF_PROXY_HTTPS_URL,
  30. ),
  31. }
  32. def _build_ssrf_client(verify: bool) -> httpx.Client:
  33. if dify_config.SSRF_PROXY_ALL_URL:
  34. return httpx.Client(
  35. proxy=dify_config.SSRF_PROXY_ALL_URL,
  36. verify=verify,
  37. limits=_SSRF_CLIENT_LIMITS,
  38. )
  39. if dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
  40. return httpx.Client(
  41. mounts=_create_proxy_mounts(),
  42. verify=verify,
  43. limits=_SSRF_CLIENT_LIMITS,
  44. )
  45. return httpx.Client(verify=verify, limits=_SSRF_CLIENT_LIMITS)
  46. def _get_ssrf_client(ssl_verify_enabled: bool) -> httpx.Client:
  47. if not isinstance(ssl_verify_enabled, bool):
  48. raise ValueError("SSRF client verify flag must be a boolean")
  49. return get_pooled_http_client(
  50. _SSL_VERIFIED_POOL_KEY if ssl_verify_enabled else _SSL_UNVERIFIED_POOL_KEY,
  51. lambda: _build_ssrf_client(verify=ssl_verify_enabled),
  52. )
  53. def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
  54. if "allow_redirects" in kwargs:
  55. allow_redirects = kwargs.pop("allow_redirects")
  56. if "follow_redirects" not in kwargs:
  57. kwargs["follow_redirects"] = allow_redirects
  58. if "timeout" not in kwargs:
  59. kwargs["timeout"] = httpx.Timeout(
  60. timeout=dify_config.SSRF_DEFAULT_TIME_OUT,
  61. connect=dify_config.SSRF_DEFAULT_CONNECT_TIME_OUT,
  62. read=dify_config.SSRF_DEFAULT_READ_TIME_OUT,
  63. write=dify_config.SSRF_DEFAULT_WRITE_TIME_OUT,
  64. )
  65. # prioritize per-call option, which can be switched on and off inside the HTTP node on the web UI
  66. verify_option = kwargs.pop("ssl_verify", dify_config.HTTP_REQUEST_NODE_SSL_VERIFY)
  67. client = _get_ssrf_client(verify_option)
  68. retries = 0
  69. while retries <= max_retries:
  70. try:
  71. response = client.request(method=method, url=url, **kwargs)
  72. if response.status_code not in STATUS_FORCELIST:
  73. return response
  74. else:
  75. logger.warning(
  76. "Received status code %s for URL %s which is in the force list",
  77. response.status_code,
  78. url,
  79. )
  80. except httpx.RequestError as e:
  81. logger.warning("Request to URL %s failed on attempt %s: %s", url, retries + 1, e)
  82. if max_retries == 0:
  83. raise
  84. retries += 1
  85. if retries <= max_retries:
  86. time.sleep(BACKOFF_FACTOR * (2 ** (retries - 1)))
  87. raise MaxRetriesExceededError(f"Reached maximum retries ({max_retries}) for URL {url}")
  88. def get(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
  89. return make_request("GET", url, max_retries=max_retries, **kwargs)
  90. def post(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
  91. return make_request("POST", url, max_retries=max_retries, **kwargs)
  92. def put(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
  93. return make_request("PUT", url, max_retries=max_retries, **kwargs)
  94. def patch(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
  95. return make_request("PATCH", url, max_retries=max_retries, **kwargs)
  96. def delete(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
  97. return make_request("DELETE", url, max_retries=max_retries, **kwargs)
  98. def head(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
  99. return make_request("HEAD", url, max_retries=max_retries, **kwargs)