You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

helper.py 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. import json
  2. import logging
  3. import re
  4. import secrets
  5. import string
  6. import struct
  7. import subprocess
  8. import time
  9. import uuid
  10. from collections.abc import Generator, Mapping
  11. from datetime import datetime
  12. from hashlib import sha256
  13. from typing import TYPE_CHECKING, Any, Optional, Union, cast
  14. from zoneinfo import available_timezones
  15. from flask import Response, stream_with_context
  16. from flask_restful import fields
  17. from pydantic import BaseModel
  18. from configs import dify_config
  19. from core.app.features.rate_limiting.rate_limit import RateLimitGenerator
  20. from core.file import helpers as file_helpers
  21. from core.model_runtime.utils.encoders import jsonable_encoder
  22. from extensions.ext_redis import redis_client
  23. if TYPE_CHECKING:
  24. from models.account import Account
  25. def run(script):
  26. return subprocess.getstatusoutput("source /root/.bashrc && " + script)
  27. class AppIconUrlField(fields.Raw):
  28. def output(self, key, obj):
  29. if obj is None:
  30. return None
  31. from models.model import App, IconType, Site
  32. if isinstance(obj, dict) and "app" in obj:
  33. obj = obj["app"]
  34. if isinstance(obj, App | Site) and obj.icon_type == IconType.IMAGE.value:
  35. return file_helpers.get_signed_file_url(obj.icon)
  36. return None
  37. class AvatarUrlField(fields.Raw):
  38. def output(self, key, obj):
  39. if obj is None:
  40. return None
  41. from models.account import Account
  42. if isinstance(obj, Account) and obj.avatar is not None:
  43. return file_helpers.get_signed_file_url(obj.avatar)
  44. return None
  45. class TimestampField(fields.Raw):
  46. def format(self, value) -> int:
  47. return int(value.timestamp())
  48. def email(email):
  49. # Define a regex pattern for email addresses
  50. pattern = r"^[\w\.!#$%&'*+\-/=?^_`{|}~]+@([\w-]+\.)+[\w-]{2,}$"
  51. # Check if the email matches the pattern
  52. if re.match(pattern, email) is not None:
  53. return email
  54. error = "{email} is not a valid email.".format(email=email)
  55. raise ValueError(error)
  56. def uuid_value(value):
  57. if value == "":
  58. return str(value)
  59. try:
  60. uuid_obj = uuid.UUID(value)
  61. return str(uuid_obj)
  62. except ValueError:
  63. error = "{value} is not a valid uuid.".format(value=value)
  64. raise ValueError(error)
  65. def alphanumeric(value: str):
  66. # check if the value is alphanumeric and underlined
  67. if re.match(r"^[a-zA-Z0-9_]+$", value):
  68. return value
  69. raise ValueError(f"{value} is not a valid alphanumeric value")
  70. def timestamp_value(timestamp):
  71. try:
  72. int_timestamp = int(timestamp)
  73. if int_timestamp < 0:
  74. raise ValueError
  75. return int_timestamp
  76. except ValueError:
  77. error = "{timestamp} is not a valid timestamp.".format(timestamp=timestamp)
  78. raise ValueError(error)
  79. class StrLen:
  80. """Restrict input to an integer in a range (inclusive)"""
  81. def __init__(self, max_length, argument="argument"):
  82. self.max_length = max_length
  83. self.argument = argument
  84. def __call__(self, value):
  85. length = len(value)
  86. if length > self.max_length:
  87. error = "Invalid {arg}: {val}. {arg} cannot exceed length {length}".format(
  88. arg=self.argument, val=value, length=self.max_length
  89. )
  90. raise ValueError(error)
  91. return value
  92. class FloatRange:
  93. """Restrict input to an float in a range (inclusive)"""
  94. def __init__(self, low, high, argument="argument"):
  95. self.low = low
  96. self.high = high
  97. self.argument = argument
  98. def __call__(self, value):
  99. value = _get_float(value)
  100. if value < self.low or value > self.high:
  101. error = "Invalid {arg}: {val}. {arg} must be within the range {lo} - {hi}".format(
  102. arg=self.argument, val=value, lo=self.low, hi=self.high
  103. )
  104. raise ValueError(error)
  105. return value
  106. class DatetimeString:
  107. def __init__(self, format, argument="argument"):
  108. self.format = format
  109. self.argument = argument
  110. def __call__(self, value):
  111. try:
  112. datetime.strptime(value, self.format)
  113. except ValueError:
  114. error = "Invalid {arg}: {val}. {arg} must be conform to the format {format}".format(
  115. arg=self.argument, val=value, format=self.format
  116. )
  117. raise ValueError(error)
  118. return value
  119. def _get_float(value):
  120. try:
  121. return float(value)
  122. except (TypeError, ValueError):
  123. raise ValueError("{} is not a valid float".format(value))
  124. def timezone(timezone_string):
  125. if timezone_string and timezone_string in available_timezones():
  126. return timezone_string
  127. error = "{timezone_string} is not a valid timezone.".format(timezone_string=timezone_string)
  128. raise ValueError(error)
  129. def generate_string(n):
  130. letters_digits = string.ascii_letters + string.digits
  131. result = ""
  132. for i in range(n):
  133. result += secrets.choice(letters_digits)
  134. return result
  135. def extract_remote_ip(request) -> str:
  136. if request.headers.get("CF-Connecting-IP"):
  137. return cast(str, request.headers.get("CF-Connecting-IP"))
  138. elif request.headers.getlist("X-Forwarded-For"):
  139. return cast(str, request.headers.getlist("X-Forwarded-For")[0])
  140. else:
  141. return cast(str, request.remote_addr)
  142. def generate_text_hash(text: str) -> str:
  143. hash_text = str(text) + "None"
  144. return sha256(hash_text.encode()).hexdigest()
  145. def compact_generate_response(response: Union[Mapping, Generator, RateLimitGenerator]) -> Response:
  146. if isinstance(response, dict):
  147. return Response(response=json.dumps(jsonable_encoder(response)), status=200, mimetype="application/json")
  148. else:
  149. def generate() -> Generator:
  150. yield from response
  151. return Response(stream_with_context(generate()), status=200, mimetype="text/event-stream")
  152. def length_prefixed_response(magic_number: int, response: Union[Mapping, Generator, RateLimitGenerator]) -> Response:
  153. """
  154. This function is used to return a response with a length prefix.
  155. Magic number is a one byte number that indicates the type of the response.
  156. For a compatibility with latest plugin daemon https://github.com/langgenius/dify-plugin-daemon/pull/341
  157. Avoid using line-based response, it leads a memory issue.
  158. We uses following format:
  159. | Field | Size | Description |
  160. |---------------|----------|---------------------------------|
  161. | Magic Number | 1 byte | Magic number identifier |
  162. | Reserved | 1 byte | Reserved field |
  163. | Header Length | 2 bytes | Header length (usually 0xa) |
  164. | Data Length | 4 bytes | Length of the data |
  165. | Reserved | 6 bytes | Reserved fields |
  166. | Data | Variable | Actual data content |
  167. | Reserved Fields | Header | Data |
  168. |-----------------|----------|----------|
  169. | 4 bytes total | Variable | Variable |
  170. all data is in little endian
  171. """
  172. def pack_response_with_length_prefix(response: bytes) -> bytes:
  173. header_length = 0xA
  174. data_length = len(response)
  175. # | Magic Number 1byte | Reserved 1byte | Header Length 2bytes | Data Length 4bytes | Reserved 6bytes | Data
  176. return struct.pack("<BBHI", magic_number, 0, header_length, data_length) + b"\x00" * 6 + response
  177. if isinstance(response, dict):
  178. return Response(
  179. response=pack_response_with_length_prefix(json.dumps(jsonable_encoder(response)).encode("utf-8")),
  180. status=200,
  181. mimetype="application/json",
  182. )
  183. elif isinstance(response, BaseModel):
  184. return Response(
  185. response=pack_response_with_length_prefix(response.model_dump_json().encode("utf-8")),
  186. status=200,
  187. mimetype="application/json",
  188. )
  189. def generate() -> Generator:
  190. for chunk in response:
  191. if isinstance(chunk, str):
  192. yield pack_response_with_length_prefix(chunk.encode("utf-8"))
  193. else:
  194. yield pack_response_with_length_prefix(chunk)
  195. return Response(stream_with_context(generate()), status=200, mimetype="text/event-stream")
  196. class TokenManager:
  197. @classmethod
  198. def generate_token(
  199. cls,
  200. token_type: str,
  201. account: Optional["Account"] = None,
  202. email: Optional[str] = None,
  203. additional_data: Optional[dict] = None,
  204. ) -> str:
  205. if account is None and email is None:
  206. raise ValueError("Account or email must be provided")
  207. account_id = account.id if account else None
  208. account_email = account.email if account else email
  209. if account_id:
  210. old_token = cls._get_current_token_for_account(account_id, token_type)
  211. if old_token:
  212. if isinstance(old_token, bytes):
  213. old_token = old_token.decode("utf-8")
  214. cls.revoke_token(old_token, token_type)
  215. token = str(uuid.uuid4())
  216. token_data = {"account_id": account_id, "email": account_email, "token_type": token_type}
  217. if additional_data:
  218. token_data.update(additional_data)
  219. expiry_minutes = dify_config.model_dump().get(f"{token_type.upper()}_TOKEN_EXPIRY_MINUTES")
  220. if expiry_minutes is None:
  221. raise ValueError(f"Expiry minutes for {token_type} token is not set")
  222. token_key = cls._get_token_key(token, token_type)
  223. expiry_time = int(expiry_minutes * 60)
  224. redis_client.setex(token_key, expiry_time, json.dumps(token_data))
  225. if account_id:
  226. cls._set_current_token_for_account(account_id, token, token_type, expiry_minutes)
  227. return token
  228. @classmethod
  229. def _get_token_key(cls, token: str, token_type: str) -> str:
  230. return f"{token_type}:token:{token}"
  231. @classmethod
  232. def revoke_token(cls, token: str, token_type: str):
  233. token_key = cls._get_token_key(token, token_type)
  234. redis_client.delete(token_key)
  235. @classmethod
  236. def get_token_data(cls, token: str, token_type: str) -> Optional[dict[str, Any]]:
  237. key = cls._get_token_key(token, token_type)
  238. token_data_json = redis_client.get(key)
  239. if token_data_json is None:
  240. logging.warning(f"{token_type} token {token} not found with key {key}")
  241. return None
  242. token_data: Optional[dict[str, Any]] = json.loads(token_data_json)
  243. return token_data
  244. @classmethod
  245. def _get_current_token_for_account(cls, account_id: str, token_type: str) -> Optional[str]:
  246. key = cls._get_account_token_key(account_id, token_type)
  247. current_token: Optional[str] = redis_client.get(key)
  248. return current_token
  249. @classmethod
  250. def _set_current_token_for_account(
  251. cls, account_id: str, token: str, token_type: str, expiry_hours: Union[int, float]
  252. ):
  253. key = cls._get_account_token_key(account_id, token_type)
  254. expiry_time = int(expiry_hours * 60 * 60)
  255. redis_client.setex(key, expiry_time, token)
  256. @classmethod
  257. def _get_account_token_key(cls, account_id: str, token_type: str) -> str:
  258. return f"{token_type}:account:{account_id}"
  259. class RateLimiter:
  260. def __init__(self, prefix: str, max_attempts: int, time_window: int):
  261. self.prefix = prefix
  262. self.max_attempts = max_attempts
  263. self.time_window = time_window
  264. def _get_key(self, email: str) -> str:
  265. return f"{self.prefix}:{email}"
  266. def is_rate_limited(self, email: str) -> bool:
  267. key = self._get_key(email)
  268. current_time = int(time.time())
  269. window_start_time = current_time - self.time_window
  270. redis_client.zremrangebyscore(key, "-inf", window_start_time)
  271. attempts = redis_client.zcard(key)
  272. if attempts and int(attempts) >= self.max_attempts:
  273. return True
  274. return False
  275. def increment_rate_limit(self, email: str):
  276. key = self._get_key(email)
  277. current_time = int(time.time())
  278. redis_client.zadd(key, {current_time: current_time})
  279. redis_client.expire(key, self.time_window * 2)