Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

tool_file_manager.py 6.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. import base64
  2. import hashlib
  3. import hmac
  4. import logging
  5. import os
  6. import time
  7. from mimetypes import guess_extension, guess_type
  8. from typing import Optional, Union
  9. from uuid import uuid4
  10. import httpx
  11. from configs import dify_config
  12. from core.helper import ssrf_proxy
  13. from extensions.ext_database import db
  14. from extensions.ext_storage import storage
  15. from models.model import MessageFile
  16. from models.tools import ToolFile
  17. logger = logging.getLogger(__name__)
  18. class ToolFileManager:
  19. @staticmethod
  20. def sign_file(tool_file_id: str, extension: str) -> str:
  21. """
  22. sign file to get a temporary url
  23. """
  24. base_url = dify_config.FILES_URL
  25. file_preview_url = f"{base_url}/files/tools/{tool_file_id}{extension}"
  26. timestamp = str(int(time.time()))
  27. nonce = os.urandom(16).hex()
  28. data_to_sign = f"file-preview|{tool_file_id}|{timestamp}|{nonce}"
  29. secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
  30. sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
  31. encoded_sign = base64.urlsafe_b64encode(sign).decode()
  32. return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
  33. @staticmethod
  34. def verify_file(file_id: str, timestamp: str, nonce: str, sign: str) -> bool:
  35. """
  36. verify signature
  37. """
  38. data_to_sign = f"file-preview|{file_id}|{timestamp}|{nonce}"
  39. secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
  40. recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
  41. recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
  42. # verify signature
  43. if sign != recalculated_encoded_sign:
  44. return False
  45. current_time = int(time.time())
  46. return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT
  47. @staticmethod
  48. def create_file_by_raw(
  49. *,
  50. user_id: str,
  51. tenant_id: str,
  52. conversation_id: Optional[str],
  53. file_binary: bytes,
  54. mimetype: str,
  55. filename: Optional[str] = None,
  56. ) -> ToolFile:
  57. extension = guess_extension(mimetype) or ".bin"
  58. unique_name = uuid4().hex
  59. unique_filename = f"{unique_name}{extension}"
  60. # default just as before
  61. present_filename = unique_filename
  62. if filename is not None:
  63. has_extension = len(filename.split(".")) > 1
  64. # Add extension flexibly
  65. present_filename = filename if has_extension else f"{filename}{extension}"
  66. filepath = f"tools/{tenant_id}/{unique_filename}"
  67. storage.save(filepath, file_binary)
  68. tool_file = ToolFile(
  69. user_id=user_id,
  70. tenant_id=tenant_id,
  71. conversation_id=conversation_id,
  72. file_key=filepath,
  73. mimetype=mimetype,
  74. name=present_filename,
  75. size=len(file_binary),
  76. )
  77. db.session.add(tool_file)
  78. db.session.commit()
  79. db.session.refresh(tool_file)
  80. return tool_file
  81. @staticmethod
  82. def create_file_by_url(
  83. user_id: str,
  84. tenant_id: str,
  85. file_url: str,
  86. conversation_id: Optional[str] = None,
  87. ) -> ToolFile:
  88. # try to download image
  89. try:
  90. response = ssrf_proxy.get(file_url)
  91. response.raise_for_status()
  92. blob = response.content
  93. except httpx.TimeoutException:
  94. raise ValueError(f"timeout when downloading file from {file_url}")
  95. mimetype = (
  96. guess_type(file_url)[0]
  97. or response.headers.get("Content-Type", "").split(";")[0].strip()
  98. or "application/octet-stream"
  99. )
  100. extension = guess_extension(mimetype) or ".bin"
  101. unique_name = uuid4().hex
  102. filename = f"{unique_name}{extension}"
  103. filepath = f"tools/{tenant_id}/{filename}"
  104. storage.save(filepath, blob)
  105. tool_file = ToolFile(
  106. user_id=user_id,
  107. tenant_id=tenant_id,
  108. conversation_id=conversation_id,
  109. file_key=filepath,
  110. mimetype=mimetype,
  111. original_url=file_url,
  112. name=filename,
  113. size=len(blob),
  114. )
  115. db.session.add(tool_file)
  116. db.session.commit()
  117. return tool_file
  118. @staticmethod
  119. def get_file_binary(id: str) -> Union[tuple[bytes, str], None]:
  120. """
  121. get file binary
  122. :param id: the id of the file
  123. :return: the binary of the file, mime type
  124. """
  125. tool_file: ToolFile | None = (
  126. db.session.query(ToolFile)
  127. .filter(
  128. ToolFile.id == id,
  129. )
  130. .first()
  131. )
  132. if not tool_file:
  133. return None
  134. blob = storage.load_once(tool_file.file_key)
  135. return blob, tool_file.mimetype
  136. @staticmethod
  137. def get_file_binary_by_message_file_id(id: str) -> Union[tuple[bytes, str], None]:
  138. """
  139. get file binary
  140. :param id: the id of the file
  141. :return: the binary of the file, mime type
  142. """
  143. message_file: MessageFile | None = (
  144. db.session.query(MessageFile)
  145. .filter(
  146. MessageFile.id == id,
  147. )
  148. .first()
  149. )
  150. # Check if message_file is not None
  151. if message_file is not None:
  152. # get tool file id
  153. if message_file.url is not None:
  154. tool_file_id = message_file.url.split("/")[-1]
  155. # trim extension
  156. tool_file_id = tool_file_id.split(".")[0]
  157. else:
  158. tool_file_id = None
  159. else:
  160. tool_file_id = None
  161. tool_file: ToolFile | None = (
  162. db.session.query(ToolFile)
  163. .filter(
  164. ToolFile.id == tool_file_id,
  165. )
  166. .first()
  167. )
  168. if not tool_file:
  169. return None
  170. blob = storage.load_once(tool_file.file_key)
  171. return blob, tool_file.mimetype
  172. @staticmethod
  173. def get_file_generator_by_tool_file_id(tool_file_id: str):
  174. """
  175. get file binary
  176. :param tool_file_id: the id of the tool file
  177. :return: the binary of the file, mime type
  178. """
  179. tool_file: ToolFile | None = (
  180. db.session.query(ToolFile)
  181. .filter(
  182. ToolFile.id == tool_file_id,
  183. )
  184. .first()
  185. )
  186. if not tool_file:
  187. return None, None
  188. stream = storage.load_stream(tool_file.file_key)
  189. return stream, tool_file
# init tool_file_parser: register ToolFileManager under the "manager" key of
# core.file.tool_file_parser.tool_file_manager.
# NOTE(review): the import sits after the class definition, presumably to
# avoid a circular import with core.file.tool_file_parser — confirm.
from core.file.tool_file_parser import tool_file_manager
tool_file_manager["manager"] = ToolFileManager