您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

tool_file_manager.py 7.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. import base64
  2. import hashlib
  3. import hmac
  4. import logging
  5. import os
  6. import time
  7. from collections.abc import Generator
  8. from mimetypes import guess_extension, guess_type
  9. from typing import Optional, Union
  10. from uuid import uuid4
  11. import httpx
  12. from sqlalchemy.orm import Session
  13. from configs import dify_config
  14. from core.helper import ssrf_proxy
  15. from extensions.ext_database import db as global_db
  16. from extensions.ext_storage import storage
  17. from models.model import MessageFile
  18. from models.tools import ToolFile
  19. logger = logging.getLogger(__name__)
  20. from sqlalchemy.engine import Engine
  21. class ToolFileManager:
  22. _engine: Engine
  23. def __init__(self, engine: Engine | None = None):
  24. if engine is None:
  25. engine = global_db.engine
  26. self._engine = engine
  27. @staticmethod
  28. def sign_file(tool_file_id: str, extension: str) -> str:
  29. """
  30. sign file to get a temporary url for plugin access
  31. """
  32. # Use internal URL for plugin/tool file access in Docker environments
  33. base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL
  34. file_preview_url = f"{base_url}/files/tools/{tool_file_id}{extension}"
  35. timestamp = str(int(time.time()))
  36. nonce = os.urandom(16).hex()
  37. data_to_sign = f"file-preview|{tool_file_id}|{timestamp}|{nonce}"
  38. secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
  39. sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
  40. encoded_sign = base64.urlsafe_b64encode(sign).decode()
  41. return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
  42. @staticmethod
  43. def verify_file(file_id: str, timestamp: str, nonce: str, sign: str) -> bool:
  44. """
  45. verify signature
  46. """
  47. data_to_sign = f"file-preview|{file_id}|{timestamp}|{nonce}"
  48. secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
  49. recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
  50. recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
  51. # verify signature
  52. if sign != recalculated_encoded_sign:
  53. return False
  54. current_time = int(time.time())
  55. return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT
  56. def create_file_by_raw(
  57. self,
  58. *,
  59. user_id: str,
  60. tenant_id: str,
  61. conversation_id: Optional[str],
  62. file_binary: bytes,
  63. mimetype: str,
  64. filename: Optional[str] = None,
  65. ) -> ToolFile:
  66. extension = guess_extension(mimetype) or ".bin"
  67. unique_name = uuid4().hex
  68. unique_filename = f"{unique_name}{extension}"
  69. # default just as before
  70. present_filename = unique_filename
  71. if filename is not None:
  72. has_extension = len(filename.split(".")) > 1
  73. # Add extension flexibly
  74. present_filename = filename if has_extension else f"{filename}{extension}"
  75. filepath = f"tools/{tenant_id}/{unique_filename}"
  76. storage.save(filepath, file_binary)
  77. with Session(self._engine, expire_on_commit=False) as session:
  78. tool_file = ToolFile(
  79. user_id=user_id,
  80. tenant_id=tenant_id,
  81. conversation_id=conversation_id,
  82. file_key=filepath,
  83. mimetype=mimetype,
  84. name=present_filename,
  85. size=len(file_binary),
  86. original_url=None,
  87. )
  88. session.add(tool_file)
  89. session.commit()
  90. session.refresh(tool_file)
  91. return tool_file
  92. def create_file_by_url(
  93. self,
  94. user_id: str,
  95. tenant_id: str,
  96. file_url: str,
  97. conversation_id: Optional[str] = None,
  98. ) -> ToolFile:
  99. # try to download image
  100. try:
  101. response = ssrf_proxy.get(file_url)
  102. response.raise_for_status()
  103. blob = response.content
  104. except httpx.TimeoutException:
  105. raise ValueError(f"timeout when downloading file from {file_url}")
  106. mimetype = (
  107. guess_type(file_url)[0]
  108. or response.headers.get("Content-Type", "").split(";")[0].strip()
  109. or "application/octet-stream"
  110. )
  111. extension = guess_extension(mimetype) or ".bin"
  112. unique_name = uuid4().hex
  113. filename = f"{unique_name}{extension}"
  114. filepath = f"tools/{tenant_id}/{filename}"
  115. storage.save(filepath, blob)
  116. with Session(self._engine, expire_on_commit=False) as session:
  117. tool_file = ToolFile(
  118. user_id=user_id,
  119. tenant_id=tenant_id,
  120. conversation_id=conversation_id,
  121. file_key=filepath,
  122. mimetype=mimetype,
  123. original_url=file_url,
  124. name=filename,
  125. size=len(blob),
  126. )
  127. session.add(tool_file)
  128. session.commit()
  129. return tool_file
  130. def get_file_binary(self, id: str) -> Union[tuple[bytes, str], None]:
  131. """
  132. get file binary
  133. :param id: the id of the file
  134. :return: the binary of the file, mime type
  135. """
  136. with Session(self._engine, expire_on_commit=False) as session:
  137. tool_file: ToolFile | None = (
  138. session.query(ToolFile)
  139. .where(
  140. ToolFile.id == id,
  141. )
  142. .first()
  143. )
  144. if not tool_file:
  145. return None
  146. blob = storage.load_once(tool_file.file_key)
  147. return blob, tool_file.mimetype
  148. def get_file_binary_by_message_file_id(self, id: str) -> Union[tuple[bytes, str], None]:
  149. """
  150. get file binary
  151. :param id: the id of the file
  152. :return: the binary of the file, mime type
  153. """
  154. with Session(self._engine, expire_on_commit=False) as session:
  155. message_file: MessageFile | None = (
  156. session.query(MessageFile)
  157. .where(
  158. MessageFile.id == id,
  159. )
  160. .first()
  161. )
  162. # Check if message_file is not None
  163. if message_file is not None:
  164. # get tool file id
  165. if message_file.url is not None:
  166. tool_file_id = message_file.url.split("/")[-1]
  167. # trim extension
  168. tool_file_id = tool_file_id.split(".")[0]
  169. else:
  170. tool_file_id = None
  171. else:
  172. tool_file_id = None
  173. tool_file: ToolFile | None = (
  174. session.query(ToolFile)
  175. .where(
  176. ToolFile.id == tool_file_id,
  177. )
  178. .first()
  179. )
  180. if not tool_file:
  181. return None
  182. blob = storage.load_once(tool_file.file_key)
  183. return blob, tool_file.mimetype
  184. def get_file_generator_by_tool_file_id(self, tool_file_id: str) -> tuple[Optional[Generator], Optional[ToolFile]]:
  185. """
  186. get file binary
  187. :param tool_file_id: the id of the tool file
  188. :return: the binary of the file, mime type
  189. """
  190. with Session(self._engine, expire_on_commit=False) as session:
  191. tool_file: ToolFile | None = (
  192. session.query(ToolFile)
  193. .where(
  194. ToolFile.id == tool_file_id,
  195. )
  196. .first()
  197. )
  198. if not tool_file:
  199. return None, None
  200. stream = storage.load_stream(tool_file.file_key)
  201. return stream, tool_file
# init tool_file_parser
# NOTE(review): this import sits after the class definition instead of at the
# top of the module — presumably to avoid a circular import with
# core.file.tool_file_parser; confirm before moving it.
from core.file.tool_file_parser import set_tool_file_manager_factory


def _factory() -> ToolFileManager:
    """Return a new ToolFileManager constructed with no arguments."""
    return ToolFileManager()


# Runs at import time: passes the zero-argument factory to the parser module.
set_tool_file_manager_factory(_factory)