Nelze vybrat více než 25 témat. Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

tool_file_manager.py 7.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. import base64
  2. import hashlib
  3. import hmac
  4. import logging
  5. import os
  6. import time
  7. from mimetypes import guess_extension, guess_type
  8. from typing import Optional, Union
  9. from uuid import uuid4
  10. import httpx
  11. from sqlalchemy.orm import Session
  12. from configs import dify_config
  13. from core.helper import ssrf_proxy
  14. from extensions.ext_database import db as global_db
  15. from extensions.ext_storage import storage
  16. from models.model import MessageFile
  17. from models.tools import ToolFile
  18. logger = logging.getLogger(__name__)
  19. from sqlalchemy.engine import Engine
  20. class ToolFileManager:
  21. _engine: Engine
  22. def __init__(self, engine: Engine | None = None):
  23. if engine is None:
  24. engine = global_db.engine
  25. self._engine = engine
  26. @staticmethod
  27. def sign_file(tool_file_id: str, extension: str) -> str:
  28. """
  29. sign file to get a temporary url
  30. """
  31. base_url = dify_config.FILES_URL
  32. file_preview_url = f"{base_url}/files/tools/{tool_file_id}{extension}"
  33. timestamp = str(int(time.time()))
  34. nonce = os.urandom(16).hex()
  35. data_to_sign = f"file-preview|{tool_file_id}|{timestamp}|{nonce}"
  36. secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
  37. sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
  38. encoded_sign = base64.urlsafe_b64encode(sign).decode()
  39. return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
  40. @staticmethod
  41. def verify_file(file_id: str, timestamp: str, nonce: str, sign: str) -> bool:
  42. """
  43. verify signature
  44. """
  45. data_to_sign = f"file-preview|{file_id}|{timestamp}|{nonce}"
  46. secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
  47. recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
  48. recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
  49. # verify signature
  50. if sign != recalculated_encoded_sign:
  51. return False
  52. current_time = int(time.time())
  53. return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT
  54. def create_file_by_raw(
  55. self,
  56. *,
  57. user_id: str,
  58. tenant_id: str,
  59. conversation_id: Optional[str],
  60. file_binary: bytes,
  61. mimetype: str,
  62. filename: Optional[str] = None,
  63. ) -> ToolFile:
  64. extension = guess_extension(mimetype) or ".bin"
  65. unique_name = uuid4().hex
  66. unique_filename = f"{unique_name}{extension}"
  67. # default just as before
  68. present_filename = unique_filename
  69. if filename is not None:
  70. has_extension = len(filename.split(".")) > 1
  71. # Add extension flexibly
  72. present_filename = filename if has_extension else f"{filename}{extension}"
  73. filepath = f"tools/{tenant_id}/{unique_filename}"
  74. storage.save(filepath, file_binary)
  75. with Session(self._engine, expire_on_commit=False) as session:
  76. tool_file = ToolFile(
  77. user_id=user_id,
  78. tenant_id=tenant_id,
  79. conversation_id=conversation_id,
  80. file_key=filepath,
  81. mimetype=mimetype,
  82. name=present_filename,
  83. size=len(file_binary),
  84. )
  85. session.add(tool_file)
  86. session.commit()
  87. session.refresh(tool_file)
  88. return tool_file
  89. def create_file_by_url(
  90. self,
  91. user_id: str,
  92. tenant_id: str,
  93. file_url: str,
  94. conversation_id: Optional[str] = None,
  95. ) -> ToolFile:
  96. # try to download image
  97. try:
  98. response = ssrf_proxy.get(file_url)
  99. response.raise_for_status()
  100. blob = response.content
  101. except httpx.TimeoutException:
  102. raise ValueError(f"timeout when downloading file from {file_url}")
  103. mimetype = (
  104. guess_type(file_url)[0]
  105. or response.headers.get("Content-Type", "").split(";")[0].strip()
  106. or "application/octet-stream"
  107. )
  108. extension = guess_extension(mimetype) or ".bin"
  109. unique_name = uuid4().hex
  110. filename = f"{unique_name}{extension}"
  111. filepath = f"tools/{tenant_id}/{filename}"
  112. storage.save(filepath, blob)
  113. with Session(self._engine, expire_on_commit=False) as session:
  114. tool_file = ToolFile(
  115. user_id=user_id,
  116. tenant_id=tenant_id,
  117. conversation_id=conversation_id,
  118. file_key=filepath,
  119. mimetype=mimetype,
  120. original_url=file_url,
  121. name=filename,
  122. size=len(blob),
  123. )
  124. session.add(tool_file)
  125. session.commit()
  126. return tool_file
  127. def get_file_binary(self, id: str) -> Union[tuple[bytes, str], None]:
  128. """
  129. get file binary
  130. :param id: the id of the file
  131. :return: the binary of the file, mime type
  132. """
  133. with Session(self._engine, expire_on_commit=False) as session:
  134. tool_file: ToolFile | None = (
  135. session.query(ToolFile)
  136. .filter(
  137. ToolFile.id == id,
  138. )
  139. .first()
  140. )
  141. if not tool_file:
  142. return None
  143. blob = storage.load_once(tool_file.file_key)
  144. return blob, tool_file.mimetype
  145. def get_file_binary_by_message_file_id(self, id: str) -> Union[tuple[bytes, str], None]:
  146. """
  147. get file binary
  148. :param id: the id of the file
  149. :return: the binary of the file, mime type
  150. """
  151. with Session(self._engine, expire_on_commit=False) as session:
  152. message_file: MessageFile | None = (
  153. session.query(MessageFile)
  154. .filter(
  155. MessageFile.id == id,
  156. )
  157. .first()
  158. )
  159. # Check if message_file is not None
  160. if message_file is not None:
  161. # get tool file id
  162. if message_file.url is not None:
  163. tool_file_id = message_file.url.split("/")[-1]
  164. # trim extension
  165. tool_file_id = tool_file_id.split(".")[0]
  166. else:
  167. tool_file_id = None
  168. else:
  169. tool_file_id = None
  170. tool_file: ToolFile | None = (
  171. session.query(ToolFile)
  172. .filter(
  173. ToolFile.id == tool_file_id,
  174. )
  175. .first()
  176. )
  177. if not tool_file:
  178. return None
  179. blob = storage.load_once(tool_file.file_key)
  180. return blob, tool_file.mimetype
  181. def get_file_generator_by_tool_file_id(self, tool_file_id: str):
  182. """
  183. get file binary
  184. :param tool_file_id: the id of the tool file
  185. :return: the binary of the file, mime type
  186. """
  187. with Session(self._engine, expire_on_commit=False) as session:
  188. tool_file: ToolFile | None = (
  189. session.query(ToolFile)
  190. .filter(
  191. ToolFile.id == tool_file_id,
  192. )
  193. .first()
  194. )
  195. if not tool_file:
  196. return None, None
  197. stream = storage.load_stream(tool_file.file_key)
  198. return stream, tool_file
  199. # init tool_file_parser
  200. from core.file.tool_file_parser import set_tool_file_manager_factory
  201. def _factory() -> ToolFileManager:
  202. return ToolFileManager()
  203. set_tool_file_manager_factory(_factory)