You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

datasource_file_manager.py 7.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. import base64
  2. import hashlib
  3. import hmac
  4. import logging
  5. import os
  6. import time
  7. from datetime import datetime
  8. from mimetypes import guess_extension, guess_type
  9. from typing import Optional, Union
  10. from uuid import uuid4
  11. import httpx
  12. from configs import dify_config
  13. from core.helper import ssrf_proxy
  14. from extensions.ext_database import db
  15. from extensions.ext_storage import storage
  16. from models.enums import CreatorUserRole
  17. from models.model import MessageFile, UploadFile
  18. from models.tools import ToolFile
  19. logger = logging.getLogger(__name__)
  20. class DatasourceFileManager:
  21. @staticmethod
  22. def sign_file(datasource_file_id: str, extension: str) -> str:
  23. """
  24. sign file to get a temporary url
  25. """
  26. base_url = dify_config.FILES_URL
  27. file_preview_url = f"{base_url}/files/datasources/{datasource_file_id}{extension}"
  28. timestamp = str(int(time.time()))
  29. nonce = os.urandom(16).hex()
  30. data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
  31. secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
  32. sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
  33. encoded_sign = base64.urlsafe_b64encode(sign).decode()
  34. return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
  35. @staticmethod
  36. def verify_file(datasource_file_id: str, timestamp: str, nonce: str, sign: str) -> bool:
  37. """
  38. verify signature
  39. """
  40. data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
  41. secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
  42. recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
  43. recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
  44. # verify signature
  45. if sign != recalculated_encoded_sign:
  46. return False
  47. current_time = int(time.time())
  48. return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT
  49. @staticmethod
  50. def create_file_by_raw(
  51. *,
  52. user_id: str,
  53. tenant_id: str,
  54. conversation_id: Optional[str],
  55. file_binary: bytes,
  56. mimetype: str,
  57. filename: Optional[str] = None,
  58. ) -> UploadFile:
  59. extension = guess_extension(mimetype) or ".bin"
  60. unique_name = uuid4().hex
  61. unique_filename = f"{unique_name}{extension}"
  62. # default just as before
  63. present_filename = unique_filename
  64. if filename is not None:
  65. has_extension = len(filename.split(".")) > 1
  66. # Add extension flexibly
  67. present_filename = filename if has_extension else f"{filename}{extension}"
  68. filepath = f"datasources/{tenant_id}/{unique_filename}"
  69. storage.save(filepath, file_binary)
  70. upload_file = UploadFile(
  71. tenant_id=tenant_id,
  72. storage_type=dify_config.STORAGE_TYPE,
  73. key=filepath,
  74. name=present_filename,
  75. size=len(file_binary),
  76. extension=extension,
  77. mime_type=mimetype,
  78. created_by_role=CreatorUserRole.ACCOUNT,
  79. created_by=user_id,
  80. used=False,
  81. hash=hashlib.sha3_256(file_binary).hexdigest(),
  82. source_url="",
  83. created_at=datetime.now(),
  84. )
  85. db.session.add(upload_file)
  86. db.session.commit()
  87. db.session.refresh(upload_file)
  88. return upload_file
  89. @staticmethod
  90. def create_file_by_url(
  91. user_id: str,
  92. tenant_id: str,
  93. file_url: str,
  94. conversation_id: Optional[str] = None,
  95. ) -> UploadFile:
  96. # try to download image
  97. try:
  98. response = ssrf_proxy.get(file_url)
  99. response.raise_for_status()
  100. blob = response.content
  101. except httpx.TimeoutException:
  102. raise ValueError(f"timeout when downloading file from {file_url}")
  103. mimetype = (
  104. guess_type(file_url)[0]
  105. or response.headers.get("Content-Type", "").split(";")[0].strip()
  106. or "application/octet-stream"
  107. )
  108. extension = guess_extension(mimetype) or ".bin"
  109. unique_name = uuid4().hex
  110. filename = f"{unique_name}{extension}"
  111. filepath = f"tools/{tenant_id}/{filename}"
  112. storage.save(filepath, blob)
  113. upload_file = UploadFile(
  114. tenant_id=tenant_id,
  115. storage_type=dify_config.STORAGE_TYPE,
  116. key=filepath,
  117. name=filename,
  118. size=len(blob),
  119. extension=extension,
  120. mime_type=mimetype,
  121. created_by_role=CreatorUserRole.ACCOUNT,
  122. created_by=user_id,
  123. used=False,
  124. hash=hashlib.sha3_256(blob).hexdigest(),
  125. source_url=file_url,
  126. created_at=datetime.now(),
  127. )
  128. db.session.add(upload_file)
  129. db.session.commit()
  130. return upload_file
  131. @staticmethod
  132. def get_file_binary(id: str) -> Union[tuple[bytes, str], None]:
  133. """
  134. get file binary
  135. :param id: the id of the file
  136. :return: the binary of the file, mime type
  137. """
  138. upload_file: UploadFile | None = (
  139. db.session.query(UploadFile)
  140. .filter(
  141. UploadFile.id == id,
  142. )
  143. .first()
  144. )
  145. if not upload_file:
  146. return None
  147. blob = storage.load_once(upload_file.key)
  148. return blob, upload_file.mime_type
  149. @staticmethod
  150. def get_file_binary_by_message_file_id(id: str) -> Union[tuple[bytes, str], None]:
  151. """
  152. get file binary
  153. :param id: the id of the file
  154. :return: the binary of the file, mime type
  155. """
  156. message_file: MessageFile | None = (
  157. db.session.query(MessageFile)
  158. .filter(
  159. MessageFile.id == id,
  160. )
  161. .first()
  162. )
  163. # Check if message_file is not None
  164. if message_file is not None:
  165. # get tool file id
  166. if message_file.url is not None:
  167. tool_file_id = message_file.url.split("/")[-1]
  168. # trim extension
  169. tool_file_id = tool_file_id.split(".")[0]
  170. else:
  171. tool_file_id = None
  172. else:
  173. tool_file_id = None
  174. tool_file: ToolFile | None = (
  175. db.session.query(ToolFile)
  176. .filter(
  177. ToolFile.id == tool_file_id,
  178. )
  179. .first()
  180. )
  181. if not tool_file:
  182. return None
  183. blob = storage.load_once(tool_file.file_key)
  184. return blob, tool_file.mimetype
  185. @staticmethod
  186. def get_file_generator_by_upload_file_id(upload_file_id: str):
  187. """
  188. get file binary
  189. :param tool_file_id: the id of the tool file
  190. :return: the binary of the file, mime type
  191. """
  192. upload_file: UploadFile | None = (
  193. db.session.query(UploadFile)
  194. .filter(
  195. UploadFile.id == upload_file_id,
  196. )
  197. .first()
  198. )
  199. if not upload_file:
  200. return None, None
  201. stream = storage.load_stream(upload_file.key)
  202. return stream, upload_file.mime_type
  203. # init tool_file_parser
  204. # from core.file.datasource_file_parser import datasource_file_manager
  205. #
  206. # datasource_file_manager["manager"] = DatasourceFileManager