Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

datasource_file_manager.py 7.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. import base64
  2. import hashlib
  3. import hmac
  4. import logging
  5. import os
  6. import time
  7. from mimetypes import guess_extension, guess_type
  8. from typing import Optional, Union
  9. from uuid import uuid4
  10. import httpx
  11. from configs import dify_config
  12. from core.helper import ssrf_proxy
  13. from extensions.ext_database import db
  14. from extensions.ext_storage import storage
  15. from models.enums import CreatedByRole
  16. from models.model import MessageFile, UploadFile
  17. from models.tools import ToolFile
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
  19. class DatasourceFileManager:
  20. @staticmethod
  21. def sign_file(datasource_file_id: str, extension: str) -> str:
  22. """
  23. sign file to get a temporary url
  24. """
  25. base_url = dify_config.FILES_URL
  26. file_preview_url = f"{base_url}/files/datasources/{datasource_file_id}{extension}"
  27. timestamp = str(int(time.time()))
  28. nonce = os.urandom(16).hex()
  29. data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
  30. secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
  31. sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
  32. encoded_sign = base64.urlsafe_b64encode(sign).decode()
  33. return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
  34. @staticmethod
  35. def verify_file(datasource_file_id: str, timestamp: str, nonce: str, sign: str) -> bool:
  36. """
  37. verify signature
  38. """
  39. data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
  40. secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
  41. recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
  42. recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
  43. # verify signature
  44. if sign != recalculated_encoded_sign:
  45. return False
  46. current_time = int(time.time())
  47. return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT
  48. @staticmethod
  49. def create_file_by_raw(
  50. *,
  51. user_id: str,
  52. tenant_id: str,
  53. conversation_id: Optional[str],
  54. file_binary: bytes,
  55. mimetype: str,
  56. filename: Optional[str] = None,
  57. ) -> UploadFile:
  58. extension = guess_extension(mimetype) or ".bin"
  59. unique_name = uuid4().hex
  60. unique_filename = f"{unique_name}{extension}"
  61. # default just as before
  62. present_filename = unique_filename
  63. if filename is not None:
  64. has_extension = len(filename.split(".")) > 1
  65. # Add extension flexibly
  66. present_filename = filename if has_extension else f"{filename}{extension}"
  67. filepath = f"datasources/{tenant_id}/{unique_filename}"
  68. storage.save(filepath, file_binary)
  69. upload_file = UploadFile(
  70. tenant_id=tenant_id,
  71. storage_type=dify_config.STORAGE_TYPE,
  72. key=filepath,
  73. name=present_filename,
  74. size=len(file_binary),
  75. extension=extension,
  76. mime_type=mimetype,
  77. created_by_role=CreatedByRole.ACCOUNT,
  78. created_by=user_id,
  79. used=False,
  80. hash=hashlib.sha3_256(file_binary).hexdigest(),
  81. source_url="",
  82. )
  83. db.session.add(upload_file)
  84. db.session.commit()
  85. db.session.refresh(upload_file)
  86. return upload_file
  87. @staticmethod
  88. def create_file_by_url(
  89. user_id: str,
  90. tenant_id: str,
  91. file_url: str,
  92. conversation_id: Optional[str] = None,
  93. ) -> UploadFile:
  94. # try to download image
  95. try:
  96. response = ssrf_proxy.get(file_url)
  97. response.raise_for_status()
  98. blob = response.content
  99. except httpx.TimeoutException:
  100. raise ValueError(f"timeout when downloading file from {file_url}")
  101. mimetype = (
  102. guess_type(file_url)[0]
  103. or response.headers.get("Content-Type", "").split(";")[0].strip()
  104. or "application/octet-stream"
  105. )
  106. extension = guess_extension(mimetype) or ".bin"
  107. unique_name = uuid4().hex
  108. filename = f"{unique_name}{extension}"
  109. filepath = f"tools/{tenant_id}/{filename}"
  110. storage.save(filepath, blob)
  111. upload_file = UploadFile(
  112. tenant_id=tenant_id,
  113. storage_type=dify_config.STORAGE_TYPE,
  114. key=filepath,
  115. name=filename,
  116. size=len(blob),
  117. extension=extension,
  118. mime_type=mimetype,
  119. created_by_role=CreatedByRole.ACCOUNT,
  120. created_by=user_id,
  121. used=False,
  122. hash=hashlib.sha3_256(blob).hexdigest(),
  123. source_url=file_url,
  124. )
  125. db.session.add(upload_file)
  126. db.session.commit()
  127. return upload_file
  128. @staticmethod
  129. def get_file_binary(id: str) -> Union[tuple[bytes, str], None]:
  130. """
  131. get file binary
  132. :param id: the id of the file
  133. :return: the binary of the file, mime type
  134. """
  135. upload_file: UploadFile | None = (
  136. db.session.query(UploadFile)
  137. .filter(
  138. UploadFile.id == id,
  139. )
  140. .first()
  141. )
  142. if not upload_file:
  143. return None
  144. blob = storage.load_once(upload_file.key)
  145. return blob, upload_file.mime_type
  146. @staticmethod
  147. def get_file_binary_by_message_file_id(id: str) -> Union[tuple[bytes, str], None]:
  148. """
  149. get file binary
  150. :param id: the id of the file
  151. :return: the binary of the file, mime type
  152. """
  153. message_file: MessageFile | None = (
  154. db.session.query(MessageFile)
  155. .filter(
  156. MessageFile.id == id,
  157. )
  158. .first()
  159. )
  160. # Check if message_file is not None
  161. if message_file is not None:
  162. # get tool file id
  163. if message_file.url is not None:
  164. tool_file_id = message_file.url.split("/")[-1]
  165. # trim extension
  166. tool_file_id = tool_file_id.split(".")[0]
  167. else:
  168. tool_file_id = None
  169. else:
  170. tool_file_id = None
  171. tool_file: ToolFile | None = (
  172. db.session.query(ToolFile)
  173. .filter(
  174. ToolFile.id == tool_file_id,
  175. )
  176. .first()
  177. )
  178. if not tool_file:
  179. return None
  180. blob = storage.load_once(tool_file.file_key)
  181. return blob, tool_file.mimetype
  182. @staticmethod
  183. def get_file_generator_by_upload_file_id(upload_file_id: str):
  184. """
  185. get file binary
  186. :param tool_file_id: the id of the tool file
  187. :return: the binary of the file, mime type
  188. """
  189. upload_file: UploadFile | None = (
  190. db.session.query(UploadFile)
  191. .filter(
  192. UploadFile.id == upload_file_id,
  193. )
  194. .first()
  195. )
  196. if not upload_file:
  197. return None, None
  198. stream = storage.load_stream(upload_file.key)
  199. return stream, upload_file.mime_type
# Register DatasourceFileManager under the "manager" key of the
# datasource_file_manager object from core.file.datasource_file_parser.
# NOTE(review): the import sits at the bottom of the module, presumably to avoid
# a circular import between the two modules — confirm before moving it.
from core.file.datasource_file_parser import datasource_file_manager
datasource_file_manager["manager"] = DatasourceFileManager