| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247 | 
							- import base64
 - import hashlib
 - import hmac
 - import logging
 - import os
 - import time
 - from datetime import datetime
 - from mimetypes import guess_extension, guess_type
 - from typing import Optional, Union
 - from uuid import uuid4
 - 
 - import httpx
 - 
 - from configs import dify_config
 - from core.helper import ssrf_proxy
 - from extensions.ext_database import db
 - from extensions.ext_storage import storage
 - from models.enums import CreatorUserRole
 - from models.model import MessageFile, UploadFile
 - from models.tools import ToolFile
 - 
 - logger = logging.getLogger(__name__)
 - 
 - 
 - class DatasourceFileManager:
 -     @staticmethod
 -     def sign_file(datasource_file_id: str, extension: str) -> str:
 -         """
 -         sign file to get a temporary url
 -         """
 -         base_url = dify_config.FILES_URL
 -         file_preview_url = f"{base_url}/files/datasources/{datasource_file_id}{extension}"
 - 
 -         timestamp = str(int(time.time()))
 -         nonce = os.urandom(16).hex()
 -         data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
 -         secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
 -         sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
 -         encoded_sign = base64.urlsafe_b64encode(sign).decode()
 - 
 -         return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
 - 
 -     @staticmethod
 -     def verify_file(datasource_file_id: str, timestamp: str, nonce: str, sign: str) -> bool:
 -         """
 -         verify signature
 -         """
 -         data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
 -         secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
 -         recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
 -         recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
 - 
 -         # verify signature
 -         if sign != recalculated_encoded_sign:
 -             return False
 - 
 -         current_time = int(time.time())
 -         return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT
 - 
 -     @staticmethod
 -     def create_file_by_raw(
 -         *,
 -         user_id: str,
 -         tenant_id: str,
 -         conversation_id: Optional[str],
 -         file_binary: bytes,
 -         mimetype: str,
 -         filename: Optional[str] = None,
 -     ) -> UploadFile:
 -         extension = guess_extension(mimetype) or ".bin"
 -         unique_name = uuid4().hex
 -         unique_filename = f"{unique_name}{extension}"
 -         # default just as before
 -         present_filename = unique_filename
 -         if filename is not None:
 -             has_extension = len(filename.split(".")) > 1
 -             # Add extension flexibly
 -             present_filename = filename if has_extension else f"{filename}{extension}"
 -         filepath = f"datasources/{tenant_id}/{unique_filename}"
 -         storage.save(filepath, file_binary)
 - 
 -         upload_file = UploadFile(
 -             tenant_id=tenant_id,
 -             storage_type=dify_config.STORAGE_TYPE,
 -             key=filepath,
 -             name=present_filename,
 -             size=len(file_binary),
 -             extension=extension,
 -             mime_type=mimetype,
 -             created_by_role=CreatorUserRole.ACCOUNT,
 -             created_by=user_id,
 -             used=False,
 -             hash=hashlib.sha3_256(file_binary).hexdigest(),
 -             source_url="",
 -             created_at=datetime.now(),
 -         )
 - 
 -         db.session.add(upload_file)
 -         db.session.commit()
 -         db.session.refresh(upload_file)
 - 
 -         return upload_file
 - 
 -     @staticmethod
 -     def create_file_by_url(
 -         user_id: str,
 -         tenant_id: str,
 -         file_url: str,
 -         conversation_id: Optional[str] = None,
 -     ) -> UploadFile:
 -         # try to download image
 -         try:
 -             response = ssrf_proxy.get(file_url)
 -             response.raise_for_status()
 -             blob = response.content
 -         except httpx.TimeoutException:
 -             raise ValueError(f"timeout when downloading file from {file_url}")
 - 
 -         mimetype = (
 -             guess_type(file_url)[0]
 -             or response.headers.get("Content-Type", "").split(";")[0].strip()
 -             or "application/octet-stream"
 -         )
 -         extension = guess_extension(mimetype) or ".bin"
 -         unique_name = uuid4().hex
 -         filename = f"{unique_name}{extension}"
 -         filepath = f"tools/{tenant_id}/{filename}"
 -         storage.save(filepath, blob)
 - 
 -         upload_file = UploadFile(
 -             tenant_id=tenant_id,
 -             storage_type=dify_config.STORAGE_TYPE,
 -             key=filepath,
 -             name=filename,
 -             size=len(blob),
 -             extension=extension,
 -             mime_type=mimetype,
 -             created_by_role=CreatorUserRole.ACCOUNT,
 -             created_by=user_id,
 -             used=False,
 -             hash=hashlib.sha3_256(blob).hexdigest(),
 -             source_url=file_url,
 -             created_at=datetime.now(),
 -         )
 - 
 -         db.session.add(upload_file)
 -         db.session.commit()
 - 
 -         return upload_file
 - 
 -     @staticmethod
 -     def get_file_binary(id: str) -> Union[tuple[bytes, str], None]:
 -         """
 -         get file binary
 - 
 -         :param id: the id of the file
 - 
 -         :return: the binary of the file, mime type
 -         """
 -         upload_file: UploadFile | None = (
 -             db.session.query(UploadFile)
 -             .filter(
 -                 UploadFile.id == id,
 -             )
 -             .first()
 -         )
 - 
 -         if not upload_file:
 -             return None
 - 
 -         blob = storage.load_once(upload_file.key)
 - 
 -         return blob, upload_file.mime_type
 - 
 -     @staticmethod
 -     def get_file_binary_by_message_file_id(id: str) -> Union[tuple[bytes, str], None]:
 -         """
 -         get file binary
 - 
 -         :param id: the id of the file
 - 
 -         :return: the binary of the file, mime type
 -         """
 -         message_file: MessageFile | None = (
 -             db.session.query(MessageFile)
 -             .filter(
 -                 MessageFile.id == id,
 -             )
 -             .first()
 -         )
 - 
 -         # Check if message_file is not None
 -         if message_file is not None:
 -             # get tool file id
 -             if message_file.url is not None:
 -                 tool_file_id = message_file.url.split("/")[-1]
 -                 # trim extension
 -                 tool_file_id = tool_file_id.split(".")[0]
 -             else:
 -                 tool_file_id = None
 -         else:
 -             tool_file_id = None
 - 
 -         tool_file: ToolFile | None = (
 -             db.session.query(ToolFile)
 -             .filter(
 -                 ToolFile.id == tool_file_id,
 -             )
 -             .first()
 -         )
 - 
 -         if not tool_file:
 -             return None
 - 
 -         blob = storage.load_once(tool_file.file_key)
 - 
 -         return blob, tool_file.mimetype
 - 
 -     @staticmethod
 -     def get_file_generator_by_upload_file_id(upload_file_id: str):
 -         """
 -         get file binary
 - 
 -         :param tool_file_id: the id of the tool file
 - 
 -         :return: the binary of the file, mime type
 -         """
 -         upload_file: UploadFile | None = (
 -             db.session.query(UploadFile)
 -             .filter(
 -                 UploadFile.id == upload_file_id,
 -             )
 -             .first()
 -         )
 - 
 -         if not upload_file:
 -             return None, None
 - 
 -         stream = storage.load_stream(upload_file.key)
 - 
 -         return stream, upload_file.mime_type
 - 
 - 
 - # init datasource_file_parser (currently disabled)
 - # from core.file.datasource_file_parser import datasource_file_manager
 - #
 - # datasource_file_manager["manager"] = DatasourceFileManager
 
 
  |