您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

helpers.py 2.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. import contextlib
  2. import mimetypes
  3. import os
  4. import platform
  5. import re
  6. import urllib.parse
  7. import warnings
  8. from uuid import uuid4
  9. import httpx
  10. try:
  11. import magic
  12. except ImportError:
  13. if platform.system() == "Windows":
  14. warnings.warn(
  15. "To use python-magic guess MIMETYPE, you need to run `pip install python-magic-bin`", stacklevel=2
  16. )
  17. elif platform.system() == "Darwin":
  18. warnings.warn("To use python-magic guess MIMETYPE, you need to run `brew install libmagic`", stacklevel=2)
  19. elif platform.system() == "Linux":
  20. warnings.warn(
  21. "To use python-magic guess MIMETYPE, you need to run `sudo apt-get install libmagic1`", stacklevel=2
  22. )
  23. else:
  24. warnings.warn("To use python-magic guess MIMETYPE, you need to install `libmagic`", stacklevel=2)
  25. magic = None # type: ignore
  26. from pydantic import BaseModel
  27. class FileInfo(BaseModel):
  28. filename: str
  29. extension: str
  30. mimetype: str
  31. size: int
  32. def guess_file_info_from_response(response: httpx.Response):
  33. url = str(response.url)
  34. # Try to extract filename from URL
  35. parsed_url = urllib.parse.urlparse(url)
  36. url_path = parsed_url.path
  37. filename = os.path.basename(url_path)
  38. # If filename couldn't be extracted, use Content-Disposition header
  39. if not filename:
  40. content_disposition = response.headers.get("Content-Disposition")
  41. if content_disposition:
  42. filename_match = re.search(r'filename="?(.+)"?', content_disposition)
  43. if filename_match:
  44. filename = filename_match.group(1)
  45. # If still no filename, generate a unique one
  46. if not filename:
  47. unique_name = str(uuid4())
  48. filename = f"{unique_name}"
  49. # Guess MIME type from filename first, then URL
  50. mimetype, _ = mimetypes.guess_type(filename)
  51. if mimetype is None:
  52. mimetype, _ = mimetypes.guess_type(url)
  53. if mimetype is None:
  54. # If guessing fails, use Content-Type from response headers
  55. mimetype = response.headers.get("Content-Type", "application/octet-stream")
  56. # Use python-magic to guess MIME type if still unknown or generic
  57. if mimetype == "application/octet-stream" and magic is not None:
  58. with contextlib.suppress(magic.MagicException):
  59. mimetype = magic.from_buffer(response.content[:1024], mime=True)
  60. extension = os.path.splitext(filename)[1]
  61. # Ensure filename has an extension
  62. if not extension:
  63. extension = mimetypes.guess_extension(mimetype) or ".bin"
  64. filename = f"{filename}{extension}"
  65. return FileInfo(
  66. filename=filename,
  67. extension=extension,
  68. mimetype=mimetype,
  69. size=int(response.headers.get("Content-Length", -1)),
  70. )