Vous ne pouvez pas sélectionner plus de 25 sujets. Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. import mimetypes
  2. import os
  3. import platform
  4. import re
  5. import urllib.parse
  6. import warnings
  7. from uuid import uuid4
  8. import httpx
  9. try:
  10. import magic
  11. except ImportError:
  12. if platform.system() == "Windows":
  13. warnings.warn(
  14. "To use python-magic guess MIMETYPE, you need to run `pip install python-magic-bin`", stacklevel=2
  15. )
  16. elif platform.system() == "Darwin":
  17. warnings.warn("To use python-magic guess MIMETYPE, you need to run `brew install libmagic`", stacklevel=2)
  18. elif platform.system() == "Linux":
  19. warnings.warn(
  20. "To use python-magic guess MIMETYPE, you need to run `sudo apt-get install libmagic1`", stacklevel=2
  21. )
  22. else:
  23. warnings.warn("To use python-magic guess MIMETYPE, you need to install `libmagic`", stacklevel=2)
  24. magic = None # type: ignore
  25. from pydantic import BaseModel
class FileInfo(BaseModel):
    """Metadata describing a file, as derived from an HTTP response."""

    # File name; guess_file_info_from_response appends an extension when the
    # detected name has none.
    filename: str
    # Extension including the leading dot (e.g. ".png"); ".bin" when none can
    # be guessed from the MIME type.
    extension: str
    # MIME type guessed from the name/URL, the Content-Type header, or the
    # leading bytes of the body.
    mimetype: str
    # Value of the Content-Length header; -1 when the header is absent.
    size: int
  31. def guess_file_info_from_response(response: httpx.Response):
  32. url = str(response.url)
  33. # Try to extract filename from URL
  34. parsed_url = urllib.parse.urlparse(url)
  35. url_path = parsed_url.path
  36. filename = os.path.basename(url_path)
  37. # If filename couldn't be extracted, use Content-Disposition header
  38. if not filename:
  39. content_disposition = response.headers.get("Content-Disposition")
  40. if content_disposition:
  41. filename_match = re.search(r'filename="?(.+)"?', content_disposition)
  42. if filename_match:
  43. filename = filename_match.group(1)
  44. # If still no filename, generate a unique one
  45. if not filename:
  46. unique_name = str(uuid4())
  47. filename = f"{unique_name}"
  48. # Guess MIME type from filename first, then URL
  49. mimetype, _ = mimetypes.guess_type(filename)
  50. if mimetype is None:
  51. mimetype, _ = mimetypes.guess_type(url)
  52. if mimetype is None:
  53. # If guessing fails, use Content-Type from response headers
  54. mimetype = response.headers.get("Content-Type", "application/octet-stream")
  55. # Use python-magic to guess MIME type if still unknown or generic
  56. if mimetype == "application/octet-stream" and magic is not None:
  57. try:
  58. mimetype = magic.from_buffer(response.content[:1024], mime=True)
  59. except magic.MagicException:
  60. pass
  61. extension = os.path.splitext(filename)[1]
  62. # Ensure filename has an extension
  63. if not extension:
  64. extension = mimetypes.guess_extension(mimetype) or ".bin"
  65. filename = f"{filename}{extension}"
  66. return FileInfo(
  67. filename=filename,
  68. extension=extension,
  69. mimetype=mimetype,
  70. size=int(response.headers.get("Content-Length", -1)),
  71. )