|
|
|
@@ -1,4 +1,6 @@ |
|
|
|
import mimetypes |
|
|
|
import os |
|
|
|
import urllib.parse |
|
|
|
import uuid |
|
|
|
from collections.abc import Callable, Mapping, Sequence |
|
|
|
from typing import Any, cast |
|
|
|
@@ -240,16 +242,21 @@ def _build_from_remote_url( |
|
|
|
|
|
|
|
def _get_remote_file_info(url: str): |
|
|
|
file_size = -1 |
|
|
|
filename = url.split("/")[-1].split("?")[0] or "unknown_file" |
|
|
|
mime_type = mimetypes.guess_type(filename)[0] or "" |
|
|
|
parsed_url = urllib.parse.urlparse(url) |
|
|
|
url_path = parsed_url.path |
|
|
|
filename = os.path.basename(url_path) |
|
|
|
|
|
|
|
# Initialize mime_type from filename as fallback |
|
|
|
mime_type, _ = mimetypes.guess_type(filename) |
|
|
|
|
|
|
|
resp = ssrf_proxy.head(url, follow_redirects=True) |
|
|
|
resp = cast(httpx.Response, resp) |
|
|
|
if resp.status_code == httpx.codes.OK: |
|
|
|
if content_disposition := resp.headers.get("Content-Disposition"): |
|
|
|
filename = str(content_disposition.split("filename=")[-1].strip('"')) |
|
|
|
# Re-guess mime_type from updated filename |
|
|
|
mime_type, _ = mimetypes.guess_type(filename) |
|
|
|
file_size = int(resp.headers.get("Content-Length", file_size)) |
|
|
|
mime_type = mime_type or str(resp.headers.get("Content-Type", "")) |
|
|
|
|
|
|
|
return mime_type, filename, file_size |
|
|
|
|