| import mimetypes | import mimetypes | ||||
| import os | |||||
| import urllib.parse | |||||
| import uuid | import uuid | ||||
| from collections.abc import Callable, Mapping, Sequence | from collections.abc import Callable, Mapping, Sequence | ||||
| from typing import Any, cast | from typing import Any, cast | ||||
def _get_remote_file_info(url: str) -> tuple[str, str, int]:
    """Probe a remote file with a HEAD request and describe it.

    The filename and MIME type are first derived from the URL path and are
    then refined from the response headers when the request returns 200 OK.

    Args:
        url: Address of the remote file.

    Returns:
        A ``(mime_type, filename, file_size)`` tuple. ``mime_type`` is ``""``
        when it cannot be determined, ``filename`` is ``""`` when neither the
        URL path nor the response supplies one, and ``file_size`` is ``-1``
        when the size is unknown.
    """
    # Imported locally so this function does not depend on an edit to the
    # module-level import block.
    from email.message import Message

    file_size = -1

    # Only the path component is used, so query strings and fragments never
    # end up in the filename.
    filename = os.path.basename(urllib.parse.urlparse(url).path)
    # guess_type() yields None for unknown extensions; normalise to "" so the
    # returned MIME type is always a string.
    mime_type = mimetypes.guess_type(filename)[0] or ""

    resp = ssrf_proxy.head(url, follow_redirects=True)
    resp = cast(httpx.Response, resp)
    if resp.status_code != httpx.codes.OK:
        return mime_type, filename, file_size

    if content_disposition := resp.headers.get("Content-Disposition"):
        # Let the stdlib header parser handle quoting, trailing parameters
        # and RFC 2231 ``filename*=`` values instead of naive string splitting.
        header = Message()
        header["Content-Disposition"] = content_disposition
        # The header is controlled by the remote server: keep the final path
        # segment only, so directory components cannot leak into the name.
        declared_name = os.path.basename(header.get_filename() or "")
        if declared_name:
            filename = declared_name
            # Re-guess the MIME type from the server-declared filename.
            mime_type = mimetypes.guess_type(filename)[0] or ""

    content_length = resp.headers.get("Content-Length")
    if content_length is not None:
        try:
            file_size = int(content_length)
        except ValueError:
            # Malformed header: report the size as unknown rather than fail
            # the whole metadata probe.
            file_size = -1

    if not mime_type:
        # Fall back to the declared media type, dropping parameters such as
        # "; charset=utf-8" so only the bare type/subtype is returned.
        content_type = str(resp.headers.get("Content-Type", ""))
        mime_type = content_type.split(";")[0].strip()

    return mime_type, filename, file_size