Quellcode durchsuchen

fix(api): enhance file factory URL handling (#9631)

tags/0.10.1
-LAN- vor 1 Jahr
Ursprung
Commit
ef5f476cd6
Es ist kein Account mit der E-Mail-Adresse des Committers verbunden
1 geänderte Datei mit 17 neuen und 12 gelöschten Zeilen
  1. 17
    12
      api/factories/file_factory.py

+ 17
- 12
api/factories/file_factory.py Datei anzeigen

@@ -2,6 +2,7 @@ import mimetypes
from collections.abc import Mapping, Sequence
from typing import Any

import httpx
from sqlalchemy import select

from constants import AUDIO_EXTENSIONS, DOCUMENT_EXTENSIONS, IMAGE_EXTENSIONS, VIDEO_EXTENSIONS
@@ -154,7 +155,7 @@ def _build_from_local_file(
file = File(
id=mapping.get("id"),
filename=row.name,
extension=row.extension,
extension="." + row.extension,
mime_type=row.mime_type,
tenant_id=tenant_id,
type=file_type,
@@ -177,25 +178,29 @@ def _build_from_remote_url(
url = mapping.get("url")
if not url:
raise ValueError("Invalid file url")
resp = ssrf_proxy.head(url, follow_redirects=True)
resp.raise_for_status()

# Try to extract filename from response headers or URL
content_disposition = resp.headers.get("Content-Disposition")
if content_disposition:
filename = content_disposition.split("filename=")[-1].strip('"')
resp = ssrf_proxy.head(url, follow_redirects=True)
if resp.status_code == httpx.codes.OK:
# Try to extract filename from response headers or URL
content_disposition = resp.headers.get("Content-Disposition")
if content_disposition:
filename = content_disposition.split("filename=")[-1].strip('"')
else:
filename = url.split("/")[-1].split("?")[0]
# Create the File object
file_size = int(resp.headers.get("Content-Length", -1))
mime_type = str(resp.headers.get("Content-Type", ""))
else:
filename = url.split("/")[-1].split("?")[0]
filename = ""
file_size = -1
mime_type = ""

# If filename is empty, set a default one
if not filename:
filename = "unknown_file"

# Determine file extension
extension = "." + filename.split(".")[-1] if "." in filename else ".bin"

# Create the File object
file_size = int(resp.headers.get("Content-Length", -1))
mime_type = str(resp.headers.get("Content-Type", ""))
if not mime_type:
mime_type, _ = mimetypes.guess_type(url)
file = File(

Laden…
Abbrechen
Speichern