Browse Source

Refa: better MIME content type (#8801)

### What problem does this PR solve?

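Unify MIME content-type handling: document and file downloads now look up the lowercased file extension in a shared extension-to-MIME-type map (`CONTENT_TYPE_MAP`), falling back to `image/<ext>` or `application/<ext>` when the extension is not in the map.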

### Type of change

- [x] Refactoring
tags/v0.20.0
Yongteng Lei 3 months ago
parent
commit
72c19b44c3
No account linked to committer's email address
3 changed files with 58 additions and 11 deletions
  1. 6
    4
      api/apps/document_app.py
  2. 7
    7
      api/apps/file_app.py
  3. 45
    0
      api/utils/web_utils.py

+ 6
- 4
api/apps/document_app.py View File

@@ -42,7 +42,7 @@ from api.utils.api_utils import (
validate_request,
)
from api.utils.file_utils import filename_type, get_project_base_directory, thumbnail
from api.utils.web_utils import html2pdf, is_valid_url
from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url
from deepdoc.parser.html_parser import RAGFlowHtmlParser
from rag.nlp import search
from rag.utils.storage_factory import STORAGE_IMPL
@@ -505,12 +505,14 @@ def get(doc_id):
b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
response = flask.make_response(STORAGE_IMPL.get(b, n))

ext = re.search(r"\.([^.]+)$", doc.name)
ext = re.search(r"\.([^.]+)$", doc.name.lower())
ext = ext.group(1) if ext else None
if ext:
if doc.type == FileType.VISUAL.value:
response.headers.set("Content-Type", "image/%s" % ext.group(1))
content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
else:
response.headers.set("Content-Type", "application/%s" % ext.group(1))
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
response.headers.set("Content-Type", content_type)
return response
except Exception as e:
return server_error_response(e)

+ 7
- 7
api/apps/file_app.py View File

@@ -31,6 +31,7 @@ from api.db.services.file_service import FileService
from api import settings
from api.utils.api_utils import get_json_result
from api.utils.file_utils import filename_type
from api.utils.web_utils import CONTENT_TYPE_MAP
from rag.utils.storage_factory import STORAGE_IMPL


@@ -334,15 +335,14 @@ def get(file_id):
blob = STORAGE_IMPL.get(b, n)

response = flask.make_response(blob)
ext = re.search(r"\.([^.]+)$", file.name)
ext = re.search(r"\.([^.]+)$", file.name.lower())
ext = ext.group(1) if ext else None
if ext:
if file.type == FileType.VISUAL.value:
response.headers.set('Content-Type', 'image/%s' % ext.group(1))
content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
else:
response.headers.set(
'Content-Type',
'application/%s' %
ext.group(1))
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
response.headers.set("Content-Type", content_type)
return response
except Exception as e:
return server_error_response(e)
@@ -373,4 +373,4 @@ def move():
FileService.move_file(file_ids, parent_id)
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
return server_error_response(e)

+ 45
- 0
api/utils/web_utils.py View File

@@ -31,6 +31,51 @@ from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager


# Lookup table from a lowercase file extension (without the leading dot) to
# the MIME type used for that kind of file.
CONTENT_TYPE_MAP = {
    # Office documents and spreadsheets
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "doc": "application/msword",
    "pdf": "application/pdf",
    "csv": "text/csv",
    "xls": "application/vnd.ms-excel",
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    # Plain text and source code: every one of these maps to text/plain.
    **dict.fromkeys(
        (
            "txt",
            "py",
            "js",
            "java",
            "c",
            "cpp",
            "h",
            "php",
            "go",
            "ts",
            "sh",
            "cs",
            "kt",
            "sql",
        ),
        "text/plain",
    ),
    # Web and markup formats
    "md": "text/markdown",
    "markdown": "text/markdown",
    "htm": "text/html",
    "html": "text/html",
    "json": "application/json",
    # Images
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "gif": "image/gif",
    "bmp": "image/bmp",
    "tiff": "image/tiff",
    "tif": "image/tiff",
    "webp": "image/webp",
    "svg": "image/svg+xml",
    "ico": "image/x-icon",
    "avif": "image/avif",
    "heic": "image/heic",
}


def html2pdf(
source: str,
timeout: int = 2,

Loading…
Cancel
Save