### What problem does this PR solve?

Use uniform, standard MIME content types in file responses.

### Type of change

- [x] Refactoring

tags/v0.20.0
| @@ -42,7 +42,7 @@ from api.utils.api_utils import ( | |||
| validate_request, | |||
| ) | |||
| from api.utils.file_utils import filename_type, get_project_base_directory, thumbnail | |||
| from api.utils.web_utils import html2pdf, is_valid_url | |||
| from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url | |||
| from deepdoc.parser.html_parser import RAGFlowHtmlParser | |||
| from rag.nlp import search | |||
| from rag.utils.storage_factory import STORAGE_IMPL | |||
| @@ -505,12 +505,14 @@ def get(doc_id): | |||
| b, n = File2DocumentService.get_storage_address(doc_id=doc_id) | |||
| response = flask.make_response(STORAGE_IMPL.get(b, n)) | |||
| ext = re.search(r"\.([^.]+)$", doc.name) | |||
| ext = re.search(r"\.([^.]+)$", doc.name.lower()) | |||
| ext = ext.group(1) if ext else None | |||
| if ext: | |||
| if doc.type == FileType.VISUAL.value: | |||
| response.headers.set("Content-Type", "image/%s" % ext.group(1)) | |||
| content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}") | |||
| else: | |||
| response.headers.set("Content-Type", "application/%s" % ext.group(1)) | |||
| content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}") | |||
| response.headers.set("Content-Type", content_type) | |||
| return response | |||
| except Exception as e: | |||
| return server_error_response(e) | |||
| @@ -31,6 +31,7 @@ from api.db.services.file_service import FileService | |||
| from api import settings | |||
| from api.utils.api_utils import get_json_result | |||
| from api.utils.file_utils import filename_type | |||
| from api.utils.web_utils import CONTENT_TYPE_MAP | |||
| from rag.utils.storage_factory import STORAGE_IMPL | |||
| @@ -334,15 +335,14 @@ def get(file_id): | |||
| blob = STORAGE_IMPL.get(b, n) | |||
| response = flask.make_response(blob) | |||
| ext = re.search(r"\.([^.]+)$", file.name) | |||
| ext = re.search(r"\.([^.]+)$", file.name.lower()) | |||
| ext = ext.group(1) if ext else None | |||
| if ext: | |||
| if file.type == FileType.VISUAL.value: | |||
| response.headers.set('Content-Type', 'image/%s' % ext.group(1)) | |||
| content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}") | |||
| else: | |||
| response.headers.set( | |||
| 'Content-Type', | |||
| 'application/%s' % | |||
| ext.group(1)) | |||
| content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}") | |||
| response.headers.set("Content-Type", content_type) | |||
| return response | |||
| except Exception as e: | |||
| return server_error_response(e) | |||
| @@ -373,4 +373,4 @@ def move(): | |||
| FileService.move_file(file_ids, parent_id) | |||
| return get_json_result(data=True) | |||
| except Exception as e: | |||
| return server_error_response(e) | |||
| return server_error_response(e) | |||
| @@ -31,6 +31,51 @@ from selenium.webdriver.support.ui import WebDriverWait | |||
| from webdriver_manager.chrome import ChromeDriverManager | |||
# Lookup table from a lowercase file extension (without the leading dot) to
# the MIME type sent in the HTTP ``Content-Type`` header.
#
# Callers look extensions up with ``CONTENT_TYPE_MAP.get(ext, fallback)``, so
# an extension that is missing here is served with whatever fallback the
# caller builds (e.g. ``f"application/{ext}"``), which is usually not a
# registered media type. Add an entry here instead of relying on the fallback.
CONTENT_TYPE_MAP = {
    # Office
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "doc": "application/msword",
    "pdf": "application/pdf",
    "csv": "text/csv",
    "xls": "application/vnd.ms-excel",
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    # PowerPoint: without these the caller fallback would produce the
    # unregistered types "application/ppt" / "application/pptx".
    "ppt": "application/vnd.ms-powerpoint",
    "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    # Text/code: every source-code extension is served as plain text
    # (presumably so browsers display it rather than run or download it —
    # TODO confirm intent).
    "txt": "text/plain",
    "py": "text/plain",
    "js": "text/plain",
    "java": "text/plain",
    "c": "text/plain",
    "cpp": "text/plain",
    "h": "text/plain",
    "php": "text/plain",
    "go": "text/plain",
    "ts": "text/plain",
    "sh": "text/plain",
    "cs": "text/plain",
    "kt": "text/plain",
    "sql": "text/plain",
    # Web
    # NOTE(review): "htm"/"html" (and "svg" below) are types a browser will
    # render and script when navigated to directly; confirm the responses
    # that use this table are safe for user-uploaded content.
    "md": "text/markdown",
    "markdown": "text/markdown",
    "htm": "text/html",
    "html": "text/html",
    "json": "application/json",
    # Image formats
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "gif": "image/gif",
    "bmp": "image/bmp",
    "tiff": "image/tiff",
    "tif": "image/tiff",
    "webp": "image/webp",
    "svg": "image/svg+xml",
    "ico": "image/x-icon",
    "avif": "image/avif",
    "heic": "image/heic",
}
| def html2pdf( | |||
| source: str, | |||
| timeout: int = 2, | |||