### What problem does this PR solve?

In MySQL, when a document's base64-encoded thumbnail is relatively large, displaying the thumbnail fails. This PR stores the document thumbnail in MinIO storage instead.

### Type of change

- [✓] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: chongchuanbing <chongchuanbing@gmail.com>

tags/v0.13.0
| @@ -51,6 +51,7 @@ from api.utils.api_utils import get_json_result | |||
| from rag.utils.storage_factory import STORAGE_IMPL | |||
| from api.utils.file_utils import filename_type, thumbnail, get_project_base_directory | |||
| from api.utils.web_utils import html2pdf, is_valid_url | |||
| from api.contants import IMG_BASE64_PREFIX | |||
| @manager.route('/upload', methods=['POST']) | |||
| @@ -209,6 +210,11 @@ def list_docs(): | |||
| try: | |||
| docs, tol = DocumentService.get_by_kb_id( | |||
| kb_id, page_number, items_per_page, orderby, desc, keywords) | |||
| for doc_item in docs: | |||
| if doc_item['thumbnail'] and not doc_item['thumbnail'].startswith(IMG_BASE64_PREFIX): | |||
| doc_item['thumbnail'] = f'/v1/document/image/{kb_id}-{doc_item['thumbnail']}' | |||
| return get_json_result(data={"total": tol, "docs": docs}) | |||
| except Exception as e: | |||
| return server_error_response(e) | |||
| @@ -13,4 +13,6 @@ | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| NAME_LENGTH_LIMIT = 2 ** 10 | |||
| NAME_LENGTH_LIMIT = 2 ** 10 | |||
| IMG_BASE64_PREFIX = 'data:image/png;base64,' | |||
| @@ -26,7 +26,7 @@ from api.db.services.common_service import CommonService | |||
| from api.db.services.document_service import DocumentService | |||
| from api.db.services.file2document_service import File2DocumentService | |||
| from api.utils import get_uuid | |||
| from api.utils.file_utils import filename_type, thumbnail | |||
| from api.utils.file_utils import filename_type, thumbnail_img | |||
| from rag.utils.storage_factory import STORAGE_IMPL | |||
| @@ -354,8 +354,15 @@ class FileService(CommonService): | |||
| location += "_" | |||
| blob = file.read() | |||
| STORAGE_IMPL.put(kb.id, location, blob) | |||
| doc_id = get_uuid() | |||
| img = thumbnail_img(filename, blob) | |||
| thumbnail_location = f'thumbnail_{doc_id}.png' | |||
| STORAGE_IMPL.put(kb.id, thumbnail_location, img) | |||
| doc = { | |||
| "id": get_uuid(), | |||
| "id": doc_id, | |||
| "kb_id": kb.id, | |||
| "parser_id": self.get_parser(filetype, filename, kb.parser_id), | |||
| "parser_config": kb.parser_config, | |||
| @@ -364,7 +371,7 @@ class FileService(CommonService): | |||
| "name": filename, | |||
| "location": location, | |||
| "size": len(blob), | |||
| "thumbnail": thumbnail(filename, blob) | |||
| "thumbnail": thumbnail_location | |||
| } | |||
| DocumentService.insert(doc) | |||
| @@ -25,6 +25,7 @@ from cachetools import LRUCache, cached | |||
| from ruamel.yaml import YAML | |||
| from api.db import FileType | |||
| from api.contants import IMG_BASE64_PREFIX | |||
| PROJECT_BASE = os.getenv("RAG_PROJECT_BASE") or os.getenv("RAG_DEPLOY_BASE") | |||
| RAG_BASE = os.getenv("RAG_BASE") | |||
| @@ -168,23 +169,20 @@ def filename_type(filename): | |||
| return FileType.OTHER.value | |||
| def thumbnail(filename, blob): | |||
| def thumbnail_img(filename, blob): | |||
| filename = filename.lower() | |||
| if re.match(r".*\.pdf$", filename): | |||
| pdf = pdfplumber.open(BytesIO(blob)) | |||
| buffered = BytesIO() | |||
| pdf.pages[0].to_image(resolution=32).annotated.save(buffered, format="png") | |||
| return "data:image/png;base64," + \ | |||
| base64.b64encode(buffered.getvalue()).decode("utf-8") | |||
| return buffered.getvalue() | |||
| if re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename): | |||
| image = Image.open(BytesIO(blob)) | |||
| image.thumbnail((30, 30)) | |||
| buffered = BytesIO() | |||
| image.save(buffered, format="png") | |||
| return "data:image/png;base64," + \ | |||
| base64.b64encode(buffered.getvalue()).decode("utf-8") | |||
| return buffered.getvalue() | |||
| if re.match(r".*\.(ppt|pptx)$", filename): | |||
| import aspose.slides as slides | |||
| @@ -194,11 +192,15 @@ def thumbnail(filename, blob): | |||
| buffered = BytesIO() | |||
| presentation.slides[0].get_thumbnail(0.03, 0.03).save( | |||
| buffered, drawing.imaging.ImageFormat.png) | |||
| return "data:image/png;base64," + \ | |||
| base64.b64encode(buffered.getvalue()).decode("utf-8") | |||
| return buffered.getvalue() | |||
| except Exception as e: | |||
| pass | |||
| return None | |||
def thumbnail(filename, blob):
    """Return a file's thumbnail as a base64-encoded PNG data URI.

    Rendering is delegated to ``thumbnail_img``; the resulting bytes are
    base64-encoded and prefixed with ``IMG_BASE64_PREFIX``.

    Args:
        filename: Name of the file, forwarded to ``thumbnail_img``.
        blob: Raw file content, forwarded to ``thumbnail_img``.

    Returns:
        The data-URI string, or ``None`` when ``thumbnail_img`` produced
        no image.
    """
    img = thumbnail_img(filename, blob)
    if img is None:
        # thumbnail_img returns None when it cannot render a thumbnail;
        # base64.b64encode(None) would raise TypeError, so keep the
        # "no thumbnail" result as None instead of crashing the caller.
        return None
    return IMG_BASE64_PREFIX + base64.b64encode(img).decode("utf-8")
| def traversal_files(base): | |||
| for root, ds, fs in os.walk(base): | |||