### What problem does this PR solve? When a document's base64-encoded thumbnail is relatively large, storing it in MySQL causes the thumbnail to fail to display. This PR stores the document thumbnail in MinIO storage instead. ### Type of change - [✓] Bug Fix (non-breaking change which fixes an issue) --------- Co-authored-by: chongchuanbing <chongchuanbing@gmail.com> tags/v0.13.0
| from rag.utils.storage_factory import STORAGE_IMPL | from rag.utils.storage_factory import STORAGE_IMPL | ||||
| from api.utils.file_utils import filename_type, thumbnail, get_project_base_directory | from api.utils.file_utils import filename_type, thumbnail, get_project_base_directory | ||||
| from api.utils.web_utils import html2pdf, is_valid_url | from api.utils.web_utils import html2pdf, is_valid_url | ||||
| from api.contants import IMG_BASE64_PREFIX | |||||
| @manager.route('/upload', methods=['POST']) | @manager.route('/upload', methods=['POST']) | ||||
| try: | try: | ||||
| docs, tol = DocumentService.get_by_kb_id( | docs, tol = DocumentService.get_by_kb_id( | ||||
| kb_id, page_number, items_per_page, orderby, desc, keywords) | kb_id, page_number, items_per_page, orderby, desc, keywords) | ||||
| for doc_item in docs: | |||||
| if doc_item['thumbnail'] and not doc_item['thumbnail'].startswith(IMG_BASE64_PREFIX): | |||||
| doc_item['thumbnail'] = f'/v1/document/image/{kb_id}-{doc_item['thumbnail']}' | |||||
| return get_json_result(data={"total": tol, "docs": docs}) | return get_json_result(data={"total": tol, "docs": docs}) | ||||
| except Exception as e: | except Exception as e: | ||||
| return server_error_response(e) | return server_error_response(e) |
| # See the License for the specific language governing permissions and | # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | # limitations under the License. | ||||
| NAME_LENGTH_LIMIT = 2 ** 10 | |||||
| NAME_LENGTH_LIMIT = 2 ** 10 | |||||
| IMG_BASE64_PREFIX = 'data:image/png;base64,' |
| from api.db.services.document_service import DocumentService | from api.db.services.document_service import DocumentService | ||||
| from api.db.services.file2document_service import File2DocumentService | from api.db.services.file2document_service import File2DocumentService | ||||
| from api.utils import get_uuid | from api.utils import get_uuid | ||||
| from api.utils.file_utils import filename_type, thumbnail | |||||
| from api.utils.file_utils import filename_type, thumbnail_img | |||||
| from rag.utils.storage_factory import STORAGE_IMPL | from rag.utils.storage_factory import STORAGE_IMPL | ||||
| location += "_" | location += "_" | ||||
| blob = file.read() | blob = file.read() | ||||
| STORAGE_IMPL.put(kb.id, location, blob) | STORAGE_IMPL.put(kb.id, location, blob) | ||||
| doc_id = get_uuid() | |||||
| img = thumbnail_img(filename, blob) | |||||
| thumbnail_location = f'thumbnail_{doc_id}.png' | |||||
| STORAGE_IMPL.put(kb.id, thumbnail_location, img) | |||||
| doc = { | doc = { | ||||
| "id": get_uuid(), | |||||
| "id": doc_id, | |||||
| "kb_id": kb.id, | "kb_id": kb.id, | ||||
| "parser_id": self.get_parser(filetype, filename, kb.parser_id), | "parser_id": self.get_parser(filetype, filename, kb.parser_id), | ||||
| "parser_config": kb.parser_config, | "parser_config": kb.parser_config, | ||||
| "name": filename, | "name": filename, | ||||
| "location": location, | "location": location, | ||||
| "size": len(blob), | "size": len(blob), | ||||
| "thumbnail": thumbnail(filename, blob) | |||||
| "thumbnail": thumbnail_location | |||||
| } | } | ||||
| DocumentService.insert(doc) | DocumentService.insert(doc) | ||||
| from ruamel.yaml import YAML | from ruamel.yaml import YAML | ||||
| from api.db import FileType | from api.db import FileType | ||||
| from api.contants import IMG_BASE64_PREFIX | |||||
| PROJECT_BASE = os.getenv("RAG_PROJECT_BASE") or os.getenv("RAG_DEPLOY_BASE") | PROJECT_BASE = os.getenv("RAG_PROJECT_BASE") or os.getenv("RAG_DEPLOY_BASE") | ||||
| RAG_BASE = os.getenv("RAG_BASE") | RAG_BASE = os.getenv("RAG_BASE") | ||||
| return FileType.OTHER.value | return FileType.OTHER.value | ||||
| def thumbnail(filename, blob): | |||||
| def thumbnail_img(filename, blob): | |||||
| filename = filename.lower() | filename = filename.lower() | ||||
| if re.match(r".*\.pdf$", filename): | if re.match(r".*\.pdf$", filename): | ||||
| pdf = pdfplumber.open(BytesIO(blob)) | pdf = pdfplumber.open(BytesIO(blob)) | ||||
| buffered = BytesIO() | buffered = BytesIO() | ||||
| pdf.pages[0].to_image(resolution=32).annotated.save(buffered, format="png") | pdf.pages[0].to_image(resolution=32).annotated.save(buffered, format="png") | ||||
| return "data:image/png;base64," + \ | |||||
| base64.b64encode(buffered.getvalue()).decode("utf-8") | |||||
| return buffered.getvalue() | |||||
| if re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename): | if re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename): | ||||
| image = Image.open(BytesIO(blob)) | image = Image.open(BytesIO(blob)) | ||||
| image.thumbnail((30, 30)) | image.thumbnail((30, 30)) | ||||
| buffered = BytesIO() | buffered = BytesIO() | ||||
| image.save(buffered, format="png") | image.save(buffered, format="png") | ||||
| return "data:image/png;base64," + \ | |||||
| base64.b64encode(buffered.getvalue()).decode("utf-8") | |||||
| return buffered.getvalue() | |||||
| if re.match(r".*\.(ppt|pptx)$", filename): | if re.match(r".*\.(ppt|pptx)$", filename): | ||||
| import aspose.slides as slides | import aspose.slides as slides | ||||
| buffered = BytesIO() | buffered = BytesIO() | ||||
| presentation.slides[0].get_thumbnail(0.03, 0.03).save( | presentation.slides[0].get_thumbnail(0.03, 0.03).save( | ||||
| buffered, drawing.imaging.ImageFormat.png) | buffered, drawing.imaging.ImageFormat.png) | ||||
| return "data:image/png;base64," + \ | |||||
| base64.b64encode(buffered.getvalue()).decode("utf-8") | |||||
| return buffered.getvalue() | |||||
| except Exception as e: | except Exception as e: | ||||
| pass | pass | ||||
| return None | |||||
def thumbnail(filename, blob):
    """Return the thumbnail of *blob* as a base64 PNG data URI, or None.

    Rendering is delegated to ``thumbnail_img``; the bytes it returns are
    base64-encoded and prefixed with ``IMG_BASE64_PREFIX``.

    Args:
        filename: Name of the file; passed through to ``thumbnail_img``.
        blob: Raw file content; passed through to ``thumbnail_img``.

    Returns:
        ``IMG_BASE64_PREFIX`` followed by the base64 text of the image
        bytes, or ``None`` when ``thumbnail_img`` produced no image.
    """
    img = thumbnail_img(filename, blob)
    # thumbnail_img can return None; base64.b64encode(None) would raise
    # TypeError, so propagate "no thumbnail" to the caller instead.
    if img is None:
        return None
    return IMG_BASE64_PREFIX + base64.b64encode(img).decode("utf-8")
| def traversal_files(base): | def traversal_files(base): | ||||
| for root, ds, fs in os.walk(base): | for root, ds, fs in os.walk(base): |