### What problem does this PR solve?

Fix the bug causing garbled text (#3613).

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>

tags/v0.14.1
| @@ -162,9 +162,9 @@ def rm(): | |||
| message="Database error (Document removal)!") | |||
| f2d = File2DocumentService.get_by_document_id(doc.id) | |||
| FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id]) | |||
| FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name]) | |||
| File2DocumentService.delete_by_document_id(doc.id) | |||
| FileService.filter_delete( | |||
| [File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name]) | |||
| if not KnowledgebaseService.delete_by_id(req["kb_id"]): | |||
| return get_data_error_result( | |||
| message="Database error (Knowledgebase removal)!") | |||
| @@ -252,9 +252,9 @@ def delete(tenant_id): | |||
| File.id == f2d[0].file_id, | |||
| ] | |||
| ) | |||
| FileService.filter_delete( | |||
| [File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name]) | |||
| File2DocumentService.delete_by_document_id(doc.id) | |||
| FileService.filter_delete( | |||
| [File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name]) | |||
| if not KnowledgebaseService.delete_by_id(id): | |||
| return get_error_data_result(message="Delete dataset error.(Database error)") | |||
| return get_result(code=settings.RetCode.SUCCESS) | |||
| @@ -28,6 +28,8 @@ from cn2an import cn2an | |||
| from PIL import Image | |||
| import json | |||
| import chardet | |||
| all_codecs = [ | |||
| 'utf-8', 'gb2312', 'gbk', 'utf_16', 'ascii', 'big5', 'big5hkscs', | |||
| 'cp037', 'cp273', 'cp424', 'cp437', | |||
| @@ -43,12 +45,17 @@ all_codecs = [ | |||
| 'iso8859_14', 'iso8859_15', 'iso8859_16', 'johab', 'koi8_r', 'koi8_t', 'koi8_u', | |||
| 'kz1048', 'mac_cyrillic', 'mac_greek', 'mac_iceland', 'mac_latin2', 'mac_roman', | |||
| 'mac_turkish', 'ptcp154', 'shift_jis', 'shift_jis_2004', 'shift_jisx0213', | |||
| 'utf_32', 'utf_32_be', 'utf_32_le''utf_16_be', 'utf_16_le', 'utf_7' | |||
| 'utf_32', 'utf_32_be', 'utf_32_le', 'utf_16_be', 'utf_16_le', 'utf_7', 'windows-1250', 'windows-1251', | |||
| 'windows-1252', 'windows-1253', 'windows-1254', 'windows-1255', 'windows-1256', | |||
| 'windows-1257', 'windows-1258', 'latin-2' | |||
| ] | |||
| def find_codec(blob): | |||
| global all_codecs | |||
| detected = chardet.detect(blob[:1024]) | |||
| if detected['confidence'] > 0.5: | |||
| return detected['encoding'] | |||
| for c in all_codecs: | |||
| try: | |||
| blob[:1024].decode(c) | |||