### What problem does this PR solve?

- Replace hardcoded 255-byte file name length checks with the `FILE_NAME_LEN_LIMIT` constant
- Update error messages to show the actual limit value
- #8290

### Type of change

- [x] Refactoring

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>

tags/v0.19.1
| @@ -23,7 +23,7 @@ from flask import request | |||
| from flask_login import current_user, login_required | |||
| from api import settings | |||
| from api.constants import IMG_BASE64_PREFIX | |||
| from api.constants import FILE_NAME_LEN_LIMIT, IMG_BASE64_PREFIX | |||
| from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileSource, FileType, ParserType, TaskStatus | |||
| from api.db.db_models import File, Task | |||
| from api.db.services import duplicate_name | |||
| @@ -61,8 +61,8 @@ def upload(): | |||
| for file_obj in file_objs: | |||
| if file_obj.filename == "": | |||
| return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) | |||
| if len(file_obj.filename.encode("utf-8")) > 255: | |||
| return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||
| if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT: | |||
| return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||
| e, kb = KnowledgebaseService.get_by_id(kb_id) | |||
| if not e: | |||
| @@ -149,8 +149,9 @@ def create(): | |||
| kb_id = req["kb_id"] | |||
| if not kb_id: | |||
| return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) | |||
| if len(req["name"].encode("utf-8")) > 255: | |||
| return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||
| if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT: | |||
| return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||
| if req["name"].strip() == "": | |||
| return get_json_result(data=False, message="File name can't be empty.", code=settings.RetCode.ARGUMENT_ERROR) | |||
| req["name"] = req["name"].strip() | |||
| @@ -409,8 +410,8 @@ def rename(): | |||
| return get_data_error_result(message="Document not found!") | |||
| if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: | |||
| return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR) | |||
| if len(req["name"].encode("utf-8")) > 255: | |||
| return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||
| if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT: | |||
| return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||
| for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id): | |||
| if d.name == req["name"]: | |||
| @@ -25,6 +25,7 @@ from peewee import OperationalError | |||
| from pydantic import BaseModel, Field, validator | |||
| from api import settings | |||
| from api.constants import FILE_NAME_LEN_LIMIT | |||
| from api.db import FileSource, FileType, LLMType, ParserType, TaskStatus | |||
| from api.db.db_models import File, Task | |||
| from api.db.services.document_service import DocumentService | |||
| @@ -129,8 +130,8 @@ def upload(dataset_id, tenant_id): | |||
| for file_obj in file_objs: | |||
| if file_obj.filename == "": | |||
| return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) | |||
| if len(file_obj.filename.encode("utf-8")) > 255: | |||
| return get_result(message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||
| if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT: | |||
| return get_result(message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||
| """ | |||
| # total size | |||
| total_size = 0 | |||
| @@ -247,9 +248,9 @@ def update_doc(tenant_id, dataset_id, document_id): | |||
| DocumentService.update_meta_fields(document_id, req["meta_fields"]) | |||
| if "name" in req and req["name"] != doc.name: | |||
| if len(req["name"].encode("utf-8")) > 255: | |||
| if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT: | |||
| return get_result( | |||
| message="File name must be 255 bytes or less.", | |||
| message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", | |||
| code=settings.RetCode.ARGUMENT_ERROR, | |||
| ) | |||
| if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: | |||
| @@ -13,9 +13,9 @@ | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| NAME_LENGTH_LIMIT = 2 ** 10 | |||
| NAME_LENGTH_LIMIT = 2**10 | |||
| IMG_BASE64_PREFIX = 'data:image/png;base64,' | |||
| IMG_BASE64_PREFIX = "data:image/png;base64," | |||
| SERVICE_CONF = "service_conf.yaml" | |||
| @@ -25,4 +25,4 @@ REQUEST_WAIT_SEC = 2 | |||
| REQUEST_MAX_WAIT_SEC = 300 | |||
| DATASET_NAME_LIMIT = 128 | |||
| FILE_NAME_LEN_LIMIT = 256 | |||
| FILE_NAME_LEN_LIMIT = 255 | |||
| @@ -21,6 +21,7 @@ from concurrent.futures import ThreadPoolExecutor | |||
| from flask_login import current_user | |||
| from peewee import fn | |||
| from api.constants import FILE_NAME_LEN_LIMIT | |||
| from api.db import KNOWLEDGEBASE_FOLDER_NAME, FileSource, FileType, ParserType | |||
| from api.db.db_models import DB, Document, File, File2Document, Knowledgebase | |||
| from api.db.services import duplicate_name | |||
| @@ -30,7 +31,7 @@ from api.db.services.file2document_service import File2DocumentService | |||
| from api.utils import get_uuid | |||
| from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img | |||
| from rag.utils.storage_factory import STORAGE_IMPL | |||
| from api.constants import FILE_NAME_LEN_LIMIT | |||
| class FileService(CommonService): | |||
| # Service class for managing file operations and storage | |||
| @@ -412,8 +413,8 @@ class FileService(CommonService): | |||
| MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0)) | |||
| if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER: | |||
| raise RuntimeError("Exceed the maximum file number of a free user!") | |||
| if len(file.filename.encode("utf-8")) >= FILE_NAME_LEN_LIMIT: | |||
| raise RuntimeError("Exceed the maximum length of file name!") | |||
| if len(file.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT: | |||
| raise RuntimeError(f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.") | |||
| filename = duplicate_name(DocumentService.query, name=file.filename, kb_id=kb.id) | |||
| filetype = filename_type(filename) | |||
| @@ -492,4 +493,3 @@ class FileService(CommonService): | |||
| if re.search(r"\.(eml)$", filename): | |||
| return ParserType.EMAIL.value | |||
| return default | |||