Переглянути джерело

Refa: Implement centralized file name length limit using FILE_NAME_LEN_LIMIT constant (#8318)

### What problem does this PR solve?

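- Replace hardcoded 255-byte file name length checks with the
FILE_NAME_LEN_LIMIT constant
- Set FILE_NAME_LEN_LIMIT to 255 (was 256) and change the FileService check
from `>=` to `>`, so the accepted maximum stays at 255 bytes
- Update error messages to show the actual limit value
- Related issue: #8290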

### Type of change

- [x] Refactoring

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
tags/v0.19.1
Liu An 4 місяці тому
джерело
коміт
0a13d79b94
Акаунт користувача з таким Email не знайдено

+ 8
- 7
api/apps/document_app.py Переглянути файл

@@ -23,7 +23,7 @@ from flask import request
from flask_login import current_user, login_required

from api import settings
from api.constants import IMG_BASE64_PREFIX
from api.constants import FILE_NAME_LEN_LIMIT, IMG_BASE64_PREFIX
from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileSource, FileType, ParserType, TaskStatus
from api.db.db_models import File, Task
from api.db.services import duplicate_name
@@ -61,8 +61,8 @@ def upload():
for file_obj in file_objs:
if file_obj.filename == "":
return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
if len(file_obj.filename.encode("utf-8")) > 255:
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)

e, kb = KnowledgebaseService.get_by_id(kb_id)
if not e:
@@ -149,8 +149,9 @@ def create():
kb_id = req["kb_id"]
if not kb_id:
return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
if len(req["name"].encode("utf-8")) > 255:
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)

if req["name"].strip() == "":
return get_json_result(data=False, message="File name can't be empty.", code=settings.RetCode.ARGUMENT_ERROR)
req["name"] = req["name"].strip()
@@ -409,8 +410,8 @@ def rename():
return get_data_error_result(message="Document not found!")
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR)
if len(req["name"].encode("utf-8")) > 255:
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)

for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
if d.name == req["name"]:

+ 5
- 4
api/apps/sdk/doc.py Переглянути файл

@@ -25,6 +25,7 @@ from peewee import OperationalError
from pydantic import BaseModel, Field, validator

from api import settings
from api.constants import FILE_NAME_LEN_LIMIT
from api.db import FileSource, FileType, LLMType, ParserType, TaskStatus
from api.db.db_models import File, Task
from api.db.services.document_service import DocumentService
@@ -129,8 +130,8 @@ def upload(dataset_id, tenant_id):
for file_obj in file_objs:
if file_obj.filename == "":
return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
if len(file_obj.filename.encode("utf-8")) > 255:
return get_result(message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
return get_result(message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
"""
# total size
total_size = 0
@@ -247,9 +248,9 @@ def update_doc(tenant_id, dataset_id, document_id):
DocumentService.update_meta_fields(document_id, req["meta_fields"])

if "name" in req and req["name"] != doc.name:
if len(req["name"].encode("utf-8")) > 255:
if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
return get_result(
message="File name must be 255 bytes or less.",
message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.",
code=settings.RetCode.ARGUMENT_ERROR,
)
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:

+ 3
- 3
api/constants.py Переглянути файл

@@ -13,9 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

NAME_LENGTH_LIMIT = 2 ** 10
NAME_LENGTH_LIMIT = 2**10

IMG_BASE64_PREFIX = 'data:image/png;base64,'
IMG_BASE64_PREFIX = "data:image/png;base64,"

SERVICE_CONF = "service_conf.yaml"

@@ -25,4 +25,4 @@ REQUEST_WAIT_SEC = 2
REQUEST_MAX_WAIT_SEC = 300

DATASET_NAME_LIMIT = 128
FILE_NAME_LEN_LIMIT = 256
FILE_NAME_LEN_LIMIT = 255

+ 4
- 4
api/db/services/file_service.py Переглянути файл

@@ -21,6 +21,7 @@ from concurrent.futures import ThreadPoolExecutor
from flask_login import current_user
from peewee import fn

from api.constants import FILE_NAME_LEN_LIMIT
from api.db import KNOWLEDGEBASE_FOLDER_NAME, FileSource, FileType, ParserType
from api.db.db_models import DB, Document, File, File2Document, Knowledgebase
from api.db.services import duplicate_name
@@ -30,7 +31,7 @@ from api.db.services.file2document_service import File2DocumentService
from api.utils import get_uuid
from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img
from rag.utils.storage_factory import STORAGE_IMPL
from api.constants import FILE_NAME_LEN_LIMIT

class FileService(CommonService):
# Service class for managing file operations and storage
@@ -412,8 +413,8 @@ class FileService(CommonService):
MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
raise RuntimeError("Exceed the maximum file number of a free user!")
if len(file.filename.encode("utf-8")) >= FILE_NAME_LEN_LIMIT:
raise RuntimeError("Exceed the maximum length of file name!")
if len(file.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
raise RuntimeError(f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.")

filename = duplicate_name(DocumentService.query, name=file.filename, kb_id=kb.id)
filetype = filename_type(filename)
@@ -492,4 +493,3 @@ class FileService(CommonService):
if re.search(r"\.(eml)$", filename):
return ParserType.EMAIL.value
return default


Завантаження…
Відмінити
Зберегти