Bläddra i källkod

Refa: Implement centralized file name length limit using FILE_NAME_LEN_LIMIT constant (#8318)

### What problem does this PR solve?

- Replace hardcoded 255-byte file name length checks with
FILE_NAME_LEN_LIMIT constant
- Update error messages to show the actual limit value
- #8290

### Type of change

- [x] Refactoring

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
tags/v0.19.1
Liu An 4 månader sedan
förälder
incheckning
0a13d79b94
Inget konto är kopplat till bidragsgivarens mejladress
4 ändrade filer med 20 tillägg och 18 borttagningar
  1. 8
    7
      api/apps/document_app.py
  2. 5
    4
      api/apps/sdk/doc.py
  3. 3
    3
      api/constants.py
  4. 4
    4
      api/db/services/file_service.py

+ 8
- 7
api/apps/document_app.py Visa fil

from flask_login import current_user, login_required from flask_login import current_user, login_required


from api import settings from api import settings
from api.constants import IMG_BASE64_PREFIX
from api.constants import FILE_NAME_LEN_LIMIT, IMG_BASE64_PREFIX
from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileSource, FileType, ParserType, TaskStatus from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileSource, FileType, ParserType, TaskStatus
from api.db.db_models import File, Task from api.db.db_models import File, Task
from api.db.services import duplicate_name from api.db.services import duplicate_name
for file_obj in file_objs: for file_obj in file_objs:
if file_obj.filename == "": if file_obj.filename == "":
return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
if len(file_obj.filename.encode("utf-8")) > 255:
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)


e, kb = KnowledgebaseService.get_by_id(kb_id) e, kb = KnowledgebaseService.get_by_id(kb_id)
if not e: if not e:
kb_id = req["kb_id"] kb_id = req["kb_id"]
if not kb_id: if not kb_id:
return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
if len(req["name"].encode("utf-8")) > 255:
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)

if req["name"].strip() == "": if req["name"].strip() == "":
return get_json_result(data=False, message="File name can't be empty.", code=settings.RetCode.ARGUMENT_ERROR) return get_json_result(data=False, message="File name can't be empty.", code=settings.RetCode.ARGUMENT_ERROR)
req["name"] = req["name"].strip() req["name"] = req["name"].strip()
return get_data_error_result(message="Document not found!") return get_data_error_result(message="Document not found!")
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR) return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR)
if len(req["name"].encode("utf-8")) > 255:
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)


for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id): for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
if d.name == req["name"]: if d.name == req["name"]:

+ 5
- 4
api/apps/sdk/doc.py Visa fil

from pydantic import BaseModel, Field, validator from pydantic import BaseModel, Field, validator


from api import settings from api import settings
from api.constants import FILE_NAME_LEN_LIMIT
from api.db import FileSource, FileType, LLMType, ParserType, TaskStatus from api.db import FileSource, FileType, LLMType, ParserType, TaskStatus
from api.db.db_models import File, Task from api.db.db_models import File, Task
from api.db.services.document_service import DocumentService from api.db.services.document_service import DocumentService
for file_obj in file_objs: for file_obj in file_objs:
if file_obj.filename == "": if file_obj.filename == "":
return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
if len(file_obj.filename.encode("utf-8")) > 255:
return get_result(message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
return get_result(message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
""" """
# total size # total size
total_size = 0 total_size = 0
DocumentService.update_meta_fields(document_id, req["meta_fields"]) DocumentService.update_meta_fields(document_id, req["meta_fields"])


if "name" in req and req["name"] != doc.name: if "name" in req and req["name"] != doc.name:
if len(req["name"].encode("utf-8")) > 255:
if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
return get_result( return get_result(
message="File name must be 255 bytes or less.",
message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.",
code=settings.RetCode.ARGUMENT_ERROR, code=settings.RetCode.ARGUMENT_ERROR,
) )
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:

+ 3
- 3
api/constants.py Visa fil

# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.


NAME_LENGTH_LIMIT = 2 ** 10
NAME_LENGTH_LIMIT = 2**10


IMG_BASE64_PREFIX = 'data:image/png;base64,'
IMG_BASE64_PREFIX = "data:image/png;base64,"


SERVICE_CONF = "service_conf.yaml" SERVICE_CONF = "service_conf.yaml"


REQUEST_MAX_WAIT_SEC = 300 REQUEST_MAX_WAIT_SEC = 300


DATASET_NAME_LIMIT = 128 DATASET_NAME_LIMIT = 128
FILE_NAME_LEN_LIMIT = 256
FILE_NAME_LEN_LIMIT = 255

+ 4
- 4
api/db/services/file_service.py Visa fil

from flask_login import current_user from flask_login import current_user
from peewee import fn from peewee import fn


from api.constants import FILE_NAME_LEN_LIMIT
from api.db import KNOWLEDGEBASE_FOLDER_NAME, FileSource, FileType, ParserType from api.db import KNOWLEDGEBASE_FOLDER_NAME, FileSource, FileType, ParserType
from api.db.db_models import DB, Document, File, File2Document, Knowledgebase from api.db.db_models import DB, Document, File, File2Document, Knowledgebase
from api.db.services import duplicate_name from api.db.services import duplicate_name
from api.utils import get_uuid from api.utils import get_uuid
from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img
from rag.utils.storage_factory import STORAGE_IMPL from rag.utils.storage_factory import STORAGE_IMPL
from api.constants import FILE_NAME_LEN_LIMIT


class FileService(CommonService): class FileService(CommonService):
# Service class for managing file operations and storage # Service class for managing file operations and storage
MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0)) MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER: if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
raise RuntimeError("Exceed the maximum file number of a free user!") raise RuntimeError("Exceed the maximum file number of a free user!")
if len(file.filename.encode("utf-8")) >= FILE_NAME_LEN_LIMIT:
raise RuntimeError("Exceed the maximum length of file name!")
if len(file.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
raise RuntimeError(f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.")


filename = duplicate_name(DocumentService.query, name=file.filename, kb_id=kb.id) filename = duplicate_name(DocumentService.query, name=file.filename, kb_id=kb.id)
filetype = filename_type(filename) filetype = filename_type(filename)
if re.search(r"\.(eml)$", filename): if re.search(r"\.(eml)$", filename):
return ParserType.EMAIL.value return ParserType.EMAIL.value
return default return default


Laddar…
Avbryt
Spara