### What problem does this PR solve?

- Replace hardcoded 255-byte file name length checks with FILE_NAME_LEN_LIMIT constant
- Update error messages to show the actual limit value
- #8290

### Type of change

- [x] Refactoring

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>

tags/v0.19.1
| from flask_login import current_user, login_required | from flask_login import current_user, login_required | ||||
| from api import settings | from api import settings | ||||
| from api.constants import IMG_BASE64_PREFIX | |||||
| from api.constants import FILE_NAME_LEN_LIMIT, IMG_BASE64_PREFIX | |||||
| from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileSource, FileType, ParserType, TaskStatus | from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileSource, FileType, ParserType, TaskStatus | ||||
| from api.db.db_models import File, Task | from api.db.db_models import File, Task | ||||
| from api.db.services import duplicate_name | from api.db.services import duplicate_name | ||||
| for file_obj in file_objs: | for file_obj in file_objs: | ||||
| if file_obj.filename == "": | if file_obj.filename == "": | ||||
| return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) | return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) | ||||
| if len(file_obj.filename.encode("utf-8")) > 255: | |||||
| return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||||
| if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT: | |||||
| return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||||
| e, kb = KnowledgebaseService.get_by_id(kb_id) | e, kb = KnowledgebaseService.get_by_id(kb_id) | ||||
| if not e: | if not e: | ||||
| kb_id = req["kb_id"] | kb_id = req["kb_id"] | ||||
| if not kb_id: | if not kb_id: | ||||
| return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) | return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) | ||||
| if len(req["name"].encode("utf-8")) > 255: | |||||
| return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||||
| if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT: | |||||
| return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||||
| if req["name"].strip() == "": | if req["name"].strip() == "": | ||||
| return get_json_result(data=False, message="File name can't be empty.", code=settings.RetCode.ARGUMENT_ERROR) | return get_json_result(data=False, message="File name can't be empty.", code=settings.RetCode.ARGUMENT_ERROR) | ||||
| req["name"] = req["name"].strip() | req["name"] = req["name"].strip() | ||||
| return get_data_error_result(message="Document not found!") | return get_data_error_result(message="Document not found!") | ||||
| if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: | if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: | ||||
| return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR) | return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR) | ||||
| if len(req["name"].encode("utf-8")) > 255: | |||||
| return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||||
| if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT: | |||||
| return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||||
| for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id): | for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id): | ||||
| if d.name == req["name"]: | if d.name == req["name"]: |
| from pydantic import BaseModel, Field, validator | from pydantic import BaseModel, Field, validator | ||||
| from api import settings | from api import settings | ||||
| from api.constants import FILE_NAME_LEN_LIMIT | |||||
| from api.db import FileSource, FileType, LLMType, ParserType, TaskStatus | from api.db import FileSource, FileType, LLMType, ParserType, TaskStatus | ||||
| from api.db.db_models import File, Task | from api.db.db_models import File, Task | ||||
| from api.db.services.document_service import DocumentService | from api.db.services.document_service import DocumentService | ||||
| for file_obj in file_objs: | for file_obj in file_objs: | ||||
| if file_obj.filename == "": | if file_obj.filename == "": | ||||
| return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) | return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) | ||||
| if len(file_obj.filename.encode("utf-8")) > 255: | |||||
| return get_result(message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||||
| if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT: | |||||
| return get_result(message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||||
| """ | """ | ||||
| # total size | # total size | ||||
| total_size = 0 | total_size = 0 | ||||
| DocumentService.update_meta_fields(document_id, req["meta_fields"]) | DocumentService.update_meta_fields(document_id, req["meta_fields"]) | ||||
| if "name" in req and req["name"] != doc.name: | if "name" in req and req["name"] != doc.name: | ||||
| if len(req["name"].encode("utf-8")) > 255: | |||||
| if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT: | |||||
| return get_result( | return get_result( | ||||
| message="File name must be 255 bytes or less.", | |||||
| message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", | |||||
| code=settings.RetCode.ARGUMENT_ERROR, | code=settings.RetCode.ARGUMENT_ERROR, | ||||
| ) | ) | ||||
| if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: | if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: |
| # See the License for the specific language governing permissions and | # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | # limitations under the License. | ||||
| NAME_LENGTH_LIMIT = 2 ** 10 | |||||
| NAME_LENGTH_LIMIT = 2**10 | |||||
| IMG_BASE64_PREFIX = 'data:image/png;base64,' | |||||
| IMG_BASE64_PREFIX = "data:image/png;base64," | |||||
| SERVICE_CONF = "service_conf.yaml" | SERVICE_CONF = "service_conf.yaml" | ||||
| REQUEST_MAX_WAIT_SEC = 300 | REQUEST_MAX_WAIT_SEC = 300 | ||||
| DATASET_NAME_LIMIT = 128 | DATASET_NAME_LIMIT = 128 | ||||
| FILE_NAME_LEN_LIMIT = 256 | |||||
| FILE_NAME_LEN_LIMIT = 255 |
| from flask_login import current_user | from flask_login import current_user | ||||
| from peewee import fn | from peewee import fn | ||||
| from api.constants import FILE_NAME_LEN_LIMIT | |||||
| from api.db import KNOWLEDGEBASE_FOLDER_NAME, FileSource, FileType, ParserType | from api.db import KNOWLEDGEBASE_FOLDER_NAME, FileSource, FileType, ParserType | ||||
| from api.db.db_models import DB, Document, File, File2Document, Knowledgebase | from api.db.db_models import DB, Document, File, File2Document, Knowledgebase | ||||
| from api.db.services import duplicate_name | from api.db.services import duplicate_name | ||||
| from api.utils import get_uuid | from api.utils import get_uuid | ||||
| from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img | from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img | ||||
| from rag.utils.storage_factory import STORAGE_IMPL | from rag.utils.storage_factory import STORAGE_IMPL | ||||
| from api.constants import FILE_NAME_LEN_LIMIT | |||||
| class FileService(CommonService): | class FileService(CommonService): | ||||
| # Service class for managing file operations and storage | # Service class for managing file operations and storage | ||||
| MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0)) | MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0)) | ||||
| if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER: | if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER: | ||||
| raise RuntimeError("Exceed the maximum file number of a free user!") | raise RuntimeError("Exceed the maximum file number of a free user!") | ||||
| if len(file.filename.encode("utf-8")) >= FILE_NAME_LEN_LIMIT: | |||||
| raise RuntimeError("Exceed the maximum length of file name!") | |||||
| if len(file.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT: | |||||
| raise RuntimeError(f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.") | |||||
| filename = duplicate_name(DocumentService.query, name=file.filename, kb_id=kb.id) | filename = duplicate_name(DocumentService.query, name=file.filename, kb_id=kb.id) | ||||
| filetype = filename_type(filename) | filetype = filename_type(filename) | ||||
| if re.search(r"\.(eml)$", filename): | if re.search(r"\.(eml)$", filename): | ||||
| return ParserType.EMAIL.value | return ParserType.EMAIL.value | ||||
| return default | return default | ||||