### What problem does this PR solve? - Add filename length validation (<=255 bytes) for document upload/rename in both HTTP and SDK APIs - Update error messages for consistency - Fix comparison operator in SDK from '>=' to '>' for filename length check ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)tags/v0.19.1
| for file_obj in file_objs: | for file_obj in file_objs: | ||||
| if file_obj.filename == "": | if file_obj.filename == "": | ||||
| return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) | return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) | ||||
| if len(file_obj.filename.encode("utf-8")) > 255: | |||||
| return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||||
| e, kb = KnowledgebaseService.get_by_id(kb_id) | e, kb = KnowledgebaseService.get_by_id(kb_id) | ||||
| if not e: | if not e: | ||||
| kb_id = req["kb_id"] | kb_id = req["kb_id"] | ||||
| if not kb_id: | if not kb_id: | ||||
| return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) | return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) | ||||
| if len(req["name"].encode("utf-8")) > 255: | |||||
| return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||||
| try: | try: | ||||
| e, kb = KnowledgebaseService.get_by_id(kb_id) | e, kb = KnowledgebaseService.get_by_id(kb_id) | ||||
| return get_data_error_result(message="Document not found!") | return get_data_error_result(message="Document not found!") | ||||
| if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: | if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: | ||||
| return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR) | return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR) | ||||
| if len(req["name"].encode("utf-8")) > 255: | |||||
| return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||||
| for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id): | for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id): | ||||
| if d.name == req["name"]: | if d.name == req["name"]: | ||||
| return get_data_error_result(message="Duplicated document name in the same knowledgebase.") | return get_data_error_result(message="Duplicated document name in the same knowledgebase.") |
| for file_obj in file_objs: | for file_obj in file_objs: | ||||
| if file_obj.filename == "": | if file_obj.filename == "": | ||||
| return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) | return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) | ||||
| if len(file_obj.filename.encode("utf-8")) >= 128: | |||||
| return get_result(message="File name should be less than 128 bytes.", code=settings.RetCode.ARGUMENT_ERROR) | |||||
| if len(file_obj.filename.encode("utf-8")) > 255: | |||||
| return get_result(message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) | |||||
| """ | """ | ||||
| # total size | # total size | ||||
| total_size = 0 | total_size = 0 | ||||
| DocumentService.update_meta_fields(document_id, req["meta_fields"]) | DocumentService.update_meta_fields(document_id, req["meta_fields"]) | ||||
| if "name" in req and req["name"] != doc.name: | if "name" in req and req["name"] != doc.name: | ||||
| if len(req["name"].encode("utf-8")) >= 128: | |||||
| if len(req["name"].encode("utf-8")) > 255: | |||||
| return get_result( | return get_result( | ||||
| message="The name should be less than 128 bytes.", | |||||
| message="File name must be 255 bytes or less.", | |||||
| code=settings.RetCode.ARGUMENT_ERROR, | code=settings.RetCode.ARGUMENT_ERROR, | ||||
| ) | ) | ||||
| if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: | if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: |
| INVALID_API_TOKEN = "invalid_key_123" | INVALID_API_TOKEN = "invalid_key_123" | ||||
| DATASET_NAME_LIMIT = 128 | DATASET_NAME_LIMIT = 128 | ||||
| DOCUMENT_NAME_LIMIT = 128 | |||||
| DOCUMENT_NAME_LIMIT = 255 | |||||
| CHAT_ASSISTANT_NAME_LIMIT = 255 | CHAT_ASSISTANT_NAME_LIMIT = 255 | ||||
| SESSION_WITH_CHAT_NAME_LIMIT = 255 | SESSION_WITH_CHAT_NAME_LIMIT = 255 |
| [ | [ | ||||
| ("new_name.txt", 0, ""), | ("new_name.txt", 0, ""), | ||||
| ( | ( | ||||
| f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt", | |||||
| 101, | |||||
| "The name should be less than 128 bytes.", | |||||
| f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt", | |||||
| 0, | |||||
| "", | |||||
| ), | ), | ||||
| ( | ( | ||||
| 0, | 0, |
| # See the License for the specific language governing permissions and | # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | # limitations under the License. | ||||
| # | # | ||||
| import string | import string | ||||
| from concurrent.futures import ThreadPoolExecutor, as_completed | from concurrent.futures import ThreadPoolExecutor, as_completed | ||||
| assert res.json()["message"] == "No file selected!" | assert res.json()["message"] == "No file selected!" | ||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_filename_exceeds_max_length(self, HttpApiAuth, add_dataset_func, tmp_path): | |||||
| def test_filename_max_length(self, HttpApiAuth, add_dataset_func, tmp_path): | |||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| # filename_length = 129 | |||||
| fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt") | |||||
| fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt") | |||||
| res = upload_documents(HttpApiAuth, dataset_id, [fp]) | res = upload_documents(HttpApiAuth, dataset_id, [fp]) | ||||
| assert res["code"] == 101 | |||||
| assert res["message"] == "File name should be less than 128 bytes." | |||||
| assert res["code"] == 0 | |||||
| assert res["data"][0]["name"] == fp.name | |||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_invalid_dataset_id(self, HttpApiAuth, tmp_path): | def test_invalid_dataset_id(self, HttpApiAuth, tmp_path): |
| "name, expected_message", | "name, expected_message", | ||||
| [ | [ | ||||
| ("new_name.txt", ""), | ("new_name.txt", ""), | ||||
| (f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt", "The name should be less than 128 bytes"), | |||||
| (f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt", ""), | |||||
| (0, "AttributeError"), | (0, "AttributeError"), | ||||
| (None, "AttributeError"), | (None, "AttributeError"), | ||||
| ("", "The extension of file can't be changed"), | ("", "The extension of file can't be changed"), |
| assert str(excinfo.value) == "No file selected!", str(excinfo.value) | assert str(excinfo.value) == "No file selected!", str(excinfo.value) | ||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_filename_exceeds_max_length(self, add_dataset_func, tmp_path): | |||||
| def test_filename_max_length(self, add_dataset_func, tmp_path): | |||||
| dataset = add_dataset_func | dataset = add_dataset_func | ||||
| fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt") | |||||
| fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt") | |||||
| with fp.open("rb") as f: | with fp.open("rb") as f: | ||||
| blob = f.read() | blob = f.read() | ||||
| with pytest.raises(Exception) as excinfo: | |||||
| dataset.upload_documents([{"display_name": fp.name, "blob": blob}]) | |||||
| assert str(excinfo.value) == "File name should be less than 128 bytes.", str(excinfo.value) | |||||
| documents = dataset.upload_documents([{"display_name": fp.name, "blob": blob}]) | |||||
| for document in documents: | |||||
| assert document.dataset_id == dataset.id, str(document) | |||||
| assert document.name == fp.name, str(document) | |||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_duplicate_files(self, add_dataset_func, tmp_path): | def test_duplicate_files(self, add_dataset_func, tmp_path): |