### What problem does this PR solve?

Fix document-related typos in the test suite (e.g. `documnets` → `documents` in helper and test names).

### Type of change

- [x] Typo
| """ | """ | ||||
| Verifies availability of an embedding model for a specific tenant. | Verifies availability of an embedding model for a specific tenant. | ||||
| Implements a four-stage validation process: | |||||
| 1. Model identifier parsing and validation | |||||
| 2. System support verification | |||||
| 3. Tenant authorization check | |||||
| 4. Database operation error handling | |||||
| Performs comprehensive verification through: | |||||
| 1. Identifier Parsing: Decomposes embd_id into name and factory components | |||||
| 2. System Verification: Checks model registration in LLMService | |||||
| 3. Tenant Authorization: Validates tenant-specific model assignments | |||||
| 4. Built-in Model Check: Confirms inclusion in predefined system models | |||||
| Args: | Args: | ||||
| embd_id (str): Unique identifier for the embedding model in format "model_name@factory" | embd_id (str): Unique identifier for the embedding model in format "model_name@factory" | ||||
| try: | try: | ||||
| llm_name, llm_factory = TenantLLMService.split_model_name_and_factory(embd_id) | llm_name, llm_factory = TenantLLMService.split_model_name_and_factory(embd_id) | ||||
| in_llm_service = bool(LLMService.query(llm_name=llm_name, fid=llm_factory, model_type="embedding")) | in_llm_service = bool(LLMService.query(llm_name=llm_name, fid=llm_factory, model_type="embedding")) | ||||
| # Tongyi-Qianwen is added to TenantLLM by default, but remains unusable with empty api_key | |||||
| tenant_llms = TenantLLMService.get_my_llms(tenant_id=tenant_id) | tenant_llms = TenantLLMService.get_my_llms(tenant_id=tenant_id) | ||||
| is_tenant_model = any(llm["llm_name"] == llm_name and llm["llm_factory"] == llm_factory and llm["model_type"] == "embedding" for llm in tenant_llms) | is_tenant_model = any(llm["llm_name"] == llm_name and llm["llm_factory"] == llm_factory and llm["model_type"] == "embedding" for llm in tenant_llms) | ||||
| is_builtin_model = embd_id in settings.BUILTIN_EMBEDDING_MODELS | is_builtin_model = embd_id in settings.BUILTIN_EMBEDDING_MODELS | ||||
| if not ((is_builtin_model or is_tenant_model or in_llm_service)): | |||||
| if not (is_builtin_model or is_tenant_model or in_llm_service): | |||||
| return False, get_error_argument_result(f"Unsupported model: <{embd_id}>") | return False, get_error_argument_result(f"Unsupported model: <{embd_id}>") | ||||
| if not (is_builtin_model or is_tenant_model): | if not (is_builtin_model or is_tenant_model): |
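For orientation, the hunk above reduces the availability decision to three booleans plus a parse of the `"model_name@factory"` identifier described in the docstring. A minimal, self-contained sketch of that combination follows; the function names, the `@`-split behaviour, and the handling of the second branch (which this hunk cuts off) are assumptions, not RAGFlow's actual implementation:

```python
def split_model_name_and_factory(embd_id: str) -> tuple[str, str | None]:
    # Assumed behaviour of the helper used above: "bge-m3@BAAI" -> ("bge-m3", "BAAI").
    name, _, factory = embd_id.partition("@")
    return name, (factory or None)


def embedding_available(is_builtin_model: bool, is_tenant_model: bool, in_llm_service: bool) -> bool:
    """Mirror of the boolean logic in the hunk above (illustrative only)."""
    if not (is_builtin_model or is_tenant_model or in_llm_service):
        # The "Unsupported model: <...>" error path.
        return False
    if not (is_builtin_model or is_tenant_model):
        # The hunk ends at this check; assumed to mean the model is known to the
        # system but not yet configured (e.g. no API key) for this tenant.
        return False
    return True
```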
# FILE MANAGEMENT WITHIN DATASET
- def upload_documnets(auth, dataset_id, files_path=None):
+ def upload_documents(auth, dataset_id, files_path=None):
url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)
if files_path is None:
return res
- def list_documnets(auth, dataset_id, params=None):
+ def list_documents(auth, dataset_id, params=None):
url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)
res = requests.get(url=url, headers=HEADERS, auth=auth, params=params)
return res.json()
- def update_documnet(auth, dataset_id, document_id, payload=None):
+ def update_document(auth, dataset_id, document_id, payload=None):
url = f"{HOST_ADDRESS}{FILE_API_URL}/{document_id}".format(dataset_id=dataset_id)
res = requests.put(url=url, headers=HEADERS, auth=auth, json=payload)
return res.json()
- def delete_documnets(auth, dataset_id, payload=None):
+ def delete_documents(auth, dataset_id, payload=None):
url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)
res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload)
return res.json()
- def parse_documnets(auth, dataset_id, payload=None):
+ def parse_documents(auth, dataset_id, payload=None):
url = f"{HOST_ADDRESS}{FILE_CHUNK_API_URL}".format(dataset_id=dataset_id)
res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload)
return res.json()
- def stop_parse_documnets(auth, dataset_id, payload=None):
+ def stop_parse_documents(auth, dataset_id, payload=None):
url = f"{HOST_ADDRESS}{FILE_CHUNK_API_URL}".format(dataset_id=dataset_id)
res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload)
return res.json()
for i in range(num):
fp = create_txt_file(tmp_path / f"ragflow_test_upload_{i}.txt")
fps.append(fp)
- res = upload_documnets(auth, dataset_id, fps)
+ res = upload_documents(auth, dataset_id, fps)
document_ids = []
for document in res["data"]:
document_ids.append(document["id"])
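These helpers all follow the same pattern: format the dataset-scoped URL, send the request with the caller's auth, and return the decoded JSON. A hypothetical end-to-end use of them is sketched below; the test name and exact assertions are illustrative, and only the helper signatures and the `code`/`data` response fields come from this diff:

```python
from common import delete_documents, list_documents, parse_documents, upload_documents
from utils.file_utils import create_txt_file


def smoke_test_document_lifecycle(auth, dataset_id, tmp_path):
    # Hypothetical flow stitched together from the helpers defined above.
    fp = create_txt_file(tmp_path / "ragflow_smoke.txt")
    uploaded = upload_documents(auth, dataset_id, [fp])
    assert uploaded["code"] == 0
    document_id = uploaded["data"][0]["id"]

    parse_documents(auth, dataset_id, {"document_ids": [document_id]})

    listed = list_documents(auth, dataset_id, params={"id": document_id})
    assert listed["data"]["docs"][0]["id"] == document_id

    deleted = delete_documents(auth, dataset_id, {"ids": [document_id]})
    assert deleted["code"] == 0
```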
delete_chat_assistants,
delete_datasets,
delete_session_with_chat_assistants,
- list_documnets,
- parse_documnets,
+ list_documents,
+ parse_documents,
)
from libs.auth import RAGFlowHttpApiAuth
from utils import wait_for
@wait_for(30, 1, "Document parsing timeout")
def condition(_auth, _dataset_id):
- res = list_documnets(_auth, _dataset_id)
+ res = list_documents(_auth, _dataset_id)
for doc in res["data"]["docs"]:
if doc["run"] != "DONE":
return False
return True
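`wait_for` itself is imported from `utils` and is not shown in this diff. Purely as an assumption about its `(timeout, interval, message)` signature, a minimal polling decorator with those semantics might look like this:

```python
import functools
import time


def wait_for(timeout: float, interval: float, message: str):
    """Assumed semantics: retry the wrapped predicate until it returns True or `timeout` elapses."""
    def decorator(predicate):
        @functools.wraps(predicate)
        def wrapper(*args, **kwargs):
            deadline = time.monotonic() + timeout
            while time.monotonic() < deadline:
                if predicate(*args, **kwargs):
                    return True
                time.sleep(interval)
            raise TimeoutError(message)
        return wrapper
    return decorator
```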
@pytest.fixture
def generate_test_files(request, tmp_path):
file_creators = {
"docx": (tmp_path / "ragflow_test.docx", create_docx_file),
"excel": (tmp_path / "ragflow_test.xlsx", create_excel_file),
"ppt": (tmp_path / "ragflow_test.pptx", create_ppt_file),
"image": (tmp_path / "ragflow_test.png", create_image_file),
"pdf": (tmp_path / "ragflow_test.pdf", create_pdf_file),
"txt": (tmp_path / "ragflow_test.txt", create_txt_file),
"md": (tmp_path / "ragflow_test.md", create_md_file),
"json": (tmp_path / "ragflow_test.json", create_json_file),
"eml": (tmp_path / "ragflow_test.eml", create_eml_file),
"html": (tmp_path / "ragflow_test.html", create_html_file),
}
files = {}
for file_type, (file_path, creator_func) in file_creators.items():
if request.param in ["", file_type]:
creator_func(file_path)
files[file_type] = file_path
return files
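The `request.param` check above implies the fixture is driven by indirect parametrization, which matches how `test_file_type_validation` later reads `request.node.callspec.params["generate_test_files"]`. A hypothetical consumer written against that pattern (the test name and parameter values are illustrative):

```python
import pytest
from common import upload_documents


@pytest.mark.parametrize("generate_test_files", ["docx", "pdf", "txt"], indirect=True)
def test_upload_generated_file(api_key, add_dataset_func, generate_test_files, request):
    # The fixture returns a dict keyed by file type; pick the file created for this run.
    file_type = request.node.callspec.params["generate_test_files"]
    fp = generate_test_files[file_type]
    res = upload_documents(api_key, add_dataset_func, [fp])
    assert res["code"] == 0
```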
| @pytest.fixture(scope="class") | |||||
| def ragflow_tmp_dir(request, tmp_path_factory): | |||||
| class_name = request.cls.__name__ | |||||
| return tmp_path_factory.mktemp(class_name) | |||||
| @pytest.fixture(scope="session") | @pytest.fixture(scope="session") | ||||
| def api_key(token): | def api_key(token): | ||||
| return RAGFlowHttpApiAuth(token) | return RAGFlowHttpApiAuth(token) | ||||
| request.addfinalizer(cleanup) | request.addfinalizer(cleanup) | ||||
| @pytest.fixture | |||||
| def generate_test_files(request, tmp_path): | |||||
| file_creators = { | |||||
| "docx": (tmp_path / "ragflow_test.docx", create_docx_file), | |||||
| "excel": (tmp_path / "ragflow_test.xlsx", create_excel_file), | |||||
| "ppt": (tmp_path / "ragflow_test.pptx", create_ppt_file), | |||||
| "image": (tmp_path / "ragflow_test.png", create_image_file), | |||||
| "pdf": (tmp_path / "ragflow_test.pdf", create_pdf_file), | |||||
| "txt": (tmp_path / "ragflow_test.txt", create_txt_file), | |||||
| "md": (tmp_path / "ragflow_test.md", create_md_file), | |||||
| "json": (tmp_path / "ragflow_test.json", create_json_file), | |||||
| "eml": (tmp_path / "ragflow_test.eml", create_eml_file), | |||||
| "html": (tmp_path / "ragflow_test.html", create_html_file), | |||||
| } | |||||
| files = {} | |||||
| for file_type, (file_path, creator_func) in file_creators.items(): | |||||
| if request.param in ["", file_type]: | |||||
| creator_func(file_path) | |||||
| files[file_type] = file_path | |||||
| return files | |||||
| @pytest.fixture(scope="class") | |||||
| def ragflow_tmp_dir(request, tmp_path_factory): | |||||
| class_name = request.cls.__name__ | |||||
| return tmp_path_factory.mktemp(class_name) | |||||
| @pytest.fixture(scope="class") | @pytest.fixture(scope="class") | ||||
| def add_dataset(request, api_key): | def add_dataset(request, api_key): | ||||
| def cleanup(): | def cleanup(): | ||||
| @pytest.fixture(scope="class") | @pytest.fixture(scope="class") | ||||
| def add_chunks(api_key, add_document): | def add_chunks(api_key, add_document): | ||||
| dataset_id, document_id = add_document | dataset_id, document_id = add_document | ||||
| parse_documnets(api_key, dataset_id, {"document_ids": [document_id]}) | |||||
| parse_documents(api_key, dataset_id, {"document_ids": [document_id]}) | |||||
| condition(api_key, dataset_id) | condition(api_key, dataset_id) | ||||
| chunk_ids = [] | chunk_ids = [] | ||||
| request.addfinalizer(cleanup) | request.addfinalizer(cleanup) | ||||
| dataset_id, document_id = add_document | dataset_id, document_id = add_document | ||||
| parse_documnets(api_key, dataset_id, {"document_ids": [document_id]}) | |||||
| parse_documents(api_key, dataset_id, {"document_ids": [document_id]}) | |||||
| condition(api_key, dataset_id) | condition(api_key, dataset_id) | ||||
| chat_assistant_ids = [] | chat_assistant_ids = [] |
# limitations under the License.
#
import pytest
- from common import create_chat_assistant, delete_chat_assistants, list_documnets, parse_documnets
+ from common import create_chat_assistant, delete_chat_assistants, list_documents, parse_documents
from utils import wait_for
@wait_for(30, 1, "Document parsing timeout")
def condition(_auth, _dataset_id):
- res = list_documnets(_auth, _dataset_id)
+ res = list_documents(_auth, _dataset_id)
for doc in res["data"]["docs"]:
if doc["run"] != "DONE":
return False
request.addfinalizer(cleanup)
dataset_id, document_id = add_document
- parse_documnets(api_key, dataset_id, {"document_ids": [document_id]})
+ parse_documents(api_key, dataset_id, {"document_ids": [document_id]})
condition(api_key, dataset_id)
chat_assistant_ids = []
import pytest
- from common import add_chunk, delete_chunks, list_documnets, parse_documnets
+ from common import add_chunk, delete_chunks, list_documents, parse_documents
from utils import wait_for
@wait_for(30, 1, "Document parsing timeout")
def condition(_auth, _dataset_id):
- res = list_documnets(_auth, _dataset_id)
+ res = list_documents(_auth, _dataset_id)
for doc in res["data"]["docs"]:
if doc["run"] != "DONE":
return False
@pytest.fixture(scope="function")
def add_chunks_func(request, api_key, add_document):
dataset_id, document_id = add_document
- parse_documnets(api_key, dataset_id, {"document_ids": [document_id]})
+ parse_documents(api_key, dataset_id, {"document_ids": [document_id]})
condition(api_key, dataset_id)
chunk_ids = []
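The `add_chunks_func` hunk stops right after `chunk_ids = []`. Based on the helpers visible elsewhere in this diff (`add_chunk`, `delete_chunks`, `parse_documents`), the fixture plausibly continues along the lines below; the loop count, the response shape used to extract the chunk id, and the cleanup payload are all assumptions:

```python
import pytest
from common import add_chunk, delete_chunks, parse_documents


@pytest.fixture(scope="function")
def add_chunks_func_sketch(request, api_key, add_document):
    # Hypothetical completion of the truncated fixture above; the real one may differ.
    dataset_id, document_id = add_document
    parse_documents(api_key, dataset_id, {"document_ids": [document_id]})
    condition(api_key, dataset_id)  # the wait_for-decorated predicate defined above
    chunk_ids = []
    for i in range(4):
        res = add_chunk(api_key, dataset_id, document_id, {"content": f"chunk test {i}"})
        chunk_ids.append(res["data"]["chunk"]["id"])  # response shape assumed

    def cleanup():
        delete_chunks(api_key, dataset_id, document_id, {"chunk_ids": chunk_ids})  # payload shape assumed

    request.addfinalizer(cleanup)
    return dataset_id, document_id, chunk_ids
```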
from concurrent.futures import ThreadPoolExecutor
import pytest
- from common import INVALID_API_TOKEN, add_chunk, delete_documnets, list_chunks
+ from common import INVALID_API_TOKEN, add_chunk, delete_documents, list_chunks
from libs.auth import RAGFlowHttpApiAuth
@pytest.mark.p2
def test_add_chunk_to_deleted_document(self, api_key, add_document):
dataset_id, document_id = add_document
- delete_documnets(api_key, dataset_id, {"ids": [document_id]})
+ delete_documents(api_key, dataset_id, {"ids": [document_id]})
res = add_chunk(api_key, dataset_id, document_id, {"content": "chunk test"})
assert res["code"] == 102
assert res["message"] == f"You don't own the document {document_id}."
from random import randint
import pytest
- from common import INVALID_API_TOKEN, delete_documnets, update_chunk
+ from common import INVALID_API_TOKEN, delete_documents, update_chunk
from libs.auth import RAGFlowHttpApiAuth
@pytest.mark.p3
def test_update_chunk_to_deleted_document(self, api_key, add_chunks):
dataset_id, document_id, chunk_ids = add_chunks
- delete_documnets(api_key, dataset_id, {"ids": [document_id]})
+ delete_documents(api_key, dataset_id, {"ids": [document_id]})
res = update_chunk(api_key, dataset_id, document_id, chunk_ids[0])
assert res["code"] == 102
assert res["message"] == f"Can't find this chunk {chunk_ids[0]}"
import pytest
- from common import bulk_upload_documents, delete_documnets
+ from common import bulk_upload_documents, delete_documents
@pytest.fixture(scope="function")
document_ids = bulk_upload_documents(api_key, dataset_id, 1, ragflow_tmp_dir)
def cleanup():
- delete_documnets(api_key, dataset_id, {"ids": document_ids})
+ delete_documents(api_key, dataset_id, {"ids": document_ids})
request.addfinalizer(cleanup)
return dataset_id, document_ids[0]
document_ids = bulk_upload_documents(api_key, dataset_id, 5, ragflow_tmp_dir)
def cleanup():
- delete_documnets(api_key, dataset_id, {"ids": document_ids})
+ delete_documents(api_key, dataset_id, {"ids": document_ids})
request.addfinalizer(cleanup)
return dataset_id, document_ids
@pytest.fixture(scope="function")
- def add_documents_func(api_key, add_dataset_func, ragflow_tmp_dir):
+ def add_documents_func(request, api_key, add_dataset_func, ragflow_tmp_dir):
dataset_id = add_dataset_func
document_ids = bulk_upload_documents(api_key, dataset_id, 3, ragflow_tmp_dir)
+ def cleanup():
+ delete_documents(api_key, dataset_id, {"ids": document_ids})
+ request.addfinalizer(cleanup)
return dataset_id, document_ids
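The change above threads `request` into `add_documents_func` so the uploaded documents can be deleted in a finalizer. The same teardown can be written as a yield fixture; a sketch of that equivalent form (not part of this PR, fixture name is illustrative):

```python
import pytest
from common import bulk_upload_documents, delete_documents


@pytest.fixture(scope="function")
def add_documents_func_yield(api_key, add_dataset_func, ragflow_tmp_dir):
    # Same behaviour as the finalizer-based fixture above, using yield-style teardown.
    dataset_id = add_dataset_func
    document_ids = bulk_upload_documents(api_key, dataset_id, 3, ragflow_tmp_dir)
    yield dataset_id, document_ids
    delete_documents(api_key, dataset_id, {"ids": document_ids})
```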
from concurrent.futures import ThreadPoolExecutor
import pytest
- from common import INVALID_API_TOKEN, bulk_upload_documents, delete_documnets, list_documnets
+ from common import INVALID_API_TOKEN, bulk_upload_documents, delete_documents, list_documents
from libs.auth import RAGFlowHttpApiAuth
],
)
def test_invalid_auth(self, invalid_auth, expected_code, expected_message):
- res = delete_documnets(invalid_auth, "dataset_id")
+ res = delete_documents(invalid_auth, "dataset_id")
assert res["code"] == expected_code
assert res["message"] == expected_message
dataset_id, document_ids = add_documents_func
if callable(payload):
payload = payload(document_ids)
- res = delete_documnets(api_key, dataset_id, payload)
+ res = delete_documents(api_key, dataset_id, payload)
assert res["code"] == expected_code
if res["code"] != 0:
assert res["message"] == expected_message
- res = list_documnets(api_key, dataset_id)
+ res = list_documents(api_key, dataset_id)
assert len(res["data"]["docs"]) == remaining
assert res["data"]["total"] == remaining
)
def test_invalid_dataset_id(self, api_key, add_documents_func, dataset_id, expected_code, expected_message):
_, document_ids = add_documents_func
- res = delete_documnets(api_key, dataset_id, {"ids": document_ids[:1]})
+ res = delete_documents(api_key, dataset_id, {"ids": document_ids[:1]})
assert res["code"] == expected_code
assert res["message"] == expected_message
dataset_id, document_ids = add_documents_func
if callable(payload):
payload = payload(document_ids)
- res = delete_documnets(api_key, dataset_id, payload)
+ res = delete_documents(api_key, dataset_id, payload)
assert res["code"] == 102
assert res["message"] == "Documents not found: ['invalid_id']"
- res = list_documnets(api_key, dataset_id)
+ res = list_documents(api_key, dataset_id)
assert len(res["data"]["docs"]) == 0
assert res["data"]["total"] == 0
@pytest.mark.p2
def test_repeated_deletion(self, api_key, add_documents_func):
dataset_id, document_ids = add_documents_func
- res = delete_documnets(api_key, dataset_id, {"ids": document_ids})
+ res = delete_documents(api_key, dataset_id, {"ids": document_ids})
assert res["code"] == 0
- res = delete_documnets(api_key, dataset_id, {"ids": document_ids})
+ res = delete_documents(api_key, dataset_id, {"ids": document_ids})
assert res["code"] == 102
assert "Documents not found" in res["message"]
@pytest.mark.p2
def test_duplicate_deletion(self, api_key, add_documents_func):
dataset_id, document_ids = add_documents_func
- res = delete_documnets(api_key, dataset_id, {"ids": document_ids + document_ids})
+ res = delete_documents(api_key, dataset_id, {"ids": document_ids + document_ids})
assert res["code"] == 0
assert "Duplicate document ids" in res["data"]["errors"][0]
assert res["data"]["success_count"] == 3
- res = list_documnets(api_key, dataset_id)
+ res = list_documents(api_key, dataset_id)
assert len(res["data"]["docs"]) == 0
assert res["data"]["total"] == 0
@pytest.mark.p3
def test_concurrent_deletion(api_key, add_dataset, tmp_path):
- documnets_num = 100
+ documents_num = 100
dataset_id = add_dataset
- document_ids = bulk_upload_documents(api_key, dataset_id, documnets_num, tmp_path)
+ document_ids = bulk_upload_documents(api_key, dataset_id, documents_num, tmp_path)
with ThreadPoolExecutor(max_workers=5) as executor:
futures = [
executor.submit(
- delete_documnets,
+ delete_documents,
api_key,
dataset_id,
{"ids": document_ids[i : i + 1]},
)
- for i in range(documnets_num)
+ for i in range(documents_num)
]
responses = [f.result() for f in futures]
assert all(r["code"] == 0 for r in responses)
@pytest.mark.p3
def test_delete_1k(api_key, add_dataset, tmp_path):
- documnets_num = 1_000
+ documents_num = 1_000
dataset_id = add_dataset
- document_ids = bulk_upload_documents(api_key, dataset_id, documnets_num, tmp_path)
- res = list_documnets(api_key, dataset_id)
- assert res["data"]["total"] == documnets_num
+ document_ids = bulk_upload_documents(api_key, dataset_id, documents_num, tmp_path)
+ res = list_documents(api_key, dataset_id)
+ assert res["data"]["total"] == documents_num
- res = delete_documnets(api_key, dataset_id, {"ids": document_ids})
+ res = delete_documents(api_key, dataset_id, {"ids": document_ids})
assert res["code"] == 0
- res = list_documnets(api_key, dataset_id)
+ res = list_documents(api_key, dataset_id)
assert res["data"]["total"] == 0
#
import json
- from concurrent.futures import ThreadPoolExecutor
+ from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
- from common import INVALID_API_TOKEN, bulk_upload_documents, download_document, upload_documnets
+ from common import INVALID_API_TOKEN, bulk_upload_documents, download_document, upload_documents
from libs.auth import RAGFlowHttpApiAuth
from requests import codes
from utils import compare_by_hash
def test_file_type_validation(api_key, add_dataset, generate_test_files, request):
dataset_id = add_dataset
fp = generate_test_files[request.node.callspec.params["generate_test_files"]]
- res = upload_documnets(api_key, dataset_id, [fp])
+ res = upload_documents(api_key, dataset_id, [fp])
document_id = res["data"][0]["id"]
res = download_document(
@pytest.mark.p3
def test_concurrent_download(api_key, add_dataset, tmp_path):
- document_count = 20
+ count = 20
dataset_id = add_dataset
- document_ids = bulk_upload_documents(api_key, dataset_id, document_count, tmp_path)
+ document_ids = bulk_upload_documents(api_key, dataset_id, count, tmp_path)
with ThreadPoolExecutor(max_workers=5) as executor:
futures = [
document_ids[i],
tmp_path / f"ragflow_test_download_{i}.txt",
)
- for i in range(document_count)
+ for i in range(count)
]
- responses = [f.result() for f in futures]
- assert all(r.status_code == codes.ok for r in responses)
- for i in range(document_count):
+ responses = list(as_completed(futures))
+ assert len(responses) == count, responses
+ for i in range(count):
assert compare_by_hash(
tmp_path / f"ragflow_test_upload_{i}.txt",
tmp_path / f"ragflow_test_download_{i}.txt",
from concurrent.futures import ThreadPoolExecutor
import pytest
- from common import INVALID_API_TOKEN, list_documnets
+ from common import INVALID_API_TOKEN, list_documents
from libs.auth import RAGFlowHttpApiAuth
from utils import is_sorted
],
)
def test_invalid_auth(self, invalid_auth, expected_code, expected_message):
- res = list_documnets(invalid_auth, "dataset_id")
+ res = list_documents(invalid_auth, "dataset_id")
assert res["code"] == expected_code
assert res["message"] == expected_message
@pytest.mark.p1
def test_default(self, api_key, add_documents):
dataset_id, _ = add_documents
- res = list_documnets(api_key, dataset_id)
+ res = list_documents(api_key, dataset_id)
assert res["code"] == 0
assert len(res["data"]["docs"]) == 5
assert res["data"]["total"] == 5
],
)
def test_invalid_dataset_id(self, api_key, dataset_id, expected_code, expected_message):
- res = list_documnets(api_key, dataset_id)
+ res = list_documents(api_key, dataset_id)
assert res["code"] == expected_code
assert res["message"] == expected_message
expected_message,
):
dataset_id, _ = add_documents
- res = list_documnets(api_key, dataset_id, params=params)
+ res = list_documents(api_key, dataset_id, params=params)
assert res["code"] == expected_code
if expected_code == 0:
assert len(res["data"]["docs"]) == expected_page_size
expected_message,
):
dataset_id, _ = add_documents
- res = list_documnets(api_key, dataset_id, params=params)
+ res = list_documents(api_key, dataset_id, params=params)
assert res["code"] == expected_code
if expected_code == 0:
assert len(res["data"]["docs"]) == expected_page_size
expected_message,
):
dataset_id, _ = add_documents
- res = list_documnets(api_key, dataset_id, params=params)
+ res = list_documents(api_key, dataset_id, params=params)
assert res["code"] == expected_code
if expected_code == 0:
if callable(assertions):
expected_message,
):
dataset_id, _ = add_documents
- res = list_documnets(api_key, dataset_id, params=params)
+ res = list_documents(api_key, dataset_id, params=params)
assert res["code"] == expected_code
if expected_code == 0:
if callable(assertions):
)
def test_keywords(self, api_key, add_documents, params, expected_num):
dataset_id, _ = add_documents
- res = list_documnets(api_key, dataset_id, params=params)
+ res = list_documents(api_key, dataset_id, params=params)
assert res["code"] == 0
assert len(res["data"]["docs"]) == expected_num
assert res["data"]["total"] == expected_num
expected_message,
):
dataset_id, _ = add_documents
- res = list_documnets(api_key, dataset_id, params=params)
+ res = list_documents(api_key, dataset_id, params=params)
assert res["code"] == expected_code
if expected_code == 0:
if params["name"] in [None, ""]:
params = {"id": document_id(document_ids)}
else:
params = {"id": document_id}
- res = list_documnets(api_key, dataset_id, params=params)
+ res = list_documents(api_key, dataset_id, params=params)
assert res["code"] == expected_code
if expected_code == 0:
else:
params = {"id": document_id, "name": name}
- res = list_documnets(api_key, dataset_id, params=params)
+ res = list_documents(api_key, dataset_id, params=params)
if expected_code == 0:
assert len(res["data"]["docs"]) == expected_num
else:
dataset_id, _ = add_documents
with ThreadPoolExecutor(max_workers=5) as executor:
- futures = [executor.submit(list_documnets, api_key, dataset_id) for i in range(100)]
+ futures = [executor.submit(list_documents, api_key, dataset_id) for i in range(100)]
responses = [f.result() for f in futures]
assert all(r["code"] == 0 for r in responses)
def test_invalid_params(self, api_key, add_documents):
dataset_id, _ = add_documents
params = {"a": "b"}
- res = list_documnets(api_key, dataset_id, params=params)
+ res = list_documents(api_key, dataset_id, params=params)
assert res["code"] == 0
assert len(res["data"]["docs"]) == 5
from concurrent.futures import ThreadPoolExecutor
import pytest
- from common import INVALID_API_TOKEN, bulk_upload_documents, list_documnets, parse_documnets
+ from common import INVALID_API_TOKEN, bulk_upload_documents, list_documents, parse_documents
from libs.auth import RAGFlowHttpApiAuth
from utils import wait_for
@wait_for(30, 1, "Document parsing timeout")
def condition(_auth, _dataset_id, _document_ids=None):
- res = list_documnets(_auth, _dataset_id)
+ res = list_documents(_auth, _dataset_id)
target_docs = res["data"]["docs"]
if _document_ids is None:
def validate_document_details(auth, dataset_id, document_ids):
for document_id in document_ids:
- res = list_documnets(auth, dataset_id, params={"id": document_id})
+ res = list_documents(auth, dataset_id, params={"id": document_id})
doc = res["data"]["docs"][0]
assert doc["run"] == "DONE"
assert len(doc["process_begin_at"]) > 0
],
)
def test_invalid_auth(self, invalid_auth, expected_code, expected_message):
- res = parse_documnets(invalid_auth, "dataset_id")
+ res = parse_documents(invalid_auth, "dataset_id")
assert res["code"] == expected_code
assert res["message"] == expected_message
dataset_id, document_ids = add_documents_func
if callable(payload):
payload = payload(document_ids)
- res = parse_documnets(api_key, dataset_id, payload)
+ res = parse_documents(api_key, dataset_id, payload)
assert res["code"] == expected_code
if expected_code != 0:
assert res["message"] == expected_message
expected_message,
):
_, document_ids = add_documents_func
- res = parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+ res = parse_documents(api_key, dataset_id, {"document_ids": document_ids})
assert res["code"] == expected_code
assert res["message"] == expected_message
dataset_id, document_ids = add_documents_func
if callable(payload):
payload = payload(document_ids)
- res = parse_documnets(api_key, dataset_id, payload)
+ res = parse_documents(api_key, dataset_id, payload)
assert res["code"] == 102
assert res["message"] == "Documents not found: ['invalid_id']"
@pytest.mark.p3
def test_repeated_parse(self, api_key, add_documents_func):
dataset_id, document_ids = add_documents_func
- res = parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+ res = parse_documents(api_key, dataset_id, {"document_ids": document_ids})
assert res["code"] == 0
condition(api_key, dataset_id)
- res = parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+ res = parse_documents(api_key, dataset_id, {"document_ids": document_ids})
assert res["code"] == 0
@pytest.mark.p3
def test_duplicate_parse(self, api_key, add_documents_func):
dataset_id, document_ids = add_documents_func
- res = parse_documnets(api_key, dataset_id, {"document_ids": document_ids + document_ids})
+ res = parse_documents(api_key, dataset_id, {"document_ids": document_ids + document_ids})
assert res["code"] == 0
assert "Duplicate document ids" in res["data"]["errors"][0]
assert res["data"]["success_count"] == 3
def test_parse_100_files(api_key, add_dataset_func, tmp_path):
@wait_for(100, 1, "Document parsing timeout")
def condition(_auth, _dataset_id, _document_num):
- res = list_documnets(_auth, _dataset_id, {"page_size": _document_num})
+ res = list_documents(_auth, _dataset_id, {"page_size": _document_num})
for doc in res["data"]["docs"]:
if doc["run"] != "DONE":
return False
document_num = 100
dataset_id = add_dataset_func
document_ids = bulk_upload_documents(api_key, dataset_id, document_num, tmp_path)
- res = parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+ res = parse_documents(api_key, dataset_id, {"document_ids": document_ids})
assert res["code"] == 0
condition(api_key, dataset_id, document_num)
def test_concurrent_parse(api_key, add_dataset_func, tmp_path):
@wait_for(120, 1, "Document parsing timeout")
def condition(_auth, _dataset_id, _document_num):
- res = list_documnets(_auth, _dataset_id, {"page_size": _document_num})
+ res = list_documents(_auth, _dataset_id, {"page_size": _document_num})
for doc in res["data"]["docs"]:
if doc["run"] != "DONE":
return False
with ThreadPoolExecutor(max_workers=5) as executor:
futures = [
executor.submit(
- parse_documnets,
+ parse_documents,
api_key,
dataset_id,
{"document_ids": document_ids[i : i + 1]},
from time import sleep
import pytest
- from common import INVALID_API_TOKEN, bulk_upload_documents, list_documnets, parse_documnets, stop_parse_documnets
+ from common import INVALID_API_TOKEN, bulk_upload_documents, list_documents, parse_documents, stop_parse_documents
from libs.auth import RAGFlowHttpApiAuth
from utils import wait_for
def validate_document_parse_done(auth, dataset_id, document_ids):
for document_id in document_ids:
- res = list_documnets(auth, dataset_id, params={"id": document_id})
+ res = list_documents(auth, dataset_id, params={"id": document_id})
doc = res["data"]["docs"][0]
assert doc["run"] == "DONE"
assert len(doc["process_begin_at"]) > 0
def validate_document_parse_cancel(auth, dataset_id, document_ids):
for document_id in document_ids:
- res = list_documnets(auth, dataset_id, params={"id": document_id})
+ res = list_documents(auth, dataset_id, params={"id": document_id})
doc = res["data"]["docs"][0]
assert doc["run"] == "CANCEL"
assert len(doc["process_begin_at"]) > 0
],
)
def test_invalid_auth(self, invalid_auth, expected_code, expected_message):
- res = stop_parse_documnets(invalid_auth, "dataset_id")
+ res = stop_parse_documents(invalid_auth, "dataset_id")
assert res["code"] == expected_code
assert res["message"] == expected_message
@wait_for(10, 1, "Document parsing timeout")
def condition(_auth, _dataset_id, _document_ids):
for _document_id in _document_ids:
- res = list_documnets(_auth, _dataset_id, {"id": _document_id})
+ res = list_documents(_auth, _dataset_id, {"id": _document_id})
if res["data"]["docs"][0]["run"] != "DONE":
return False
return True
dataset_id, document_ids = add_documents_func
- parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+ parse_documents(api_key, dataset_id, {"document_ids": document_ids})
if callable(payload):
payload = payload(document_ids)
- res = stop_parse_documnets(api_key, dataset_id, payload)
+ res = stop_parse_documents(api_key, dataset_id, payload)
assert res["code"] == expected_code
if expected_code != 0:
assert res["message"] == expected_message
expected_message,
):
dataset_id, document_ids = add_documents_func
- parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
- res = stop_parse_documnets(api_key, invalid_dataset_id, {"document_ids": document_ids})
+ parse_documents(api_key, dataset_id, {"document_ids": document_ids})
+ res = stop_parse_documents(api_key, invalid_dataset_id, {"document_ids": document_ids})
assert res["code"] == expected_code
assert res["message"] == expected_message
)
def test_stop_parse_partial_invalid_document_id(self, api_key, add_documents_func, payload):
dataset_id, document_ids = add_documents_func
- parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+ parse_documents(api_key, dataset_id, {"document_ids": document_ids})
if callable(payload):
payload = payload(document_ids)
- res = stop_parse_documnets(api_key, dataset_id, payload)
+ res = stop_parse_documents(api_key, dataset_id, payload)
assert res["code"] == 102
assert res["message"] == "You don't own the document invalid_id."
@pytest.mark.p3
def test_repeated_stop_parse(self, api_key, add_documents_func):
dataset_id, document_ids = add_documents_func
- parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
- res = stop_parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+ parse_documents(api_key, dataset_id, {"document_ids": document_ids})
+ res = stop_parse_documents(api_key, dataset_id, {"document_ids": document_ids})
assert res["code"] == 0
- res = stop_parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+ res = stop_parse_documents(api_key, dataset_id, {"document_ids": document_ids})
assert res["code"] == 102
assert res["message"] == "Can't stop parsing document with progress at 0 or 1"
@pytest.mark.p3
def test_duplicate_stop_parse(self, api_key, add_documents_func):
dataset_id, document_ids = add_documents_func
- parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
- res = stop_parse_documnets(api_key, dataset_id, {"document_ids": document_ids + document_ids})
+ parse_documents(api_key, dataset_id, {"document_ids": document_ids})
+ res = stop_parse_documents(api_key, dataset_id, {"document_ids": document_ids + document_ids})
assert res["code"] == 0
assert res["data"]["success_count"] == 3
assert f"Duplicate document ids: {document_ids[0]}" in res["data"]["errors"]
document_num = 100
dataset_id = add_dataset_func
document_ids = bulk_upload_documents(api_key, dataset_id, document_num, tmp_path)
- parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+ parse_documents(api_key, dataset_id, {"document_ids": document_ids})
sleep(1)
- res = stop_parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+ res = stop_parse_documents(api_key, dataset_id, {"document_ids": document_ids})
assert res["code"] == 0
validate_document_parse_cancel(api_key, dataset_id, document_ids)
document_num = 50
dataset_id = add_dataset_func
document_ids = bulk_upload_documents(api_key, dataset_id, document_num, tmp_path)
- parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+ parse_documents(api_key, dataset_id, {"document_ids": document_ids})
with ThreadPoolExecutor(max_workers=5) as executor:
futures = [
executor.submit(
- stop_parse_documnets,
+ stop_parse_documents,
api_key,
dataset_id,
{"document_ids": document_ids[i : i + 1]},
import pytest
- from common import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN, list_documnets, update_documnet
+ from common import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN, list_documents, update_document
from libs.auth import RAGFlowHttpApiAuth
],
)
def test_invalid_auth(self, invalid_auth, expected_code, expected_message):
- res = update_documnet(invalid_auth, "dataset_id", "document_id")
+ res = update_document(invalid_auth, "dataset_id", "document_id")
assert res["code"] == expected_code
assert res["message"] == expected_message
)
def test_name(self, api_key, add_documents, name, expected_code, expected_message):
dataset_id, document_ids = add_documents
- res = update_documnet(api_key, dataset_id, document_ids[0], {"name": name})
+ res = update_document(api_key, dataset_id, document_ids[0], {"name": name})
assert res["code"] == expected_code
if expected_code == 0:
- res = list_documnets(api_key, dataset_id, {"id": document_ids[0]})
+ res = list_documents(api_key, dataset_id, {"id": document_ids[0]})
assert res["data"]["docs"][0]["name"] == name
else:
assert res["message"] == expected_message
)
def test_invalid_document_id(self, api_key, add_documents, document_id, expected_code, expected_message):
dataset_id, _ = add_documents
- res = update_documnet(api_key, dataset_id, document_id, {"name": "new_name.txt"})
+ res = update_document(api_key, dataset_id, document_id, {"name": "new_name.txt"})
assert res["code"] == expected_code
assert res["message"] == expected_message
)
def test_invalid_dataset_id(self, api_key, add_documents, dataset_id, expected_code, expected_message):
_, document_ids = add_documents
- res = update_documnet(api_key, dataset_id, document_ids[0], {"name": "new_name.txt"})
+ res = update_document(api_key, dataset_id, document_ids[0], {"name": "new_name.txt"})
assert res["code"] == expected_code
assert res["message"] == expected_message
)
def test_meta_fields(self, api_key, add_documents, meta_fields, expected_code, expected_message):
dataset_id, document_ids = add_documents
- res = update_documnet(api_key, dataset_id, document_ids[0], {"meta_fields": meta_fields})
+ res = update_document(api_key, dataset_id, document_ids[0], {"meta_fields": meta_fields})
if expected_code == 0:
- res = list_documnets(api_key, dataset_id, {"id": document_ids[0]})
+ res = list_documents(api_key, dataset_id, {"id": document_ids[0]})
assert res["data"]["docs"][0]["meta_fields"] == meta_fields
else:
assert res["message"] == expected_message
)
def test_chunk_method(self, api_key, add_documents, chunk_method, expected_code, expected_message):
dataset_id, document_ids = add_documents
- res = update_documnet(api_key, dataset_id, document_ids[0], {"chunk_method": chunk_method})
+ res = update_document(api_key, dataset_id, document_ids[0], {"chunk_method": chunk_method})
assert res["code"] == expected_code
if expected_code == 0:
- res = list_documnets(api_key, dataset_id, {"id": document_ids[0]})
+ res = list_documents(api_key, dataset_id, {"id": document_ids[0]})
if chunk_method != "":
assert res["data"]["docs"][0]["chunk_method"] == chunk_method
else:
expected_message,
):
dataset_id, document_ids = add_documents
- res = update_documnet(api_key, dataset_id, document_ids[0], payload)
+ res = update_document(api_key, dataset_id, document_ids[0], payload)
assert res["code"] == expected_code
assert res["message"] == expected_message
expected_message,
):
dataset_id, document_ids = add_documents
- res = update_documnet(
+ res = update_document(
api_key,
dataset_id,
document_ids[0],
)
assert res["code"] == expected_code
if expected_code == 0:
- res = list_documnets(api_key, dataset_id, {"id": document_ids[0]})
+ res = list_documents(api_key, dataset_id, {"id": document_ids[0]})
if parser_config != {}:
for k, v in parser_config.items():
assert res["data"]["docs"][0]["parser_config"][k] == v
| # | # | ||||
| import string | import string | ||||
| from concurrent.futures import ThreadPoolExecutor | |||||
| from concurrent.futures import ThreadPoolExecutor, as_completed | |||||
| import pytest | import pytest | ||||
| import requests | import requests | ||||
| from common import DOCUMENT_NAME_LIMIT, FILE_API_URL, HOST_ADDRESS, INVALID_API_TOKEN, list_datasets, upload_documnets | |||||
| from common import DOCUMENT_NAME_LIMIT, FILE_API_URL, HOST_ADDRESS, INVALID_API_TOKEN, list_datasets, upload_documents | |||||
| from libs.auth import RAGFlowHttpApiAuth | from libs.auth import RAGFlowHttpApiAuth | ||||
| from requests_toolbelt import MultipartEncoder | from requests_toolbelt import MultipartEncoder | ||||
| from utils.file_utils import create_txt_file | from utils.file_utils import create_txt_file | ||||
| ], | ], | ||||
| ) | ) | ||||
| def test_invalid_auth(self, invalid_auth, expected_code, expected_message): | def test_invalid_auth(self, invalid_auth, expected_code, expected_message): | ||||
| res = upload_documnets(invalid_auth, "dataset_id") | |||||
| res = upload_documents(invalid_auth, "dataset_id") | |||||
| assert res["code"] == expected_code | assert res["code"] == expected_code | ||||
| assert res["message"] == expected_message | assert res["message"] == expected_message | ||||
| def test_valid_single_upload(self, api_key, add_dataset_func, tmp_path): | def test_valid_single_upload(self, api_key, add_dataset_func, tmp_path): | ||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| fp = create_txt_file(tmp_path / "ragflow_test.txt") | fp = create_txt_file(tmp_path / "ragflow_test.txt") | ||||
| res = upload_documnets(api_key, dataset_id, [fp]) | |||||
| res = upload_documents(api_key, dataset_id, [fp]) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert res["data"][0]["dataset_id"] == dataset_id | assert res["data"][0]["dataset_id"] == dataset_id | ||||
| assert res["data"][0]["name"] == fp.name | assert res["data"][0]["name"] == fp.name | ||||
| def test_file_type_validation(self, api_key, add_dataset_func, generate_test_files, request): | def test_file_type_validation(self, api_key, add_dataset_func, generate_test_files, request): | ||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| fp = generate_test_files[request.node.callspec.params["generate_test_files"]] | fp = generate_test_files[request.node.callspec.params["generate_test_files"]] | ||||
| res = upload_documnets(api_key, dataset_id, [fp]) | |||||
| res = upload_documents(api_key, dataset_id, [fp]) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert res["data"][0]["dataset_id"] == dataset_id | assert res["data"][0]["dataset_id"] == dataset_id | ||||
| assert res["data"][0]["name"] == fp.name | assert res["data"][0]["name"] == fp.name | ||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| fp = tmp_path / f"ragflow_test.{file_type}" | fp = tmp_path / f"ragflow_test.{file_type}" | ||||
| fp.touch() | fp.touch() | ||||
| res = upload_documnets(api_key, dataset_id, [fp]) | |||||
| res = upload_documents(api_key, dataset_id, [fp]) | |||||
| assert res["code"] == 500 | assert res["code"] == 500 | ||||
| assert res["message"] == f"ragflow_test.{file_type}: This type of file has not been supported yet!" | assert res["message"] == f"ragflow_test.{file_type}: This type of file has not been supported yet!" | ||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_missing_file(self, api_key, add_dataset_func): | def test_missing_file(self, api_key, add_dataset_func): | ||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| res = upload_documnets(api_key, dataset_id) | |||||
| res = upload_documents(api_key, dataset_id) | |||||
| assert res["code"] == 101 | assert res["code"] == 101 | ||||
| assert res["message"] == "No file part!" | assert res["message"] == "No file part!" | ||||
| fp = tmp_path / "empty.txt" | fp = tmp_path / "empty.txt" | ||||
| fp.touch() | fp.touch() | ||||
| res = upload_documnets(api_key, dataset_id, [fp]) | |||||
| res = upload_documents(api_key, dataset_id, [fp]) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert res["data"][0]["size"] == 0 | assert res["data"][0]["size"] == 0 | ||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| # filename_length = 129 | # filename_length = 129 | ||||
| fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt") | fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt") | ||||
| res = upload_documnets(api_key, dataset_id, [fp]) | |||||
| res = upload_documents(api_key, dataset_id, [fp]) | |||||
| assert res["code"] == 101 | assert res["code"] == 101 | ||||
| assert res["message"] == "File name should be less than 128 bytes." | assert res["message"] == "File name should be less than 128 bytes." | ||||
| @pytest.mark.p2 | @pytest.mark.p2 | ||||
| def test_invalid_dataset_id(self, api_key, tmp_path): | def test_invalid_dataset_id(self, api_key, tmp_path): | ||||
| fp = create_txt_file(tmp_path / "ragflow_test.txt") | fp = create_txt_file(tmp_path / "ragflow_test.txt") | ||||
| res = upload_documnets(api_key, "invalid_dataset_id", [fp]) | |||||
| res = upload_documents(api_key, "invalid_dataset_id", [fp]) | |||||
| assert res["code"] == 100 | assert res["code"] == 100 | ||||
| assert res["message"] == """LookupError("Can\'t find the dataset with ID invalid_dataset_id!")""" | assert res["message"] == """LookupError("Can\'t find the dataset with ID invalid_dataset_id!")""" | ||||
| def test_duplicate_files(self, api_key, add_dataset_func, tmp_path): | def test_duplicate_files(self, api_key, add_dataset_func, tmp_path): | ||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| fp = create_txt_file(tmp_path / "ragflow_test.txt") | fp = create_txt_file(tmp_path / "ragflow_test.txt") | ||||
| res = upload_documnets(api_key, dataset_id, [fp, fp]) | |||||
| res = upload_documents(api_key, dataset_id, [fp, fp]) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert len(res["data"]) == 2 | assert len(res["data"]) == 2 | ||||
| for i in range(len(res["data"])): | for i in range(len(res["data"])): | ||||
| def test_same_file_repeat(self, api_key, add_dataset_func, tmp_path): | def test_same_file_repeat(self, api_key, add_dataset_func, tmp_path): | ||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| fp = create_txt_file(tmp_path / "ragflow_test.txt") | fp = create_txt_file(tmp_path / "ragflow_test.txt") | ||||
| for i in range(10): | |||||
| res = upload_documnets(api_key, dataset_id, [fp]) | |||||
| for i in range(3): | |||||
| res = upload_documents(api_key, dataset_id, [fp]) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert len(res["data"]) == 1 | assert len(res["data"]) == 1 | ||||
| assert res["data"][0]["dataset_id"] == dataset_id | assert res["data"][0]["dataset_id"] == dataset_id | ||||
| fp = tmp_path / f"{safe_filename}.txt" | fp = tmp_path / f"{safe_filename}.txt" | ||||
| fp.write_text("Sample text content") | fp.write_text("Sample text content") | ||||
| res = upload_documnets(api_key, dataset_id, [fp]) | |||||
| res = upload_documents(api_key, dataset_id, [fp]) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| assert len(res["data"]) == 1 | assert len(res["data"]) == 1 | ||||
| assert res["data"][0]["dataset_id"] == dataset_id | assert res["data"][0]["dataset_id"] == dataset_id | ||||
| for i in range(expected_document_count): | for i in range(expected_document_count): | ||||
| fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt") | fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt") | ||||
| fps.append(fp) | fps.append(fp) | ||||
| res = upload_documnets(api_key, dataset_id, fps) | |||||
| res = upload_documents(api_key, dataset_id, fps) | |||||
| assert res["code"] == 0 | assert res["code"] == 0 | ||||
| res = list_datasets(api_key, {"id": dataset_id}) | res = list_datasets(api_key, {"id": dataset_id}) | ||||
| def test_concurrent_upload(self, api_key, add_dataset_func, tmp_path): | def test_concurrent_upload(self, api_key, add_dataset_func, tmp_path): | ||||
| dataset_id = add_dataset_func | dataset_id = add_dataset_func | ||||
| expected_document_count = 20 | |||||
| count = 20 | |||||
| fps = [] | fps = [] | ||||
| for i in range(expected_document_count): | |||||
| for i in range(count): | |||||
| fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt") | fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt") | ||||
| fps.append(fp) | fps.append(fp) | ||||
| with ThreadPoolExecutor(max_workers=5) as executor: | with ThreadPoolExecutor(max_workers=5) as executor: | ||||
| futures = [executor.submit(upload_documnets, api_key, dataset_id, fps[i : i + 1]) for i in range(expected_document_count)] | |||||
| responses = [f.result() for f in futures] | |||||
| assert all(r["code"] == 0 for r in responses) | |||||
| futures = [executor.submit(upload_documents, api_key, dataset_id, fps[i : i + 1]) for i in range(count)] | |||||
| responses = list(as_completed(futures)) | |||||
| assert len(responses) == count, responses | |||||
| res = list_datasets(api_key, {"id": dataset_id}) | res = list_datasets(api_key, {"id": dataset_id}) | ||||
| assert res["data"][0]["document_count"] == expected_document_count | |||||
| assert res["data"][0]["document_count"] == count |