
Fix: document typo in test (#8091)

### What problem does this PR solve?

Fix `documnets` → `documents` typos in the HTTP API test helpers and update all call sites accordingly.

### Type of change

- [x] Typo
tags/v0.19.1
Liu An, 4 months ago
commit 92625e1ca9

api/utils/api_utils.py  (+7, -7)

"""
Verifies availability of an embedding model for a specific tenant.

- Implements a four-stage validation process:
- 1. Model identifier parsing and validation
- 2. System support verification
- 3. Tenant authorization check
- 4. Database operation error handling
+ Performs comprehensive verification through:
+ 1. Identifier Parsing: Decomposes embd_id into name and factory components
+ 2. System Verification: Checks model registration in LLMService
+ 3. Tenant Authorization: Validates tenant-specific model assignments
+ 4. Built-in Model Check: Confirms inclusion in predefined system models

Args:
    embd_id (str): Unique identifier for the embedding model in format "model_name@factory"
try:
    llm_name, llm_factory = TenantLLMService.split_model_name_and_factory(embd_id)
    in_llm_service = bool(LLMService.query(llm_name=llm_name, fid=llm_factory, model_type="embedding"))
    # Tongyi-Qianwen is added to TenantLLM by default, but remains unusable with empty api_key
    tenant_llms = TenantLLMService.get_my_llms(tenant_id=tenant_id)
    is_tenant_model = any(llm["llm_name"] == llm_name and llm["llm_factory"] == llm_factory and llm["model_type"] == "embedding" for llm in tenant_llms)

    is_builtin_model = embd_id in settings.BUILTIN_EMBEDDING_MODELS
-   if not ((is_builtin_model or is_tenant_model or in_llm_service)):
+   if not (is_builtin_model or is_tenant_model or in_llm_service):
        return False, get_error_argument_result(f"Unsupported model: <{embd_id}>")

    if not (is_builtin_model or is_tenant_model):
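For orientation, here is a condensed, hedged sketch of how the three checks in this hunk combine. The wrapper name `verify_embedding_availability`, its signature, and the final success return are assumptions for illustration; the service calls are the ones visible in the diff and rely on the module's existing imports:

```python
# Condensed sketch only; the wrapper name/signature and the success return are assumed.
# TenantLLMService, LLMService, settings and get_error_argument_result come from the
# existing module context of api/utils/api_utils.py.
def verify_embedding_availability(embd_id: str, tenant_id: str):
    llm_name, llm_factory = TenantLLMService.split_model_name_and_factory(embd_id)

    # Registered at all in the system-wide LLM catalogue?
    in_llm_service = bool(LLMService.query(llm_name=llm_name, fid=llm_factory, model_type="embedding"))

    # Explicitly configured for this tenant?
    tenant_llms = TenantLLMService.get_my_llms(tenant_id=tenant_id)
    is_tenant_model = any(
        llm["llm_name"] == llm_name and llm["llm_factory"] == llm_factory and llm["model_type"] == "embedding"
        for llm in tenant_llms
    )

    # Shipped as a built-in embedding model?
    is_builtin_model = embd_id in settings.BUILTIN_EMBEDDING_MODELS

    if not (is_builtin_model or is_tenant_model or in_llm_service):
        return False, get_error_argument_result(f"Unsupported model: <{embd_id}>")
    return True, None  # assumed success path
```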

test/testcases/test_http_api/common.py  (+7, -7)

# FILE MANAGEMENT WITHIN DATASET
- def upload_documnets(auth, dataset_id, files_path=None):
+ def upload_documents(auth, dataset_id, files_path=None):
    url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)

    if files_path is None:
    return res


- def list_documnets(auth, dataset_id, params=None):
+ def list_documents(auth, dataset_id, params=None):
    url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)
    res = requests.get(url=url, headers=HEADERS, auth=auth, params=params)
    return res.json()


- def update_documnet(auth, dataset_id, document_id, payload=None):
+ def update_document(auth, dataset_id, document_id, payload=None):
    url = f"{HOST_ADDRESS}{FILE_API_URL}/{document_id}".format(dataset_id=dataset_id)
    res = requests.put(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


- def delete_documnets(auth, dataset_id, payload=None):
+ def delete_documents(auth, dataset_id, payload=None):
    url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)
    res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


- def parse_documnets(auth, dataset_id, payload=None):
+ def parse_documents(auth, dataset_id, payload=None):
    url = f"{HOST_ADDRESS}{FILE_CHUNK_API_URL}".format(dataset_id=dataset_id)
    res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


- def stop_parse_documnets(auth, dataset_id, payload=None):
+ def stop_parse_documents(auth, dataset_id, payload=None):
    url = f"{HOST_ADDRESS}{FILE_CHUNK_API_URL}".format(dataset_id=dataset_id)
    res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()

    for i in range(num):
        fp = create_txt_file(tmp_path / f"ragflow_test_upload_{i}.txt")
        fps.append(fp)
-   res = upload_documnets(auth, dataset_id, fps)
+   res = upload_documents(auth, dataset_id, fps)
    document_ids = []
    for document in res["data"]:
        document_ids.append(document["id"])
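These helpers are thin wrappers around `requests` against the dataset file endpoints. A hedged usage sketch follows; the `auth` object and `dataset_id` are assumed to come from the fixtures in the conftest files below, and the response shapes follow the assertions used in the tests:

```python
# Hypothetical flow stitched together from the helpers above; not an actual test in the repo.
from common import upload_documents, parse_documents, list_documents, delete_documents

def example_document_lifecycle(auth, dataset_id, tmp_path):
    fp = tmp_path / "ragflow_example.txt"
    fp.write_text("hello ragflow")

    res = upload_documents(auth, dataset_id, [fp])                        # multipart upload
    assert res["code"] == 0
    document_id = res["data"][0]["id"]

    parse_documents(auth, dataset_id, {"document_ids": [document_id]})    # kick off chunking
    assert list_documents(auth, dataset_id)["data"]["total"] == 1         # listed back

    delete_documents(auth, dataset_id, {"ids": [document_id]})            # cleanup
```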

test/testcases/test_http_api/conftest.py  (+34, -34)

    delete_chat_assistants,
    delete_datasets,
    delete_session_with_chat_assistants,
-   list_documnets,
-   parse_documnets,
+   list_documents,
+   parse_documents,
)
from libs.auth import RAGFlowHttpApiAuth
from utils import wait_for


@wait_for(30, 1, "Document parsing timeout")
def condition(_auth, _dataset_id):
-   res = list_documnets(_auth, _dataset_id)
+   res = list_documents(_auth, _dataset_id)
    for doc in res["data"]["docs"]:
        if doc["run"] != "DONE":
            return False
    return True
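The `@wait_for(30, 1, "Document parsing timeout")` decorator used here polls `condition` until it returns True or the timeout expires. Its actual implementation lives in `utils` and is not part of this diff; a plausible sketch with the same `(timeout, interval, message)` calling convention might look like:

```python
# Illustrative sketch only, not the utils.wait_for shipped with the repository.
import functools
import time

def wait_for(timeout: float, interval: float, error_message: str):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            deadline = time.monotonic() + timeout
            while time.monotonic() < deadline:
                if func(*args, **kwargs):   # condition returns True once parsing is done
                    return True
                time.sleep(interval)
            raise TimeoutError(error_message)
        return wrapper
    return decorator
```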




@pytest.fixture
def generate_test_files(request, tmp_path):
file_creators = {
"docx": (tmp_path / "ragflow_test.docx", create_docx_file),
"excel": (tmp_path / "ragflow_test.xlsx", create_excel_file),
"ppt": (tmp_path / "ragflow_test.pptx", create_ppt_file),
"image": (tmp_path / "ragflow_test.png", create_image_file),
"pdf": (tmp_path / "ragflow_test.pdf", create_pdf_file),
"txt": (tmp_path / "ragflow_test.txt", create_txt_file),
"md": (tmp_path / "ragflow_test.md", create_md_file),
"json": (tmp_path / "ragflow_test.json", create_json_file),
"eml": (tmp_path / "ragflow_test.eml", create_eml_file),
"html": (tmp_path / "ragflow_test.html", create_html_file),
}

files = {}
for file_type, (file_path, creator_func) in file_creators.items():
if request.param in ["", file_type]:
creator_func(file_path)
files[file_type] = file_path
return files


@pytest.fixture(scope="class")
def ragflow_tmp_dir(request, tmp_path_factory):
class_name = request.cls.__name__
return tmp_path_factory.mktemp(class_name)


@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def api_key(token): def api_key(token):
return RAGFlowHttpApiAuth(token) return RAGFlowHttpApiAuth(token)
request.addfinalizer(cleanup) request.addfinalizer(cleanup)




@pytest.fixture
def generate_test_files(request, tmp_path):
file_creators = {
"docx": (tmp_path / "ragflow_test.docx", create_docx_file),
"excel": (tmp_path / "ragflow_test.xlsx", create_excel_file),
"ppt": (tmp_path / "ragflow_test.pptx", create_ppt_file),
"image": (tmp_path / "ragflow_test.png", create_image_file),
"pdf": (tmp_path / "ragflow_test.pdf", create_pdf_file),
"txt": (tmp_path / "ragflow_test.txt", create_txt_file),
"md": (tmp_path / "ragflow_test.md", create_md_file),
"json": (tmp_path / "ragflow_test.json", create_json_file),
"eml": (tmp_path / "ragflow_test.eml", create_eml_file),
"html": (tmp_path / "ragflow_test.html", create_html_file),
}

files = {}
for file_type, (file_path, creator_func) in file_creators.items():
if request.param in ["", file_type]:
creator_func(file_path)
files[file_type] = file_path
return files


@pytest.fixture(scope="class")
def ragflow_tmp_dir(request, tmp_path_factory):
class_name = request.cls.__name__
return tmp_path_factory.mktemp(class_name)


@pytest.fixture(scope="class") @pytest.fixture(scope="class")
def add_dataset(request, api_key): def add_dataset(request, api_key):
def cleanup(): def cleanup():
@pytest.fixture(scope="class") @pytest.fixture(scope="class")
def add_chunks(api_key, add_document): def add_chunks(api_key, add_document):
dataset_id, document_id = add_document dataset_id, document_id = add_document
parse_documnets(api_key, dataset_id, {"document_ids": [document_id]})
parse_documents(api_key, dataset_id, {"document_ids": [document_id]})
condition(api_key, dataset_id) condition(api_key, dataset_id)


chunk_ids = [] chunk_ids = []
request.addfinalizer(cleanup) request.addfinalizer(cleanup)


dataset_id, document_id = add_document dataset_id, document_id = add_document
parse_documnets(api_key, dataset_id, {"document_ids": [document_id]})
parse_documents(api_key, dataset_id, {"document_ids": [document_id]})
condition(api_key, dataset_id) condition(api_key, dataset_id)


chat_assistant_ids = [] chat_assistant_ids = []

test/testcases/test_http_api/test_chat_assistant_management/conftest.py  (+3, -3)

# limitations under the License.
#
import pytest
- from common import create_chat_assistant, delete_chat_assistants, list_documnets, parse_documnets
+ from common import create_chat_assistant, delete_chat_assistants, list_documents, parse_documents
from utils import wait_for


@wait_for(30, 1, "Document parsing timeout")
def condition(_auth, _dataset_id):
-   res = list_documnets(_auth, _dataset_id)
+   res = list_documents(_auth, _dataset_id)
    for doc in res["data"]["docs"]:
        if doc["run"] != "DONE":
            return False
    request.addfinalizer(cleanup)

    dataset_id, document_id = add_document
-   parse_documnets(api_key, dataset_id, {"document_ids": [document_id]})
+   parse_documents(api_key, dataset_id, {"document_ids": [document_id]})
    condition(api_key, dataset_id)

    chat_assistant_ids = []

test/testcases/test_http_api/test_chunk_management_within_dataset/conftest.py  (+3, -3)

import pytest
- from common import add_chunk, delete_chunks, list_documnets, parse_documnets
+ from common import add_chunk, delete_chunks, list_documents, parse_documents
from utils import wait_for


@wait_for(30, 1, "Document parsing timeout")
def condition(_auth, _dataset_id):
-   res = list_documnets(_auth, _dataset_id)
+   res = list_documents(_auth, _dataset_id)
    for doc in res["data"]["docs"]:
        if doc["run"] != "DONE":
            return False

@pytest.fixture(scope="function")
def add_chunks_func(request, api_key, add_document):
    dataset_id, document_id = add_document
-   parse_documnets(api_key, dataset_id, {"document_ids": [document_id]})
+   parse_documents(api_key, dataset_id, {"document_ids": [document_id]})
    condition(api_key, dataset_id)

    chunk_ids = []

test/testcases/test_http_api/test_chunk_management_within_dataset/test_add_chunk.py  (+2, -2)

from concurrent.futures import ThreadPoolExecutor

import pytest
- from common import INVALID_API_TOKEN, add_chunk, delete_documnets, list_chunks
+ from common import INVALID_API_TOKEN, add_chunk, delete_documents, list_chunks
from libs.auth import RAGFlowHttpApiAuth


@pytest.mark.p2
def test_add_chunk_to_deleted_document(self, api_key, add_document):
    dataset_id, document_id = add_document
-   delete_documnets(api_key, dataset_id, {"ids": [document_id]})
+   delete_documents(api_key, dataset_id, {"ids": [document_id]})
    res = add_chunk(api_key, dataset_id, document_id, {"content": "chunk test"})
    assert res["code"] == 102
    assert res["message"] == f"You don't own the document {document_id}."

test/testcases/test_http_api/test_chunk_management_within_dataset/test_update_chunk.py  (+2, -2)

from random import randint

import pytest
- from common import INVALID_API_TOKEN, delete_documnets, update_chunk
+ from common import INVALID_API_TOKEN, delete_documents, update_chunk
from libs.auth import RAGFlowHttpApiAuth


@pytest.mark.p3
def test_update_chunk_to_deleted_document(self, api_key, add_chunks):
    dataset_id, document_id, chunk_ids = add_chunks
-   delete_documnets(api_key, dataset_id, {"ids": [document_id]})
+   delete_documents(api_key, dataset_id, {"ids": [document_id]})
    res = update_chunk(api_key, dataset_id, document_id, chunk_ids[0])
    assert res["code"] == 102
    assert res["message"] == f"Can't find this chunk {chunk_ids[0]}"

test/testcases/test_http_api/test_file_management_within_dataset/conftest.py  (+8, -4)

import pytest
- from common import bulk_upload_documents, delete_documnets
+ from common import bulk_upload_documents, delete_documents


@pytest.fixture(scope="function")
    document_ids = bulk_upload_documents(api_key, dataset_id, 1, ragflow_tmp_dir)

    def cleanup():
-       delete_documnets(api_key, dataset_id, {"ids": document_ids})
+       delete_documents(api_key, dataset_id, {"ids": document_ids})

    request.addfinalizer(cleanup)
    return dataset_id, document_ids[0]

    document_ids = bulk_upload_documents(api_key, dataset_id, 5, ragflow_tmp_dir)

    def cleanup():
-       delete_documnets(api_key, dataset_id, {"ids": document_ids})
+       delete_documents(api_key, dataset_id, {"ids": document_ids})

    request.addfinalizer(cleanup)
    return dataset_id, document_ids


@pytest.fixture(scope="function")
- def add_documents_func(api_key, add_dataset_func, ragflow_tmp_dir):
+ def add_documents_func(request, api_key, add_dataset_func, ragflow_tmp_dir):
    dataset_id = add_dataset_func
    document_ids = bulk_upload_documents(api_key, dataset_id, 3, ragflow_tmp_dir)

+   def cleanup():
+       delete_documents(api_key, dataset_id, {"ids": document_ids})
+
+   request.addfinalizer(cleanup)
    return dataset_id, document_ids

test/testcases/test_http_api/test_file_management_within_dataset/test_delete_documents.py  (+21, -21)

from concurrent.futures import ThreadPoolExecutor

import pytest
- from common import INVALID_API_TOKEN, bulk_upload_documents, delete_documnets, list_documnets
+ from common import INVALID_API_TOKEN, bulk_upload_documents, delete_documents, list_documents
from libs.auth import RAGFlowHttpApiAuth


    ],
)
def test_invalid_auth(self, invalid_auth, expected_code, expected_message):
-   res = delete_documnets(invalid_auth, "dataset_id")
+   res = delete_documents(invalid_auth, "dataset_id")
    assert res["code"] == expected_code
    assert res["message"] == expected_message

    dataset_id, document_ids = add_documents_func
    if callable(payload):
        payload = payload(document_ids)
-   res = delete_documnets(api_key, dataset_id, payload)
+   res = delete_documents(api_key, dataset_id, payload)
    assert res["code"] == expected_code
    if res["code"] != 0:
        assert res["message"] == expected_message

-   res = list_documnets(api_key, dataset_id)
+   res = list_documents(api_key, dataset_id)
    assert len(res["data"]["docs"]) == remaining
    assert res["data"]["total"] == remaining

)
def test_invalid_dataset_id(self, api_key, add_documents_func, dataset_id, expected_code, expected_message):
    _, document_ids = add_documents_func
-   res = delete_documnets(api_key, dataset_id, {"ids": document_ids[:1]})
+   res = delete_documents(api_key, dataset_id, {"ids": document_ids[:1]})
    assert res["code"] == expected_code
    assert res["message"] == expected_message

    dataset_id, document_ids = add_documents_func
    if callable(payload):
        payload = payload(document_ids)
-   res = delete_documnets(api_key, dataset_id, payload)
+   res = delete_documents(api_key, dataset_id, payload)
    assert res["code"] == 102
    assert res["message"] == "Documents not found: ['invalid_id']"

-   res = list_documnets(api_key, dataset_id)
+   res = list_documents(api_key, dataset_id)
    assert len(res["data"]["docs"]) == 0
    assert res["data"]["total"] == 0

@pytest.mark.p2
def test_repeated_deletion(self, api_key, add_documents_func):
    dataset_id, document_ids = add_documents_func
-   res = delete_documnets(api_key, dataset_id, {"ids": document_ids})
+   res = delete_documents(api_key, dataset_id, {"ids": document_ids})
    assert res["code"] == 0

-   res = delete_documnets(api_key, dataset_id, {"ids": document_ids})
+   res = delete_documents(api_key, dataset_id, {"ids": document_ids})
    assert res["code"] == 102
    assert "Documents not found" in res["message"]

@pytest.mark.p2
def test_duplicate_deletion(self, api_key, add_documents_func):
    dataset_id, document_ids = add_documents_func
-   res = delete_documnets(api_key, dataset_id, {"ids": document_ids + document_ids})
+   res = delete_documents(api_key, dataset_id, {"ids": document_ids + document_ids})
    assert res["code"] == 0
    assert "Duplicate document ids" in res["data"]["errors"][0]
    assert res["data"]["success_count"] == 3

-   res = list_documnets(api_key, dataset_id)
+   res = list_documents(api_key, dataset_id)
    assert len(res["data"]["docs"]) == 0
    assert res["data"]["total"] == 0


@pytest.mark.p3
def test_concurrent_deletion(api_key, add_dataset, tmp_path):
-   documnets_num = 100
+   documents_num = 100
    dataset_id = add_dataset
-   document_ids = bulk_upload_documents(api_key, dataset_id, documnets_num, tmp_path)
+   document_ids = bulk_upload_documents(api_key, dataset_id, documents_num, tmp_path)

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [
            executor.submit(
-               delete_documnets,
+               delete_documents,
                api_key,
                dataset_id,
                {"ids": document_ids[i : i + 1]},
            )
-           for i in range(documnets_num)
+           for i in range(documents_num)
        ]
    responses = [f.result() for f in futures]
    assert all(r["code"] == 0 for r in responses)


@pytest.mark.p3
def test_delete_1k(api_key, add_dataset, tmp_path):
-   documnets_num = 1_000
+   documents_num = 1_000
    dataset_id = add_dataset
-   document_ids = bulk_upload_documents(api_key, dataset_id, documnets_num, tmp_path)
-   res = list_documnets(api_key, dataset_id)
-   assert res["data"]["total"] == documnets_num
+   document_ids = bulk_upload_documents(api_key, dataset_id, documents_num, tmp_path)
+   res = list_documents(api_key, dataset_id)
+   assert res["data"]["total"] == documents_num

-   res = delete_documnets(api_key, dataset_id, {"ids": document_ids})
+   res = delete_documents(api_key, dataset_id, {"ids": document_ids})
    assert res["code"] == 0

-   res = list_documnets(api_key, dataset_id)
+   res = list_documents(api_key, dataset_id)
    assert res["data"]["total"] == 0

test/testcases/test_http_api/test_file_management_within_dataset/test_download_document.py  (+9, -9)

#

import json
- from concurrent.futures import ThreadPoolExecutor
+ from concurrent.futures import ThreadPoolExecutor, as_completed

import pytest
- from common import INVALID_API_TOKEN, bulk_upload_documents, download_document, upload_documnets
+ from common import INVALID_API_TOKEN, bulk_upload_documents, download_document, upload_documents
from libs.auth import RAGFlowHttpApiAuth
from requests import codes
from utils import compare_by_hash

def test_file_type_validation(api_key, add_dataset, generate_test_files, request):
    dataset_id = add_dataset
    fp = generate_test_files[request.node.callspec.params["generate_test_files"]]
-   res = upload_documnets(api_key, dataset_id, [fp])
+   res = upload_documents(api_key, dataset_id, [fp])
    document_id = res["data"][0]["id"]

    res = download_document(

@pytest.mark.p3
def test_concurrent_download(api_key, add_dataset, tmp_path):
-   document_count = 20
+   count = 20
    dataset_id = add_dataset
-   document_ids = bulk_upload_documents(api_key, dataset_id, document_count, tmp_path)
+   document_ids = bulk_upload_documents(api_key, dataset_id, count, tmp_path)

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [
                document_ids[i],
                tmp_path / f"ragflow_test_download_{i}.txt",
            )
-           for i in range(document_count)
+           for i in range(count)
        ]
-   responses = [f.result() for f in futures]
-   assert all(r.status_code == codes.ok for r in responses)
-   for i in range(document_count):
+   responses = list(as_completed(futures))
+   assert len(responses) == count, responses
+   for i in range(count):
        assert compare_by_hash(
            tmp_path / f"ragflow_test_upload_{i}.txt",
            tmp_path / f"ragflow_test_download_{i}.txt",

test/testcases/test_http_api/test_file_management_within_dataset/test_list_documents.py  (+14, -14)

from concurrent.futures import ThreadPoolExecutor

import pytest
- from common import INVALID_API_TOKEN, list_documnets
+ from common import INVALID_API_TOKEN, list_documents
from libs.auth import RAGFlowHttpApiAuth
from utils import is_sorted

    ],
)
def test_invalid_auth(self, invalid_auth, expected_code, expected_message):
-   res = list_documnets(invalid_auth, "dataset_id")
+   res = list_documents(invalid_auth, "dataset_id")
    assert res["code"] == expected_code
    assert res["message"] == expected_message

@pytest.mark.p1
def test_default(self, api_key, add_documents):
    dataset_id, _ = add_documents
-   res = list_documnets(api_key, dataset_id)
+   res = list_documents(api_key, dataset_id)
    assert res["code"] == 0
    assert len(res["data"]["docs"]) == 5
    assert res["data"]["total"] == 5

    ],
)
def test_invalid_dataset_id(self, api_key, dataset_id, expected_code, expected_message):
-   res = list_documnets(api_key, dataset_id)
+   res = list_documents(api_key, dataset_id)
    assert res["code"] == expected_code
    assert res["message"] == expected_message

    expected_message,
):
    dataset_id, _ = add_documents
-   res = list_documnets(api_key, dataset_id, params=params)
+   res = list_documents(api_key, dataset_id, params=params)
    assert res["code"] == expected_code
    if expected_code == 0:
        assert len(res["data"]["docs"]) == expected_page_size

    expected_message,
):
    dataset_id, _ = add_documents
-   res = list_documnets(api_key, dataset_id, params=params)
+   res = list_documents(api_key, dataset_id, params=params)
    assert res["code"] == expected_code
    if expected_code == 0:
        assert len(res["data"]["docs"]) == expected_page_size

    expected_message,
):
    dataset_id, _ = add_documents
-   res = list_documnets(api_key, dataset_id, params=params)
+   res = list_documents(api_key, dataset_id, params=params)
    assert res["code"] == expected_code
    if expected_code == 0:
        if callable(assertions):

    expected_message,
):
    dataset_id, _ = add_documents
-   res = list_documnets(api_key, dataset_id, params=params)
+   res = list_documents(api_key, dataset_id, params=params)
    assert res["code"] == expected_code
    if expected_code == 0:
        if callable(assertions):

)
def test_keywords(self, api_key, add_documents, params, expected_num):
    dataset_id, _ = add_documents
-   res = list_documnets(api_key, dataset_id, params=params)
+   res = list_documents(api_key, dataset_id, params=params)
    assert res["code"] == 0
    assert len(res["data"]["docs"]) == expected_num
    assert res["data"]["total"] == expected_num

    expected_message,
):
    dataset_id, _ = add_documents
-   res = list_documnets(api_key, dataset_id, params=params)
+   res = list_documents(api_key, dataset_id, params=params)
    assert res["code"] == expected_code
    if expected_code == 0:
        if params["name"] in [None, ""]:

        params = {"id": document_id(document_ids)}
    else:
        params = {"id": document_id}
-   res = list_documnets(api_key, dataset_id, params=params)
+   res = list_documents(api_key, dataset_id, params=params)

    assert res["code"] == expected_code
    if expected_code == 0:

    else:
        params = {"id": document_id, "name": name}

-   res = list_documnets(api_key, dataset_id, params=params)
+   res = list_documents(api_key, dataset_id, params=params)
    if expected_code == 0:
        assert len(res["data"]["docs"]) == expected_num
    else:

    dataset_id, _ = add_documents

    with ThreadPoolExecutor(max_workers=5) as executor:
-       futures = [executor.submit(list_documnets, api_key, dataset_id) for i in range(100)]
+       futures = [executor.submit(list_documents, api_key, dataset_id) for i in range(100)]
    responses = [f.result() for f in futures]
    assert all(r["code"] == 0 for r in responses)

def test_invalid_params(self, api_key, add_documents):
    dataset_id, _ = add_documents
    params = {"a": "b"}
-   res = list_documnets(api_key, dataset_id, params=params)
+   res = list_documents(api_key, dataset_id, params=params)
    assert res["code"] == 0
    assert len(res["data"]["docs"]) == 5

test/testcases/test_http_api/test_file_management_within_dataset/test_parse_documents.py  (+14, -14)

from concurrent.futures import ThreadPoolExecutor

import pytest
- from common import INVALID_API_TOKEN, bulk_upload_documents, list_documnets, parse_documnets
+ from common import INVALID_API_TOKEN, bulk_upload_documents, list_documents, parse_documents
from libs.auth import RAGFlowHttpApiAuth
from utils import wait_for


@wait_for(30, 1, "Document parsing timeout")
def condition(_auth, _dataset_id, _document_ids=None):
-   res = list_documnets(_auth, _dataset_id)
+   res = list_documents(_auth, _dataset_id)
    target_docs = res["data"]["docs"]

    if _document_ids is None:

def validate_document_details(auth, dataset_id, document_ids):
    for document_id in document_ids:
-       res = list_documnets(auth, dataset_id, params={"id": document_id})
+       res = list_documents(auth, dataset_id, params={"id": document_id})
        doc = res["data"]["docs"][0]
        assert doc["run"] == "DONE"
        assert len(doc["process_begin_at"]) > 0

    ],
)
def test_invalid_auth(self, invalid_auth, expected_code, expected_message):
-   res = parse_documnets(invalid_auth, "dataset_id")
+   res = parse_documents(invalid_auth, "dataset_id")
    assert res["code"] == expected_code
    assert res["message"] == expected_message

    dataset_id, document_ids = add_documents_func
    if callable(payload):
        payload = payload(document_ids)
-   res = parse_documnets(api_key, dataset_id, payload)
+   res = parse_documents(api_key, dataset_id, payload)
    assert res["code"] == expected_code
    if expected_code != 0:
        assert res["message"] == expected_message

    expected_message,
):
    _, document_ids = add_documents_func
-   res = parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+   res = parse_documents(api_key, dataset_id, {"document_ids": document_ids})
    assert res["code"] == expected_code
    assert res["message"] == expected_message

    dataset_id, document_ids = add_documents_func
    if callable(payload):
        payload = payload(document_ids)
-   res = parse_documnets(api_key, dataset_id, payload)
+   res = parse_documents(api_key, dataset_id, payload)
    assert res["code"] == 102
    assert res["message"] == "Documents not found: ['invalid_id']"

@pytest.mark.p3
def test_repeated_parse(self, api_key, add_documents_func):
    dataset_id, document_ids = add_documents_func
-   res = parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+   res = parse_documents(api_key, dataset_id, {"document_ids": document_ids})
    assert res["code"] == 0

    condition(api_key, dataset_id)

-   res = parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+   res = parse_documents(api_key, dataset_id, {"document_ids": document_ids})
    assert res["code"] == 0

@pytest.mark.p3
def test_duplicate_parse(self, api_key, add_documents_func):
    dataset_id, document_ids = add_documents_func
-   res = parse_documnets(api_key, dataset_id, {"document_ids": document_ids + document_ids})
+   res = parse_documents(api_key, dataset_id, {"document_ids": document_ids + document_ids})
    assert res["code"] == 0
    assert "Duplicate document ids" in res["data"]["errors"][0]
    assert res["data"]["success_count"] == 3

def test_parse_100_files(api_key, add_dataset_func, tmp_path):
    @wait_for(100, 1, "Document parsing timeout")
    def condition(_auth, _dataset_id, _document_num):
-       res = list_documnets(_auth, _dataset_id, {"page_size": _document_num})
+       res = list_documents(_auth, _dataset_id, {"page_size": _document_num})
        for doc in res["data"]["docs"]:
            if doc["run"] != "DONE":
                return False

    document_num = 100
    dataset_id = add_dataset_func
    document_ids = bulk_upload_documents(api_key, dataset_id, document_num, tmp_path)
-   res = parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+   res = parse_documents(api_key, dataset_id, {"document_ids": document_ids})
    assert res["code"] == 0

    condition(api_key, dataset_id, document_num)

def test_concurrent_parse(api_key, add_dataset_func, tmp_path):
    @wait_for(120, 1, "Document parsing timeout")
    def condition(_auth, _dataset_id, _document_num):
-       res = list_documnets(_auth, _dataset_id, {"page_size": _document_num})
+       res = list_documents(_auth, _dataset_id, {"page_size": _document_num})
        for doc in res["data"]["docs"]:
            if doc["run"] != "DONE":
                return False

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [
            executor.submit(
-               parse_documnets,
+               parse_documents,
                api_key,
                dataset_id,
                {"document_ids": document_ids[i : i + 1]},

test/testcases/test_http_api/test_file_management_within_dataset/test_stop_parse_documents.py  (+20, -20)

from time import sleep

import pytest
- from common import INVALID_API_TOKEN, bulk_upload_documents, list_documnets, parse_documnets, stop_parse_documnets
+ from common import INVALID_API_TOKEN, bulk_upload_documents, list_documents, parse_documents, stop_parse_documents
from libs.auth import RAGFlowHttpApiAuth
from utils import wait_for


def validate_document_parse_done(auth, dataset_id, document_ids):
    for document_id in document_ids:
-       res = list_documnets(auth, dataset_id, params={"id": document_id})
+       res = list_documents(auth, dataset_id, params={"id": document_id})
        doc = res["data"]["docs"][0]
        assert doc["run"] == "DONE"
        assert len(doc["process_begin_at"]) > 0

def validate_document_parse_cancel(auth, dataset_id, document_ids):
    for document_id in document_ids:
-       res = list_documnets(auth, dataset_id, params={"id": document_id})
+       res = list_documents(auth, dataset_id, params={"id": document_id})
        doc = res["data"]["docs"][0]
        assert doc["run"] == "CANCEL"
        assert len(doc["process_begin_at"]) > 0

    ],
)
def test_invalid_auth(self, invalid_auth, expected_code, expected_message):
-   res = stop_parse_documnets(invalid_auth, "dataset_id")
+   res = stop_parse_documents(invalid_auth, "dataset_id")
    assert res["code"] == expected_code
    assert res["message"] == expected_message

    @wait_for(10, 1, "Document parsing timeout")
    def condition(_auth, _dataset_id, _document_ids):
        for _document_id in _document_ids:
-           res = list_documnets(_auth, _dataset_id, {"id": _document_id})
+           res = list_documents(_auth, _dataset_id, {"id": _document_id})
            if res["data"]["docs"][0]["run"] != "DONE":
                return False
        return True

    dataset_id, document_ids = add_documents_func
-   parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+   parse_documents(api_key, dataset_id, {"document_ids": document_ids})

    if callable(payload):
        payload = payload(document_ids)

-   res = stop_parse_documnets(api_key, dataset_id, payload)
+   res = stop_parse_documents(api_key, dataset_id, payload)
    assert res["code"] == expected_code
    if expected_code != 0:
        assert res["message"] == expected_message

    expected_message,
):
    dataset_id, document_ids = add_documents_func
-   parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
-   res = stop_parse_documnets(api_key, invalid_dataset_id, {"document_ids": document_ids})
+   parse_documents(api_key, dataset_id, {"document_ids": document_ids})
+   res = stop_parse_documents(api_key, invalid_dataset_id, {"document_ids": document_ids})
    assert res["code"] == expected_code
    assert res["message"] == expected_message

)
def test_stop_parse_partial_invalid_document_id(self, api_key, add_documents_func, payload):
    dataset_id, document_ids = add_documents_func
-   parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+   parse_documents(api_key, dataset_id, {"document_ids": document_ids})

    if callable(payload):
        payload = payload(document_ids)
-   res = stop_parse_documnets(api_key, dataset_id, payload)
+   res = stop_parse_documents(api_key, dataset_id, payload)
    assert res["code"] == 102
    assert res["message"] == "You don't own the document invalid_id."

@pytest.mark.p3
def test_repeated_stop_parse(self, api_key, add_documents_func):
    dataset_id, document_ids = add_documents_func
-   parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
-   res = stop_parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+   parse_documents(api_key, dataset_id, {"document_ids": document_ids})
+   res = stop_parse_documents(api_key, dataset_id, {"document_ids": document_ids})
    assert res["code"] == 0

-   res = stop_parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+   res = stop_parse_documents(api_key, dataset_id, {"document_ids": document_ids})
    assert res["code"] == 102
    assert res["message"] == "Can't stop parsing document with progress at 0 or 1"

@pytest.mark.p3
def test_duplicate_stop_parse(self, api_key, add_documents_func):
    dataset_id, document_ids = add_documents_func
-   parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
-   res = stop_parse_documnets(api_key, dataset_id, {"document_ids": document_ids + document_ids})
+   parse_documents(api_key, dataset_id, {"document_ids": document_ids})
+   res = stop_parse_documents(api_key, dataset_id, {"document_ids": document_ids + document_ids})
    assert res["code"] == 0
    assert res["data"]["success_count"] == 3
    assert f"Duplicate document ids: {document_ids[0]}" in res["data"]["errors"]

    document_num = 100
    dataset_id = add_dataset_func
    document_ids = bulk_upload_documents(api_key, dataset_id, document_num, tmp_path)
-   parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+   parse_documents(api_key, dataset_id, {"document_ids": document_ids})
    sleep(1)
-   res = stop_parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+   res = stop_parse_documents(api_key, dataset_id, {"document_ids": document_ids})
    assert res["code"] == 0
    validate_document_parse_cancel(api_key, dataset_id, document_ids)

    document_num = 50
    dataset_id = add_dataset_func
    document_ids = bulk_upload_documents(api_key, dataset_id, document_num, tmp_path)
-   parse_documnets(api_key, dataset_id, {"document_ids": document_ids})
+   parse_documents(api_key, dataset_id, {"document_ids": document_ids})

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [
            executor.submit(
-               stop_parse_documnets,
+               stop_parse_documents,
                api_key,
                dataset_id,
                {"document_ids": document_ids[i : i + 1]},

test/testcases/test_http_api/test_file_management_within_dataset/test_update_document.py  (+13, -13)

import pytest
- from common import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN, list_documnets, update_documnet
+ from common import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN, list_documents, update_document
from libs.auth import RAGFlowHttpApiAuth


    ],
)
def test_invalid_auth(self, invalid_auth, expected_code, expected_message):
-   res = update_documnet(invalid_auth, "dataset_id", "document_id")
+   res = update_document(invalid_auth, "dataset_id", "document_id")
    assert res["code"] == expected_code
    assert res["message"] == expected_message

)
def test_name(self, api_key, add_documents, name, expected_code, expected_message):
    dataset_id, document_ids = add_documents
-   res = update_documnet(api_key, dataset_id, document_ids[0], {"name": name})
+   res = update_document(api_key, dataset_id, document_ids[0], {"name": name})
    assert res["code"] == expected_code
    if expected_code == 0:
-       res = list_documnets(api_key, dataset_id, {"id": document_ids[0]})
+       res = list_documents(api_key, dataset_id, {"id": document_ids[0]})
        assert res["data"]["docs"][0]["name"] == name
    else:
        assert res["message"] == expected_message

)
def test_invalid_document_id(self, api_key, add_documents, document_id, expected_code, expected_message):
    dataset_id, _ = add_documents
-   res = update_documnet(api_key, dataset_id, document_id, {"name": "new_name.txt"})
+   res = update_document(api_key, dataset_id, document_id, {"name": "new_name.txt"})
    assert res["code"] == expected_code
    assert res["message"] == expected_message

)
def test_invalid_dataset_id(self, api_key, add_documents, dataset_id, expected_code, expected_message):
    _, document_ids = add_documents
-   res = update_documnet(api_key, dataset_id, document_ids[0], {"name": "new_name.txt"})
+   res = update_document(api_key, dataset_id, document_ids[0], {"name": "new_name.txt"})
    assert res["code"] == expected_code
    assert res["message"] == expected_message

)
def test_meta_fields(self, api_key, add_documents, meta_fields, expected_code, expected_message):
    dataset_id, document_ids = add_documents
-   res = update_documnet(api_key, dataset_id, document_ids[0], {"meta_fields": meta_fields})
+   res = update_document(api_key, dataset_id, document_ids[0], {"meta_fields": meta_fields})
    if expected_code == 0:
-       res = list_documnets(api_key, dataset_id, {"id": document_ids[0]})
+       res = list_documents(api_key, dataset_id, {"id": document_ids[0]})
        assert res["data"]["docs"][0]["meta_fields"] == meta_fields
    else:
        assert res["message"] == expected_message

)
def test_chunk_method(self, api_key, add_documents, chunk_method, expected_code, expected_message):
    dataset_id, document_ids = add_documents
-   res = update_documnet(api_key, dataset_id, document_ids[0], {"chunk_method": chunk_method})
+   res = update_document(api_key, dataset_id, document_ids[0], {"chunk_method": chunk_method})
    assert res["code"] == expected_code
    if expected_code == 0:
-       res = list_documnets(api_key, dataset_id, {"id": document_ids[0]})
+       res = list_documents(api_key, dataset_id, {"id": document_ids[0]})
        if chunk_method != "":
            assert res["data"]["docs"][0]["chunk_method"] == chunk_method
        else:

    expected_message,
):
    dataset_id, document_ids = add_documents
-   res = update_documnet(api_key, dataset_id, document_ids[0], payload)
+   res = update_document(api_key, dataset_id, document_ids[0], payload)
    assert res["code"] == expected_code
    assert res["message"] == expected_message

    expected_message,
):
    dataset_id, document_ids = add_documents
-   res = update_documnet(
+   res = update_document(
        api_key,
        dataset_id,
        document_ids[0],
    )
    assert res["code"] == expected_code
    if expected_code == 0:
-       res = list_documnets(api_key, dataset_id, {"id": document_ids[0]})
+       res = list_documents(api_key, dataset_id, {"id": document_ids[0]})
        if parser_config != {}:
            for k, v in parser_config.items():
                assert res["data"]["docs"][0]["parser_config"][k] == v

test/testcases/test_http_api/test_file_management_within_dataset/test_upload_documents.py  (+21, -21)

#

import string
- from concurrent.futures import ThreadPoolExecutor
+ from concurrent.futures import ThreadPoolExecutor, as_completed

import pytest
import requests
- from common import DOCUMENT_NAME_LIMIT, FILE_API_URL, HOST_ADDRESS, INVALID_API_TOKEN, list_datasets, upload_documnets
+ from common import DOCUMENT_NAME_LIMIT, FILE_API_URL, HOST_ADDRESS, INVALID_API_TOKEN, list_datasets, upload_documents
from libs.auth import RAGFlowHttpApiAuth
from requests_toolbelt import MultipartEncoder
from utils.file_utils import create_txt_file

    ],
)
def test_invalid_auth(self, invalid_auth, expected_code, expected_message):
-   res = upload_documnets(invalid_auth, "dataset_id")
+   res = upload_documents(invalid_auth, "dataset_id")
    assert res["code"] == expected_code
    assert res["message"] == expected_message

def test_valid_single_upload(self, api_key, add_dataset_func, tmp_path):
    dataset_id = add_dataset_func
    fp = create_txt_file(tmp_path / "ragflow_test.txt")
-   res = upload_documnets(api_key, dataset_id, [fp])
+   res = upload_documents(api_key, dataset_id, [fp])
    assert res["code"] == 0
    assert res["data"][0]["dataset_id"] == dataset_id
    assert res["data"][0]["name"] == fp.name

def test_file_type_validation(self, api_key, add_dataset_func, generate_test_files, request):
    dataset_id = add_dataset_func
    fp = generate_test_files[request.node.callspec.params["generate_test_files"]]
-   res = upload_documnets(api_key, dataset_id, [fp])
+   res = upload_documents(api_key, dataset_id, [fp])
    assert res["code"] == 0
    assert res["data"][0]["dataset_id"] == dataset_id
    assert res["data"][0]["name"] == fp.name

    dataset_id = add_dataset_func
    fp = tmp_path / f"ragflow_test.{file_type}"
    fp.touch()
-   res = upload_documnets(api_key, dataset_id, [fp])
+   res = upload_documents(api_key, dataset_id, [fp])
    assert res["code"] == 500
    assert res["message"] == f"ragflow_test.{file_type}: This type of file has not been supported yet!"

@pytest.mark.p2
def test_missing_file(self, api_key, add_dataset_func):
    dataset_id = add_dataset_func
-   res = upload_documnets(api_key, dataset_id)
+   res = upload_documents(api_key, dataset_id)
    assert res["code"] == 101
    assert res["message"] == "No file part!"

    fp = tmp_path / "empty.txt"
    fp.touch()

-   res = upload_documnets(api_key, dataset_id, [fp])
+   res = upload_documents(api_key, dataset_id, [fp])
    assert res["code"] == 0
    assert res["data"][0]["size"] == 0

    dataset_id = add_dataset_func
    # filename_length = 129
    fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt")
-   res = upload_documnets(api_key, dataset_id, [fp])
+   res = upload_documents(api_key, dataset_id, [fp])
    assert res["code"] == 101
    assert res["message"] == "File name should be less than 128 bytes."

@pytest.mark.p2
def test_invalid_dataset_id(self, api_key, tmp_path):
    fp = create_txt_file(tmp_path / "ragflow_test.txt")
-   res = upload_documnets(api_key, "invalid_dataset_id", [fp])
+   res = upload_documents(api_key, "invalid_dataset_id", [fp])
    assert res["code"] == 100
    assert res["message"] == """LookupError("Can\'t find the dataset with ID invalid_dataset_id!")"""

def test_duplicate_files(self, api_key, add_dataset_func, tmp_path):
    dataset_id = add_dataset_func
    fp = create_txt_file(tmp_path / "ragflow_test.txt")
-   res = upload_documnets(api_key, dataset_id, [fp, fp])
+   res = upload_documents(api_key, dataset_id, [fp, fp])
    assert res["code"] == 0
    assert len(res["data"]) == 2
    for i in range(len(res["data"])):

def test_same_file_repeat(self, api_key, add_dataset_func, tmp_path):
    dataset_id = add_dataset_func
    fp = create_txt_file(tmp_path / "ragflow_test.txt")
-   for i in range(10):
-       res = upload_documnets(api_key, dataset_id, [fp])
+   for i in range(3):
+       res = upload_documents(api_key, dataset_id, [fp])
        assert res["code"] == 0
        assert len(res["data"]) == 1
        assert res["data"][0]["dataset_id"] == dataset_id

    fp = tmp_path / f"{safe_filename}.txt"
    fp.write_text("Sample text content")

-   res = upload_documnets(api_key, dataset_id, [fp])
+   res = upload_documents(api_key, dataset_id, [fp])
    assert res["code"] == 0
    assert len(res["data"]) == 1
    assert res["data"][0]["dataset_id"] == dataset_id

    for i in range(expected_document_count):
        fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt")
        fps.append(fp)
-   res = upload_documnets(api_key, dataset_id, fps)
+   res = upload_documents(api_key, dataset_id, fps)
    assert res["code"] == 0

    res = list_datasets(api_key, {"id": dataset_id})

def test_concurrent_upload(self, api_key, add_dataset_func, tmp_path):
    dataset_id = add_dataset_func

-   expected_document_count = 20
+   count = 20
    fps = []
-   for i in range(expected_document_count):
+   for i in range(count):
        fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt")
        fps.append(fp)

    with ThreadPoolExecutor(max_workers=5) as executor:
-       futures = [executor.submit(upload_documnets, api_key, dataset_id, fps[i : i + 1]) for i in range(expected_document_count)]
-   responses = [f.result() for f in futures]
-   assert all(r["code"] == 0 for r in responses)
+       futures = [executor.submit(upload_documents, api_key, dataset_id, fps[i : i + 1]) for i in range(count)]
+   responses = list(as_completed(futures))
+   assert len(responses) == count, responses

    res = list_datasets(api_key, {"id": dataset_id})
-   assert res["data"][0]["document_count"] == expected_document_count
+   assert res["data"][0]["document_count"] == count
