瀏覽代碼

Add more web test cases (#3702)

### What problem does this PR solve?

Test cases about dataset

### Type of change

- [x] Other (please describe): test cases

---------

Signed-off-by: jinhai <haijin.chn@gmail.com>
tags/v0.14.1
Jin Hai 11 月之前
父節點
當前提交
9fcad0500d
沒有連結到貢獻者的電子郵件帳戶。

+ 20
- 7
api/apps/kb_app.py 查看文件

from api.utils.api_utils import get_json_result from api.utils.api_utils import get_json_result
from api import settings from api import settings
from rag.nlp import search from rag.nlp import search
from api.constants import DATASET_NAME_LIMIT




@manager.route('/create', methods=['post']) @manager.route('/create', methods=['post'])
@validate_request("name") @validate_request("name")
def create(): def create():
req = request.json req = request.json
req["name"] = req["name"].strip()
req["name"] = duplicate_name(
dataset_name = req["name"]
if not isinstance(dataset_name, str):
return get_data_error_result(message="Dataset name must be string.")
if dataset_name == "":
return get_data_error_result(message="Dataset name can't be empty.")
if len(dataset_name) >= DATASET_NAME_LIMIT:
return get_data_error_result(
message=f"Dataset name length is {len(dataset_name)} which is large than {DATASET_NAME_LIMIT}")

dataset_name = dataset_name.strip()
dataset_name = duplicate_name(
KnowledgebaseService.query, KnowledgebaseService.query,
name=req["name"],
name=dataset_name,
tenant_id=current_user.id, tenant_id=current_user.id,
status=StatusEnum.VALID.value) status=StatusEnum.VALID.value)
try: try:
if not KnowledgebaseService.query( if not KnowledgebaseService.query(
created_by=current_user.id, id=req["kb_id"]): created_by=current_user.id, id=req["kb_id"]):
return get_json_result( return get_json_result(
data=False, message='Only owner of knowledgebase authorized for this operation.', code=settings.RetCode.OPERATING_ERROR)
data=False, message='Only owner of knowledgebase authorized for this operation.',
code=settings.RetCode.OPERATING_ERROR)


e, kb = KnowledgebaseService.get_by_id(req["kb_id"]) e, kb = KnowledgebaseService.get_by_id(req["kb_id"])
if not e: if not e:
message="Can't find this knowledgebase!") message="Can't find this knowledgebase!")


if req["name"].lower() != kb.name.lower() \ if req["name"].lower() != kb.name.lower() \
and len(KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) > 1:
and len(
KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) > 1:
return get_data_error_result( return get_data_error_result(
message="Duplicated knowledgebase name.") message="Duplicated knowledgebase name.")


) )
try: try:
kbs = KnowledgebaseService.query( kbs = KnowledgebaseService.query(
created_by=current_user.id, id=req["kb_id"])
created_by=current_user.id, id=req["kb_id"])
if not kbs: if not kbs:
return get_json_result( return get_json_result(
data=False, message='Only owner of knowledgebase authorized for this operation.', code=settings.RetCode.OPERATING_ERROR)
data=False, message='Only owner of knowledgebase authorized for this operation.',
code=settings.RetCode.OPERATING_ERROR)


for doc in DocumentService.query(kb_id=req["kb_id"]): for doc in DocumentService.query(kb_id=req["kb_id"]):
if not DocumentService.remove_document(doc, kbs[0].tenant_id): if not DocumentService.remove_document(doc, kbs[0].tenant_id):

+ 2
- 0
api/constants.py 查看文件

RAG_FLOW_SERVICE_NAME = "ragflow" RAG_FLOW_SERVICE_NAME = "ragflow"
REQUEST_WAIT_SEC = 2 REQUEST_WAIT_SEC = 2
REQUEST_MAX_WAIT_SEC = 300 REQUEST_MAX_WAIT_SEC = 300

DATASET_NAME_LIMIT = 128

+ 3
- 1
rag/utils/infinity_conn.py 查看文件

table_name = f"{indexName}_{knowledgebaseId}" table_name = f"{indexName}_{knowledgebaseId}"
table_instance = db_instance.get_table(table_name) table_instance = db_instance.get_table(table_name)
kb_res = table_instance.output(["*"]).filter(f"id = '{chunkId}'").to_pl() kb_res = table_instance.output(["*"]).filter(f"id = '{chunkId}'").to_pl()
df_list.append(kb_res)
if len(kb_res) != 0 and kb_res.shape[0] > 0:
df_list.append(kb_res)

self.connPool.release_conn(inf_conn) self.connPool.release_conn(inf_conn)
res = concat_dataframes(df_list, ["id"]) res = concat_dataframes(df_list, ["id"])
res_fields = self.getFields(res, res.columns) res_fields = self.getFields(res, res.columns)

+ 8
- 0
sdk/python/test/test_frontend_api/common.py 查看文件



HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380') HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')


DATASET_NAME_LIMIT = 128

def create_dataset(auth, dataset_name): def create_dataset(auth, dataset_name):
authorization = {"Authorization": auth} authorization = {"Authorization": auth}
url = f"{HOST_ADDRESS}/v1/kb/create" url = f"{HOST_ADDRESS}/v1/kb/create"
json = {"kb_id": dataset_id} json = {"kb_id": dataset_id}
res = requests.post(url=url, headers=authorization, json=json) res = requests.post(url=url, headers=authorization, json=json)
return res.json() return res.json()

def update_dataset(auth, json_req):
    """POST ``json_req`` to the ``/v1/kb/update`` endpoint.

    ``auth`` is sent verbatim as the ``Authorization`` header; the decoded
    JSON body of the response is returned.
    """
    response = requests.post(
        url=f"{HOST_ADDRESS}/v1/kb/update",
        headers={"Authorization": auth},
        json=json_req,
    )
    return response.json()

+ 77
- 7
sdk/python/test/test_frontend_api/test_dataset.py 查看文件

from common import HOST_ADDRESS, create_dataset, list_dataset, rm_dataset
import requests

from common import HOST_ADDRESS, create_dataset, list_dataset, rm_dataset, update_dataset, DATASET_NAME_LIMIT
import re
import pytest
import random
import string


def test_dataset(get_auth): def test_dataset(get_auth):
# create dataset # create dataset
assert res.get("code") == 0, f"{res.get('message')}" assert res.get("code") == 0, f"{res.get('message')}"
print(f"{len(dataset_list)} datasets are deleted") print(f"{len(dataset_list)} datasets are deleted")


# delete dataset
# create invalid name dataset
def test_duplicated_name_dataset(get_auth):
    """Create 20 datasets under one name, then verify and remove what page 1 lists.

    Every dataset returned on the first listing page must have a name that
    starts with ``test_create_dataset``; each listed dataset is then deleted.
    NOTE(review): presumably the server de-duplicates repeated names by
    altering them rather than rejecting the request - confirm against the
    ``/v1/kb/create`` handler.
    """
    # create dataset
    for _ in range(20):
        res = create_dataset(get_auth, "test_create_dataset")
        assert res.get("code") == 0, f"{res.get('message')}"

    # list dataset
    res = list_dataset(get_auth, 1)
    data = res.get("data")
    dataset_list = []
    pattern = re.compile(r'^test_create_dataset.*')
    for item in data:
        dataset_name = item.get("name")
        dataset_id = item.get("id")
        dataset_list.append(dataset_id)
        # "No match" is the None singleton, so test by identity, not equality.
        assert pattern.match(dataset_name) is not None, \
            f"unexpected dataset name: {dataset_name}"

    for dataset_id in dataset_list:
        res = rm_dataset(get_auth, dataset_id)
        assert res.get("code") == 0, f"{res.get('message')}"
    print(f"{len(dataset_list)} datasets are deleted")

def test_invalid_name_dataset(get_auth):
    """Check that invalid dataset names are rejected with code 102.

    Three invalid names are tried: a non-string value, the empty string, and
    a random alphanumeric string one character longer than
    ``DATASET_NAME_LIMIT``.
    """
    # non-string name
    res = create_dataset(get_auth, 0)
    assert res['code'] == 102

    # empty name
    res = create_dataset(get_auth, "")
    assert res['code'] == 102

    # over-long name: built in one pass instead of a character-at-a-time loop
    alphabet = string.ascii_letters + string.digits
    long_string = "".join(random.choices(alphabet, k=DATASET_NAME_LIMIT + 1))

    res = create_dataset(get_auth, long_string)
    assert res['code'] == 102
    print(res)

def test_update_different_params_dataset(get_auth):
    """Create a dataset, update the first listed one, then delete all listed.

    The update request sets name, description, permission and parser_id on
    the first dataset id returned by the listing, and must succeed (code 0).
    """
    # create dataset
    created = create_dataset(get_auth, "test_create_dataset")
    assert created.get("code") == 0, f"{created.get('message')}"

    # list dataset: keep paging until the running total falls short of
    # page_number * 150
    dataset_list = []
    page_number = 1
    while True:
        listing = list_dataset(get_auth, page_number)
        dataset_list.extend(item.get("id") for item in listing.get("data"))
        if len(dataset_list) < page_number * 150:
            break
        page_number += 1

    print(f"found {len(dataset_list)} datasets")
    target_id = dataset_list[0]

    payload = {
        "kb_id": target_id,
        "name": "test_update_dataset",
        "description": "test",
        "permission": "me",
        "parser_id": "presentation",
    }
    updated = update_dataset(get_auth, payload)
    assert updated.get("code") == 0, f"{updated.get('message')}"

    # delete dataset
    for listed_id in dataset_list:
        removed = rm_dataset(get_auth, listed_id)
        assert removed.get("code") == 0, f"{removed.get('message')}"
    print(f"{len(dataset_list)} datasets are deleted")

# update dataset with different parameters # update dataset with different parameters
# create duplicated name dataset
#

printEnvironment.sh → show_env.sh 查看文件

echo "$distro_id $distro_version (Kernel version: $kernel_version)" echo "$distro_id $distro_version (Kernel version: $kernel_version)"
} }


# get Git repo name
# get Git repository name
git_repo_name='' git_repo_name=''
if git rev-parse --is-inside-work-tree > /dev/null 2>&1; then if git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
git_repo_name=$(basename "$(git rev-parse --show-toplevel)") git_repo_name=$(basename "$(git rev-parse --show-toplevel)")
python_version="Python not installed" python_version="Python not installed"
fi fi


# Print all infomation
echo "Current Repo: $git_repo_name"
# Print all information
echo "Current Repository: $git_repo_name"


# get Commit ID # get Commit ID
git_version=$(git log -1 --pretty=format:'%h') git_version=$(git log -1 --pretty=format:'%h')

Loading…
取消
儲存