您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

test_dataset.py 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. from test_sdkbase import TestSdk
  2. from ragflow import RAGFlow
  3. import pytest
  4. from common import API_KEY, HOST_ADDRESS
  5. from api.contants import NAME_LENGTH_LIMIT
  6. class TestDataset(TestSdk):
  7. """
  8. This class contains a suite of tests for the dataset management functionality within the RAGFlow system.
  9. It ensures that the following functionalities work as expected:
  10. 1. create a kb
  11. 2. list the kb
  12. 3. get the detail info according to the kb id
  13. 4. update the kb
  14. 5. delete the kb
  15. """
  16. # -----------------------create_dataset---------------------------------
  def test_create_dataset_with_success(self):
      """
      Create a dataset named "kb1" and expect a successful response.

      The response must carry code 0 and the message 'success'.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      reply = client.create_dataset("kb1")
      assert reply['code'] == 0
      assert reply['message'] == 'success'
  def test_create_dataset_with_empty_name(self):
      """
      Create a dataset with an empty string as its name.

      The response is expected to carry the message 'Empty dataset name' and code 102.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      reply = client.create_dataset("")
      assert reply['message'] == 'Empty dataset name'
      assert reply['code'] == 102
  def test_create_dataset_with_name_exceeding_limit(self):
      """
      Create a dataset whose name is one character longer than NAME_LENGTH_LIMIT.

      The response is expected to carry the length-exceeded message and code 102.
      """
      name = "k" * NAME_LENGTH_LIMIT + "b"
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      reply = client.create_dataset(name)
      expected_message = f"Dataset name: {name} with length {len(name)} exceeds {NAME_LENGTH_LIMIT}!"
      assert reply['message'] == expected_message
      assert reply['code'] == 102
  def test_create_dataset_name_with_space_in_the_middle(self):
      """
      Create a dataset named "k b" (space in the middle) and expect success.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      reply = client.create_dataset("k b")
      assert reply['code'] == 0
      assert reply['message'] == 'success'
  def test_create_dataset_name_with_space_in_the_head(self):
      """
      Create a dataset named " kb" (leading space) and expect success.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      reply = client.create_dataset(" kb")
      assert reply['code'] == 0
      assert reply['message'] == 'success'
  def test_create_dataset_name_with_space_in_the_tail(self):
      """
      Create a dataset named "kb " (trailing space) and expect success.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      reply = client.create_dataset("kb ")
      assert reply['code'] == 0
      assert reply['message'] == 'success'
  def test_create_dataset_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self):
      """
      Create a dataset whose name is NAME_LENGTH_LIMIT 'k' characters wrapped in one
      leading and one trailing space, and expect success.

      The raw string is NAME_LENGTH_LIMIT + 2 characters long.
      """
      padded_name = " " + "k" * NAME_LENGTH_LIMIT + " "
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      reply = client.create_dataset(padded_name)
      assert reply['code'] == 0
      assert reply['message'] == 'success'
  def test_create_dataset_with_two_same_name(self):
      """
      Create a dataset named "kb" twice in a row; both calls are expected to succeed.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)

      first_reply = client.create_dataset("kb")
      assert first_reply['code'] == 0
      assert first_reply['message'] == 'success'

      second_reply = client.create_dataset("kb")
      assert second_reply['code'] == 0
      assert second_reply['message'] == 'success'
  def test_create_dataset_with_only_space_in_the_name(self):
      """
      Create a dataset whose name is a single space and expect success.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      reply = client.create_dataset(" ")
      assert reply['code'] == 0
      assert reply['message'] == 'success'
  def test_create_dataset_with_space_number_exceeding_limit(self):
      """
      Test the creation of a dataset whose name consists only of spaces and whose
      length exceeds NAME_LENGTH_LIMIT.

      The creation is expected to succeed.
      """
      ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
      # One more space than the limit, so the raw name really exceeds it;
      # exactly NAME_LENGTH_LIMIT spaces would only reach the limit.
      # NOTE(review): success presumably relies on the server trimming the name
      # before the length check -- confirm against the server implementation.
      name = " " * (NAME_LENGTH_LIMIT + 1)
      res = ragflow.create_dataset(name)
      assert res['code'] == 0
      assert res['message'] == 'success'
  def test_create_dataset_with_name_having_return(self):
      """
      Create a dataset whose name ends with a newline character and expect success.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      reply = client.create_dataset("kb\n")
      assert reply['code'] == 0
      assert reply['message'] == 'success'
  def test_create_dataset_with_name_having_the_null_character(self):
      """
      Create a dataset whose name ends with the null character and expect success.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      reply = client.create_dataset("kb\0")
      assert reply['code'] == 0
      assert reply['message'] == 'success'
  114. # -----------------------list_dataset---------------------------------
  def test_list_dataset_success(self):
      """
      List datasets with default arguments and expect a status of 200.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      # list_dataset yields a (status, payload) pair; only the status is checked here.
      status, _ = client.list_dataset()
      assert status == 200
  def test_list_dataset_with_checking_size_and_name(self):
      """
      Create three datasets, list with arguments (0, 3), and compare names and count.

      The expected names are the ones reported in the creation responses.
      """
      requested_names = ["dataset1", "dataset2", "dataset3"]
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      creation_replies = [client.create_dataset(requested) for requested in requested_names]

      expected_names = set()
      for creation_reply in creation_replies:
          assert 'data' in creation_reply, "Response is missing 'data' key"
          expected_names.add(creation_reply['data']['dataset_name'])

      status, payload = client.list_dataset(0, 3)
      entries = payload['data']
      assert {entry['name'] for entry in entries} == expected_names
      assert status == 200
      assert len(entries) == len(requested_names)
  def test_list_dataset_with_getting_empty_result(self):
      """
      Test listing datasets with arguments (0, 0), which should yield an empty result.

      No dataset is created by this test. The listing is expected to report
      status 200 and an empty 'data' collection.
      """
      ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
      status_code, listed_data = ragflow.list_dataset(0, 0)
      # Check the status first so a failed request is reported as such rather
      # than as a missing 'data' key.
      assert status_code == 200
      listed_data = listed_data['data']
      listed_names = {d['name'] for d in listed_data}
      assert listed_names == set()
      assert len(listed_data) == 0
  def test_list_dataset_with_creating_100_knowledge_bases(self):
      """
      Create 100 datasets, all requested as "dataset1", then list with arguments (0, 100).

      The listed names must equal the names reported by the creation responses,
      the status must be 200, and exactly 100 entries must be listed.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      requested_names = ["dataset1"] * 100
      creation_replies = [client.create_dataset(requested) for requested in requested_names]

      expected_names = set()
      for creation_reply in creation_replies:
          assert 'data' in creation_reply, "Response is missing 'data' key"
          expected_names.add(creation_reply['data']['dataset_name'])

      status, payload = client.list_dataset(0, 100)
      entries = payload['data']
      assert {entry['name'] for entry in entries} == expected_names
      assert status == 200
      assert len(entries) == 100
  def test_list_dataset_with_showing_one_dataset(self):
      """
      List with arguments (0, 1) and expect exactly one dataset in the result.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      _, payload = client.list_dataset(0, 1)
      entries = payload['data']
      assert len(entries) == 1
  def test_list_dataset_failure(self):
      """
      List with arguments (-1, -1) and expect "IndexError" in the response message.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      _, payload = client.list_dataset(-1, -1)
      assert "IndexError" in payload['message']
  def test_list_dataset_for_empty_datasets(self):
      """
      List datasets with default arguments and expect an empty 'data' collection.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      _, payload = client.list_dataset()
      entries = payload['data']
      assert len(entries) == 0
  204. # TODO: have to set the limitation of the number of datasets
  205. # -----------------------delete_dataset---------------------------------
  def test_delete_one_dataset_with_success(self):
      """
      Create a dataset requested as "kb0", then delete it and expect success.

      The deletion uses the name reported in the creation response.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      created = client.create_dataset("kb0")
      stored_name = created['data']['dataset_name']

      outcome = client.delete_dataset(stored_name)
      assert outcome["success"] is True
  def test_delete_dataset_with_not_existing_dataset(self):
      """
      Delete a dataset named "weird_dataset" and expect the deletion to fail.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      outcome = client.delete_dataset("weird_dataset")
      assert outcome["success"] is False
  def test_delete_dataset_with_creating_100_datasets_and_deleting_100_datasets(self):
      """
      Create 100 datasets, all requested as "dataset1", then delete each one.

      Deletions use the names reported in the creation responses, and every
      deletion is expected to succeed.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      requested_names = ["dataset1"] * 100
      creation_replies = [client.create_dataset(requested) for requested in requested_names]

      stored_names = set()
      for creation_reply in creation_replies:
          assert 'data' in creation_reply, "Response is missing 'data' key"
          stored_names.add(creation_reply['data']['dataset_name'])

      for stored_name in stored_names:
          outcome = client.delete_dataset(stored_name)
          assert outcome["success"] is True
  def test_delete_dataset_with_space_in_the_middle_of_the_name(self):
      """
      Test deleting a dataset when its name has space in the middle.

      The dataset "k b" is not created here.
      NOTE(review): this presumably relies on an earlier test having created
      "k b" -- confirm the intended test ordering.
      """
      ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
      res = ragflow.delete_dataset("k b")
      # Attach the response to the assertion so a failure shows it,
      # instead of printing it unconditionally on every run.
      assert res["success"] is True, res
  def test_delete_dataset_with_space_in_the_head_of_the_name(self):
      """
      Delete a dataset named " kb" (leading space) and expect the deletion to fail.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      outcome = client.delete_dataset(" kb")
      assert outcome["success"] is False
  def test_delete_dataset_with_space_in_the_tail_of_the_name(self):
      """
      Delete a dataset named "kb " (trailing space) and expect the deletion to fail.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      outcome = client.delete_dataset("kb ")
      assert outcome["success"] is False
  def test_delete_dataset_with_only_space_in_the_name(self):
      """
      Delete a dataset whose name is a single space and expect the deletion to fail.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      outcome = client.delete_dataset(" ")
      assert outcome["success"] is False
  def test_delete_dataset_with_only_exceeding_limit_space_in_the_name(self):
      """
      Delete a dataset whose name is NAME_LENGTH_LIMIT + 1 spaces and expect failure.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      blank_name = " " * (NAME_LENGTH_LIMIT + 1)
      outcome = client.delete_dataset(blank_name)
      assert outcome["success"] is False
  def test_delete_dataset_with_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self):
      """
      Delete a dataset whose name is NAME_LENGTH_LIMIT 'k' characters wrapped in one
      leading and one trailing space, and expect the deletion to fail.
      """
      client = RAGFlow(API_KEY, HOST_ADDRESS)
      padded_name = " " + "k" * NAME_LENGTH_LIMIT + " "
      outcome = client.delete_dataset(padded_name)
      assert outcome["success"] is False
  286. # ---------------------------------mix the different methods--------------------
  def test_create_and_delete_dataset_together(self):
      """
      Test creating 1 dataset, and then deleting 1 dataset.
      Test creating 10 datasets, and then deleting 10 datasets.

      Every deletion uses the dataset name reported in the corresponding
      creation response instead of the requested name.
      NOTE(review): presumably the server may store a dataset under a name that
      differs from the requested one (e.g. for duplicates) -- confirm.
      """
      ragflow = RAGFlow(API_KEY, HOST_ADDRESS)

      # create 1 dataset
      res = ragflow.create_dataset("ddd")
      assert res['code'] == 0 and res['message'] == 'success'
      assert 'data' in res, "Response is missing 'data' key"
      real_dataset_name = res['data']['dataset_name']

      # delete 1 dataset, addressed by the name the creation call reported
      res = ragflow.delete_dataset(real_dataset_name)
      assert res["success"] is True

      # create 10 datasets
      datasets_to_create = ["dataset1"] * 10
      created_response = [ragflow.create_dataset(name) for name in datasets_to_create]

      real_name_to_create = set()
      for response in created_response:
          assert 'data' in response, "Response is missing 'data' key"
          real_name_to_create.add(response['data']['dataset_name'])

      # delete 10 datasets
      for name in real_name_to_create:
          res = ragflow.delete_dataset(name)
          assert res["success"] is True