您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

test_dataset.py 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468
  1. from api.settings import RetCode
  2. from test_sdkbase import TestSdk
  3. from ragflow import RAGFlow
  4. import pytest
  5. from common import API_KEY, HOST_ADDRESS
  6. from api.contants import NAME_LENGTH_LIMIT
  class TestDataset(TestSdk):
      """
      Suite of tests for the dataset (knowledge base) management functionality
      of the RAGFlow SDK.

      It checks that the following operations work as expected:
      1. create a dataset
      2. list the datasets
      3. get the detail info of a dataset
      4. update a dataset
      5. delete a dataset

      Every test builds its own RAGFlow client from API_KEY and HOST_ADDRESS.
      """

      def setup_method(self):
          """
          Delete every dataset returned by a default list_dataset() call.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          listed_data = ragflow.list_dataset()['data']
          # Collect the names into a set so each name is deleted only once.
          for name in {d['name'] for d in listed_data}:
              ragflow.delete_dataset(name)

      @staticmethod
      def _create_datasets(ragflow, names):
          """
          Create one dataset per entry of ``names``.

          Returns the set of names found in ``response['data']['dataset_name']``
          of each creation response, which may differ from the requested names.
          """
          real_names = set()
          for name in names:
              response = ragflow.create_dataset(name)
              assert 'data' in response, "Response is missing 'data' key"
              real_names.add(response['data']['dataset_name'])
          return real_names

      # -----------------------create_dataset---------------------------------
      def test_create_dataset_with_success(self):
          """
          Test the creation of a new dataset with success.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          res = ragflow.create_dataset("kb1")
          assert res['code'] == RetCode.SUCCESS
          assert res['message'] == 'success'

      def test_create_dataset_with_empty_name(self):
          """
          Test the creation of a new dataset with an empty name.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          res = ragflow.create_dataset("")
          assert res['code'] == RetCode.DATA_ERROR
          assert res['message'] == 'Empty dataset name'

      def test_create_dataset_with_name_exceeding_limit(self):
          """
          Test the creation of a new dataset whose name is one character
          longer than NAME_LENGTH_LIMIT.
          """
          name = "k" * NAME_LENGTH_LIMIT + "b"
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          res = ragflow.create_dataset(name)
          assert res['code'] == RetCode.DATA_ERROR
          assert res['message'] == (
              f"Dataset name: {name} with length {len(name)} exceeds {NAME_LENGTH_LIMIT}!"
          )

      def test_create_dataset_name_with_space_in_the_middle(self):
          """
          Test the creation of a new dataset whose name has space in the middle.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          res = ragflow.create_dataset("k b")
          assert res['code'] == RetCode.SUCCESS
          assert res['message'] == 'success'

      def test_create_dataset_name_with_space_in_the_head(self):
          """
          Test the creation of a new dataset whose name has space in the head.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          res = ragflow.create_dataset(" kb")
          assert res['code'] == RetCode.SUCCESS
          assert res['message'] == 'success'

      def test_create_dataset_name_with_space_in_the_tail(self):
          """
          Test the creation of a new dataset whose name has space in the tail.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          res = ragflow.create_dataset("kb ")
          assert res['code'] == RetCode.SUCCESS
          assert res['message'] == 'success'

      def test_create_dataset_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self):
          """
          Test the creation of a new dataset whose name has space in the head and tail,
          and whose length exceeds the limit only because of those spaces.
          """
          name = " " + "k" * NAME_LENGTH_LIMIT + " "
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          res = ragflow.create_dataset(name)
          assert res['code'] == RetCode.SUCCESS
          assert res['message'] == 'success'

      def test_create_dataset_with_two_same_name(self):
          """
          Test the creation of two new datasets with the same name.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          # Both requests are expected to succeed.
          for _ in range(2):
              res = ragflow.create_dataset("kb")
              assert res['code'] == RetCode.SUCCESS
              assert res['message'] == 'success'

      def test_create_dataset_with_only_space_in_the_name(self):
          """
          Test the creation of a dataset whose name only has space.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          res = ragflow.create_dataset(" ")
          assert res['code'] == RetCode.SUCCESS
          assert res['message'] == 'success'

      def test_create_dataset_with_space_number_exceeding_limit(self):
          """
          Test the creation of a dataset whose name consists only of spaces
          and whose number of spaces exceeds the allowed limit.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          # One more space than the limit, so the raw name really exceeds it.
          name = " " * (NAME_LENGTH_LIMIT + 1)
          res = ragflow.create_dataset(name)
          assert res['code'] == RetCode.SUCCESS
          assert res['message'] == 'success'

      def test_create_dataset_with_name_having_return(self):
          """
          Test the creation of a dataset with a name that has return symbol.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          res = ragflow.create_dataset("kb\n")
          assert res['code'] == RetCode.SUCCESS
          assert res['message'] == 'success'

      def test_create_dataset_with_name_having_the_null_character(self):
          """
          Test the creation of a dataset with a name that has the null character.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          res = ragflow.create_dataset("kb\0")
          assert res['code'] == RetCode.SUCCESS
          assert res['message'] == 'success'

      # -----------------------list_dataset---------------------------------
      def test_list_dataset_success(self):
          """
          Test listing datasets with a successful outcome.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          response = ragflow.list_dataset()
          assert response['code'] == RetCode.SUCCESS

      def test_list_dataset_with_checking_size_and_name(self):
          """
          Test listing datasets and verify the size and names of the datasets.
          """
          datasets_to_create = ["dataset1", "dataset2", "dataset3"]
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          real_name_to_create = self._create_datasets(ragflow, datasets_to_create)

          response = ragflow.list_dataset(0, 3)
          assert response['code'] == RetCode.SUCCESS
          listed_data = response['data']
          assert {d['name'] for d in listed_data} == real_name_to_create
          assert len(listed_data) == len(datasets_to_create)

      def test_list_dataset_with_getting_empty_result(self):
          """
          Test listing datasets that should be empty.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          # Nothing is created here; the listing is requested with (0, 0).
          response = ragflow.list_dataset(0, 0)
          assert response['code'] == RetCode.SUCCESS
          assert len(response['data']) == 0

      def test_list_dataset_with_creating_100_knowledge_bases(self):
          """
          Test listing 100 datasets and verify the size and names of these datasets.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          # The same name is requested 100 times; the names actually assigned
          # are taken from the creation responses.
          real_name_to_create = self._create_datasets(ragflow, ["dataset1"] * 100)

          res = ragflow.list_dataset(0, 100)
          assert res['code'] == RetCode.SUCCESS
          listed_data = res['data']
          assert {d['name'] for d in listed_data} == real_name_to_create
          assert len(listed_data) == 100

      def test_list_dataset_with_showing_one_dataset(self):
          """
          Test listing one dataset and verify the size of the dataset.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          # setup_method removed the listed datasets, so create the one
          # dataset this test expects to see.
          self._create_datasets(ragflow, ["dataset1"])
          response = ragflow.list_dataset(0, 1)
          assert response['code'] == RetCode.SUCCESS
          assert len(response['data']) == 1

      def test_list_dataset_failure(self):
          """
          Test listing datasets with IndexError.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          response = ragflow.list_dataset(-1, -1)
          assert response['code'] == RetCode.EXCEPTION_ERROR
          assert "IndexError" in response['message']

      def test_list_dataset_for_empty_datasets(self):
          """
          Test listing datasets when the datasets are empty.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          response = ragflow.list_dataset()
          assert response['code'] == RetCode.SUCCESS
          assert len(response['data']) == 0

      # TODO: have to set the limitation of the number of datasets

      # -----------------------delete_dataset---------------------------------
      def test_delete_one_dataset_with_success(self):
          """
          Test deleting a dataset with success.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          # Use the name reported by the creation response for the deletion.
          res = ragflow.create_dataset("kb0")
          real_dataset_name = res['data']['dataset_name']

          res = ragflow.delete_dataset(real_dataset_name)
          assert res['code'] == RetCode.SUCCESS
          assert 'successfully' in res['message']

      def test_delete_dataset_with_not_existing_dataset(self):
          """
          Test deleting a dataset that does not exist with failure.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          res = ragflow.delete_dataset("weird_dataset")
          assert res['code'] == RetCode.OPERATING_ERROR
          assert res['message'] == 'The dataset cannot be found for your current account.'

      def test_delete_dataset_with_creating_100_datasets_and_deleting_100_datasets(self):
          """
          Test deleting a dataset when creating 100 datasets and deleting 100 datasets.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          real_name_to_create = self._create_datasets(ragflow, ["dataset1"] * 100)

          for name in real_name_to_create:
              res = ragflow.delete_dataset(name)
              assert res['code'] == RetCode.SUCCESS
              assert 'successfully' in res['message']

      def test_delete_dataset_with_space_in_the_middle_of_the_name(self):
          """
          Test deleting a dataset when its name has space in the middle.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          ragflow.create_dataset("k b")
          res = ragflow.delete_dataset("k b")
          assert res['code'] == RetCode.SUCCESS
          assert 'successfully' in res['message']

      def test_delete_dataset_with_space_in_the_head_of_the_name(self):
          """
          Test deleting a dataset when its name has space in the head.

          The deletion by the space-prefixed name is expected to report that
          the dataset cannot be found.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          ragflow.create_dataset(" kb")
          res = ragflow.delete_dataset(" kb")
          assert res['code'] == RetCode.OPERATING_ERROR
          assert res['message'] == 'The dataset cannot be found for your current account.'

      def test_delete_dataset_with_space_in_the_tail_of_the_name(self):
          """
          Test deleting a dataset when its name has space in the tail.

          The deletion by the space-suffixed name is expected to report that
          the dataset cannot be found.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          ragflow.create_dataset("kb ")
          res = ragflow.delete_dataset("kb ")
          assert res['code'] == RetCode.OPERATING_ERROR
          assert res['message'] == 'The dataset cannot be found for your current account.'

      def test_delete_dataset_with_only_space_in_the_name(self):
          """
          Test deleting a dataset when its name only has space.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          ragflow.create_dataset(" ")
          res = ragflow.delete_dataset(" ")
          assert res['code'] == RetCode.OPERATING_ERROR
          assert res['message'] == 'The dataset cannot be found for your current account.'

      def test_delete_dataset_with_only_exceeding_limit_space_in_the_name(self):
          """
          Test deleting a dataset when its name only has space and the number of it exceeds the limit.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          name = " " * (NAME_LENGTH_LIMIT + 1)
          ragflow.create_dataset(name)
          res = ragflow.delete_dataset(name)
          assert res['code'] == RetCode.OPERATING_ERROR
          assert res['message'] == 'The dataset cannot be found for your current account.'

      def test_delete_dataset_with_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self):
          """
          Test deleting a dataset whose name has space in the head and tail,
          and the length of the name exceeds the limit.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          name = " " + "k" * NAME_LENGTH_LIMIT + " "
          ragflow.create_dataset(name)
          res = ragflow.delete_dataset(name)
          assert res['code'] == RetCode.OPERATING_ERROR
          assert res['message'] == 'The dataset cannot be found for your current account.'

      # ---------------------------------get_dataset-----------------------------------------
      def test_get_dataset_with_success(self):
          """
          Test getting a dataset which exists.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          response = ragflow.create_dataset("test")
          dataset_name = response['data']['dataset_name']

          res = ragflow.get_dataset(dataset_name)
          assert res['code'] == RetCode.SUCCESS
          assert res['data']['name'] == dataset_name

      def test_get_dataset_with_failure(self):
          """
          Test getting a dataset which does not exist.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          res = ragflow.get_dataset("weird_dataset")
          assert res['code'] == RetCode.DATA_ERROR
          assert res['message'] == "Can't find this dataset!"

      # ---------------------------------update a dataset-----------------------------------
      def test_update_dataset_without_existing_dataset(self):
          """
          Test updating a dataset which does not exist.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          params = {
              'name': 'new_name3',
              'description': 'new_description',
              "permission": 'me',
              "parser_id": 'naive',
              "language": 'English',
          }
          res = ragflow.update_dataset("weird_dataset", **params)
          assert res['code'] == RetCode.OPERATING_ERROR
          assert res['message'] == 'Only the owner of knowledgebase is authorized for this operation!'

      def test_update_dataset_with_updating_six_parameters(self):
          """
          Test updating several parameters of a dataset in one call:
          name, description, permission, parser_id and language.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          ragflow.create_dataset("new_name1")
          params = {
              'name': 'new_name',
              'description': 'new_description1',
              "permission": 'me',
              "parser_id": 'naive',
              "language": 'English',
          }
          res = ragflow.update_dataset("new_name1", **params)
          assert res['code'] == RetCode.SUCCESS
          # Every submitted field must be echoed back with its new value.
          data = res['data']
          assert data['description'] == 'new_description1'
          assert data['name'] == 'new_name'
          assert data['permission'] == 'me'
          assert data['language'] == 'English'
          assert data['parser_id'] == 'naive'

      def test_update_dataset_with_updating_two_parameters(self):
          """
          Test updating a dataset when updating two parameters.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          ragflow.create_dataset("new_name2")
          params = {
              "name": "new_name3",
              "language": 'English',
          }
          res = ragflow.update_dataset("new_name2", **params)
          assert res['code'] == RetCode.SUCCESS
          assert res['data']['name'] == "new_name3"
          assert res['data']['language'] == 'English'

      def test_update_dataset_with_updating_layout_recognize(self):
          """Test updating a dataset with only updating the layout_recognize"""
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          ragflow.create_dataset("test_update_dataset_with_updating_layout_recognize")
          params = {
              "layout_recognize": False,
          }
          res = ragflow.update_dataset("test_update_dataset_with_updating_layout_recognize", **params)
          assert res['code'] == RetCode.SUCCESS
          assert res['data']['parser_config']['layout_recognize'] is False

      def test_update_dataset_with_empty_parameter(self):
          """
          Test updating a dataset without passing any parameter to update.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          ragflow.create_dataset("test_update_dataset_with_empty_parameter")
          params = {}
          res = ragflow.update_dataset("test_update_dataset_with_empty_parameter", **params)
          assert res['code'] == RetCode.DATA_ERROR
          assert res['message'] == 'Please input at least one parameter that you want to update!'

      # ---------------------------------mix the different methods--------------------------
      def test_create_and_delete_dataset_together(self):
          """
          Test creating 1 dataset, and then deleting 1 dataset.
          Test creating 10 datasets, and then deleting 10 datasets.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)

          # create 1 dataset
          res = ragflow.create_dataset("ddd")
          assert res['code'] == RetCode.SUCCESS
          assert res['message'] == 'success'

          # delete 1 dataset
          res = ragflow.delete_dataset("ddd")
          assert res["code"] == RetCode.SUCCESS

          # create 10 datasets
          real_name_to_create = self._create_datasets(ragflow, ["dataset1"] * 10)

          # delete 10 datasets
          for name in real_name_to_create:
              res = ragflow.delete_dataset(name)
              assert res["code"] == RetCode.SUCCESS