您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

test_document.py 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. from api.settings import RetCode
  2. from test_sdkbase import TestSdk
  3. from ragflow import RAGFlow
  4. import pytest
  5. from common import API_KEY, HOST_ADDRESS
  6. from api.contants import NAME_LENGTH_LIMIT
  class TestFile(TestSdk):
      """
      Tests for the content management functionality within a dataset.

      The suite is meant to ensure that the following functionalities work as expected:
      1. upload local files
      2. upload remote files
      3. download a file
      4. delete a file
      5. enable rename
      6. list files
      7. start parsing
      8. end parsing
      9. check the status of the file
      10. list the chunks
      11. delete a chunk
      12. insert a new chunk
      13. edit the status of chunk
      14. get the specific chunk
      15. retrieval test

      Only uploading, deleting and listing have tests so far; the remaining
      items are placeholder sections at the end of the class.

      Every test builds its own RAGFlow client from API_KEY / HOST_ADDRESS and
      passes fixture paths under ``test_data/`` to the SDK.
      """

      @staticmethod
      def _create_dataset(name):
          """
          Create a dataset called ``name`` with a fresh client.

          Returns the ``(client, dataset_id)`` pair so that the calling test
          can keep working with the same client.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          created_res = ragflow.create_dataset(name)
          return ragflow, created_res['data']['dataset_id']

      # ----------------------------upload local files-----------------------------------------------------
      def test_upload_two_files(self):
          """
          Test uploading two files with success.
          """
          ragflow, dataset_id = self._create_dataset("test_upload_two_files")
          file_paths = ["test_data/test.txt", "test_data/test1.txt"]
          res = ragflow.upload_local_file(dataset_id, file_paths)
          assert res['code'] == RetCode.SUCCESS
          assert res['message'] == 'success'

      def test_upload_one_file(self):
          """
          Test uploading one file with success.
          """
          ragflow, dataset_id = self._create_dataset("test_upload_one_file")
          file_paths = ["test_data/test.txt"]
          res = ragflow.upload_local_file(dataset_id, file_paths)
          assert res['code'] == RetCode.SUCCESS
          assert res['message'] == 'success'

      def test_upload_nonexistent_files(self):
          """
          Test uploading a file which does not exist.
          """
          ragflow, dataset_id = self._create_dataset("test_upload_nonexistent_files")
          file_paths = ["test_data/imagination.txt"]
          res = ragflow.upload_local_file(dataset_id, file_paths)
          assert res['code'] == RetCode.DATA_ERROR
          assert "does not exist" in res['message']

      def test_upload_file_if_dataset_does_not_exist(self):
          """
          Test uploading files if the dataset id does not exist.
          """
          ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
          file_paths = ["test_data/test.txt"]
          # "111" is an id that no dataset was created with
          res = ragflow.upload_local_file("111", file_paths)
          assert res['code'] == RetCode.DATA_ERROR
          assert res['message'] == "Can't find this dataset"

      def test_upload_file_without_name(self):
          """
          Test uploading a file whose name consists of the extension only (".txt").
          """
          ragflow, dataset_id = self._create_dataset("test_upload_file_without_name")
          file_paths = ["test_data/.txt"]
          res = ragflow.upload_local_file(dataset_id, file_paths)
          assert res['code'] == RetCode.SUCCESS

      def test_upload_file_without_name1(self):
          """
          Test uploading an extension-only file (".txt") together with "empty.txt".
          """
          ragflow, dataset_id = self._create_dataset("test_upload_file_without_name1")
          file_paths = ["test_data/.txt", "test_data/empty.txt"]
          res = ragflow.upload_local_file(dataset_id, file_paths)
          assert res['code'] == RetCode.SUCCESS

      def test_upload_files_exceeding_the_number_limit(self):
          """
          Test uploading files whose number exceeds the limit.
          """
          ragflow, dataset_id = self._create_dataset("test_upload_files_exceeding_the_number_limit")
          # 2 paths * 256 = 512 files, twice the limit quoted in the expected message
          file_paths = ["test_data/test.txt", "test_data/test1.txt"] * 256
          res = ragflow.upload_local_file(dataset_id, file_paths)
          assert (res['message'] ==
                  'You try to upload 512 files, which exceeds the maximum number of uploading files: 256')
          assert res['code'] == RetCode.DATA_ERROR

      def test_upload_files_without_files(self):
          """
          Test uploading files without files (the path list only holds None).
          """
          ragflow, dataset_id = self._create_dataset("test_upload_files_without_files")
          file_paths = [None]
          res = ragflow.upload_local_file(dataset_id, file_paths)
          assert res['message'] == 'None is not string.'
          assert res['code'] == RetCode.ARGUMENT_ERROR

      def test_upload_files_with_two_files_with_same_name(self):
          """
          Test uploading files with the same name.
          """
          ragflow, dataset_id = self._create_dataset("test_upload_files_with_two_files_with_same_name")
          file_paths = ['test_data/test.txt'] * 2
          res = ragflow.upload_local_file(dataset_id, file_paths)
          assert res['message'] == 'success'
          assert res['code'] == RetCode.SUCCESS

      def test_upload_files_with_file_paths(self):
          """
          Test uploading a directory path instead of a file path.
          """
          ragflow, dataset_id = self._create_dataset("test_upload_files_with_file_paths")
          file_paths = ['test_data/']
          res = ragflow.upload_local_file(dataset_id, file_paths)
          assert res['message'] == 'The file test_data/ does not exist'
          assert res['code'] == RetCode.DATA_ERROR

      def test_upload_files_with_remote_file_path(self):
          """
          Test that passing a remote URL to the local upload is rejected.
          """
          ragflow, dataset_id = self._create_dataset("test_upload_files_with_remote_file_path")
          file_paths = ['https://github.com/genostack/ragflow']
          res = ragflow.upload_local_file(dataset_id, file_paths)
          assert res['code'] == RetCode.ARGUMENT_ERROR
          # the wording below is compared verbatim against the returned message
          assert res['message'] == 'Remote files have not unsupported.'

      # ----------------------------delete a file-----------------------------------------------------
      def test_delete_one_file(self):
          """
          Test deleting one file with success.
          """
          ragflow, dataset_id = self._create_dataset("test_delete_one_file")
          file_paths = ["test_data/test.txt"]
          res = ragflow.upload_local_file(dataset_id, file_paths)
          # get the doc_id
          doc_id = res['data'][0]['id']
          # delete the files
          deleted_res = ragflow.delete_files(doc_id, dataset_id)
          # assert value
          assert deleted_res['code'] == RetCode.SUCCESS
          assert deleted_res['data'] is True

      def test_delete_document_with_not_existing_document(self):
          """
          Test deleting a document that does not exist with failure.
          """
          ragflow, dataset_id = self._create_dataset("test_delete_document_with_not_existing_document")
          res = ragflow.delete_files("111", dataset_id)
          assert res['code'] == RetCode.DATA_ERROR
          assert res['message'] == 'Document 111 not found!'

      def test_delete_document_with_creating_100_documents_and_deleting_100_documents(self):
          """
          Test deleting documents when uploading 100 docs and deleting 100 docs.
          """
          # upload 100 docs
          # NOTE(review): dataset name kept short on purpose; the file imports
          # NAME_LENGTH_LIMIT, presumably a name-length cap whose value is not
          # visible here.
          ragflow, dataset_id = self._create_dataset("test_delete_100_documents")
          file_paths = ["test_data/test.txt"] * 100
          res = ragflow.upload_local_file(dataset_id, file_paths)
          # delete every uploaded document and check each deletion on its own
          for d in res['data']:
              deleted_res = ragflow.delete_files(d['id'], dataset_id)
              assert deleted_res['code'] == RetCode.SUCCESS
              assert deleted_res['data'] is True

      def test_delete_document_from_nonexistent_dataset(self):
          """
          Test deleting a document while naming a dataset id ("000") it is not stored in.
          """
          ragflow, dataset_id = self._create_dataset("test_delete_document_from_nonexistent_dataset")
          file_paths = ["test_data/test.txt"]
          res = ragflow.upload_local_file(dataset_id, file_paths)
          # get the doc_id
          doc_id = res['data'][0]['id']
          # delete the files
          deleted_res = ragflow.delete_files(doc_id, "000")
          # assert value
          assert deleted_res['code'] == RetCode.ARGUMENT_ERROR
          assert (deleted_res['message'] ==
                  f'The document {doc_id} is not in the dataset: 000, but in the dataset: {dataset_id}.')

      def test_delete_document_which_is_located_in_other_dataset(self):
          """
          Test deleting a document which is located in other dataset.
          """
          # upload a document
          ragflow, created_res_id = self._create_dataset("test_delete_document_which_is_located_in_other_dataset")
          file_paths = ["test_data/test.txt"]
          res = ragflow.upload_local_file(created_res_id, file_paths)
          # other dataset
          other_res = ragflow.create_dataset("other_dataset")
          other_dataset_id = other_res['data']['dataset_id']
          # get the doc_id
          doc_id = res['data'][0]['id']
          # delete the files from the other dataset
          deleted_res = ragflow.delete_files(doc_id, other_dataset_id)
          # assert value
          assert deleted_res['code'] == RetCode.ARGUMENT_ERROR
          assert (deleted_res['message'] ==
                  f'The document {doc_id} is not in the dataset: {other_dataset_id}, '
                  f'but in the dataset: {created_res_id}.')

      # ----------------------------list files-----------------------------------------------------
      def test_list_documents_with_success(self):
          """
          Test listing documents with a successful outcome.
          """
          # upload a document
          ragflow, created_res_id = self._create_dataset("test_list_documents_with_success")
          file_paths = ["test_data/test.txt"]
          ragflow.upload_local_file(created_res_id, file_paths)
          # Call the list_document method
          response = ragflow.list_files(created_res_id)
          assert response['code'] == RetCode.SUCCESS
          assert len(response['data']['docs']) == 1

      def test_list_documents_with_checking_size(self):
          """
          Test listing documents and verify the number of documents returned.
          """
          # upload 10 documents
          ragflow, created_res_id = self._create_dataset("test_list_documents_with_checking_size")
          file_paths = ["test_data/test.txt"] * 10
          ragflow.upload_local_file(created_res_id, file_paths)
          # Call the list_document method
          response = ragflow.list_files(created_res_id)
          assert response['code'] == RetCode.SUCCESS
          assert len(response['data']['docs']) == 10

      def test_list_documents_with_getting_empty_result(self):
          """
          Test listing documents that should be empty.
          """
          # upload 0 documents
          ragflow, created_res_id = self._create_dataset("test_list_documents_with_getting_empty_result")
          # Call the list_document method
          response = ragflow.list_files(created_res_id)
          assert response['code'] == RetCode.SUCCESS
          assert len(response['data']['docs']) == 0

      def test_list_documents_with_creating_100_documents(self):
          """
          Test listing 100 documents and verify the size of these documents.
          """
          # upload 100 documents
          ragflow, created_res_id = self._create_dataset("test_list_documents_with_creating_100_documents")
          file_paths = ["test_data/test.txt"] * 100
          ragflow.upload_local_file(created_res_id, file_paths)
          # Call the list_document method
          response = ragflow.list_files(created_res_id)
          assert response['code'] == RetCode.SUCCESS
          assert len(response['data']['docs']) == 100

      def test_list_document_with_failure(self):
          """
          Test that a negative offset and count are reported as an IndexError.
          """
          ragflow, created_res_id = self._create_dataset("test_list_document_with_failure")
          response = ragflow.list_files(created_res_id, offset=-1, count=-1)
          assert "IndexError" in response['message']
          assert response['code'] == RetCode.EXCEPTION_ERROR

      def test_list_document_with_verifying_offset_and_count(self):
          """
          Test listing documents with verifying the functionalities of offset and count.
          """
          ragflow, created_res_id = self._create_dataset("test_list_document_with_verifying_offset_and_count")
          # 2 paths * 10 = 20 uploads, so a window of 10 starting at offset 2 is full
          file_paths = ["test_data/test.txt", "test_data/empty.txt"] * 10
          ragflow.upload_local_file(created_res_id, file_paths)
          # Call the list_document method
          response = ragflow.list_files(created_res_id, offset=2, count=10)
          assert response['code'] == RetCode.SUCCESS
          assert len(response['data']['docs']) == 10

      def test_list_document_with_verifying_keywords(self):
          """
          Test listing documents with verifying the functionality of searching keywords.
          """
          ragflow, created_res_id = self._create_dataset("test_list_document_with_verifying_keywords")
          file_paths = ["test_data/test.txt", "test_data/empty.txt"]
          ragflow.upload_local_file(created_res_id, file_paths)
          # Call the list_document method
          response = ragflow.list_files(created_res_id, keywords="empty")
          assert response['code'] == RetCode.SUCCESS
          assert len(response['data']['docs']) == 1

      def test_list_document_with_verifying_order_by_and_descend(self):
          """
          Test listing documents with verifying the functionality of order_by and descend.

          list_files is called with its default ordering arguments; the test
          expects the documents back in the reverse of the upload order.
          """
          ragflow, created_res_id = self._create_dataset("test_list_document_with_verifying_order_by_and_descend")
          file_paths = ["test_data/test.txt", "test_data/empty.txt"]
          ragflow.upload_local_file(created_res_id, file_paths)
          # Call the list_document method
          response = ragflow.list_files(created_res_id)
          assert response['code'] == RetCode.SUCCESS
          docs = response['data']['docs']
          assert len(docs) == 2
          # reverse: the last uploaded path must match the first listed document
          for doc, path in zip(docs, reversed(file_paths)):
              assert doc['name'] in path

      def test_list_document_with_verifying_order_by_and_ascend(self):
          """
          Test listing documents with verifying the functionality of order_by and ascend.
          """
          ragflow, created_res_id = self._create_dataset("test_list_document_with_verifying_order_by_and_ascend")
          file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
          ragflow.upload_local_file(created_res_id, file_paths)
          # Call the list_document method
          response = ragflow.list_files(created_res_id, descend=False)
          assert response['code'] == RetCode.SUCCESS
          docs = response['data']['docs']
          assert len(docs) == 3
          # ascending: listed documents must follow the upload order
          for doc, path in zip(docs, file_paths):
              assert doc['name'] in path

      # TODO: have to set the limitation of the number of documents
      # ----------------------------download a file-----------------------------------------------------
      # ----------------------------enable rename-----------------------------------------------------
      # ----------------------------start parsing-----------------------------------------------------
      # ----------------------------stop parsing-----------------------------------------------------
      # ----------------------------show the status of the file-----------------------------------------------------
      # ----------------------------list the chunks of the file-----------------------------------------------------
      # ----------------------------delete the chunk-----------------------------------------------------
      # ----------------------------edit the status of the chunk-----------------------------------------------------
      # ----------------------------insert a new chunk-----------------------------------------------------
      # ----------------------------get a specific chunk-----------------------------------------------------
      # ----------------------------retrieval test-----------------------------------------------------