您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. #
  2. # Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. from ragflow_sdk import RAGFlow
  17. from common import HOST_ADDRESS
  18. import pytest
  19. def test_upload_document_with_success(get_api_key_fixture):
  20. API_KEY = get_api_key_fixture
  21. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  22. ds = rag.create_dataset(name="test_upload_document")
  23. blob = b"Sample document content for test."
  24. with open("test_data/ragflow.txt", "rb") as file:
  25. blob_2 = file.read()
  26. document_infos = []
  27. document_infos.append({"display_name": "test_1.txt", "blob": blob})
  28. document_infos.append({"display_name": "test_2.txt", "blob": blob_2})
  29. ds.upload_documents(document_infos)
  30. def test_update_document_with_success(get_api_key_fixture):
  31. API_KEY = get_api_key_fixture
  32. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  33. ds = rag.create_dataset(name="test_update_document")
  34. blob = b"Sample document content for test."
  35. document_infos = [{"display_name": "test.txt", "blob": blob}]
  36. docs = ds.upload_documents(document_infos)
  37. doc = docs[0]
  38. doc.update({"chunk_method": "manual", "name": "manual.txt"})
  39. def test_download_document_with_success(get_api_key_fixture):
  40. API_KEY = get_api_key_fixture
  41. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  42. ds = rag.create_dataset(name="test_download_document")
  43. blob = b"Sample document content for test."
  44. document_infos = [{"display_name": "test_1.txt", "blob": blob}]
  45. docs = ds.upload_documents(document_infos)
  46. doc = docs[0]
  47. with open("test_download.txt", "wb+") as file:
  48. file.write(doc.download())
  49. def test_list_documents_in_dataset_with_success(get_api_key_fixture):
  50. API_KEY = get_api_key_fixture
  51. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  52. ds = rag.create_dataset(name="test_list_documents")
  53. blob = b"Sample document content for test."
  54. document_infos = [{"display_name": "test.txt", "blob": blob}]
  55. ds.upload_documents(document_infos)
  56. ds.list_documents(keywords="test", page=1, page_size=12)
  57. def test_delete_documents_in_dataset_with_success(get_api_key_fixture):
  58. API_KEY = get_api_key_fixture
  59. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  60. ds = rag.create_dataset(name="test_delete_documents")
  61. name = "test_delete_documents.txt"
  62. blob = b"Sample document content for test."
  63. document_infos = [{"display_name": name, "blob": blob}]
  64. docs = ds.upload_documents(document_infos)
  65. ds.delete_documents([docs[0].id])
  66. # Upload and parse documents of different file types with the general parse method.
  67. def test_upload_and_parse_pdf_documents_with_general_parse_method(get_api_key_fixture):
  68. API_KEY = get_api_key_fixture
  69. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  70. ds = rag.create_dataset(name="test_pdf_document")
  71. with open("test_data/test.pdf", "rb") as file:
  72. blob = file.read()
  73. document_infos = [{"display_name": "test.pdf", "blob": blob}]
  74. docs = ds.upload_documents(document_infos)
  75. doc = docs[0]
  76. ds.async_parse_documents([doc.id])
  77. def test_upload_and_parse_docx_documents_with_general_parse_method(get_api_key_fixture):
  78. API_KEY = get_api_key_fixture
  79. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  80. ds = rag.create_dataset(name="test_docx_document")
  81. with open("test_data/test.docx", "rb") as file:
  82. blob = file.read()
  83. document_infos = [{"display_name": "test.docx", "blob": blob}]
  84. docs = ds.upload_documents(document_infos)
  85. doc = docs[0]
  86. ds.async_parse_documents([doc.id])
  87. def test_upload_and_parse_excel_documents_with_general_parse_method(get_api_key_fixture):
  88. API_KEY = get_api_key_fixture
  89. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  90. ds = rag.create_dataset(name="test_excel_document")
  91. with open("test_data/test.xlsx", "rb") as file:
  92. blob = file.read()
  93. document_infos = [{"display_name": "test.xlsx", "blob": blob}]
  94. docs = ds.upload_documents(document_infos)
  95. doc = docs[0]
  96. ds.async_parse_documents([doc.id])
  97. def test_upload_and_parse_ppt_documents_with_general_parse_method(get_api_key_fixture):
  98. API_KEY = get_api_key_fixture
  99. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  100. ds = rag.create_dataset(name="test_ppt_document")
  101. with open("test_data/test.ppt", "rb") as file:
  102. blob = file.read()
  103. document_infos = [{"display_name": "test.ppt", "blob": blob}]
  104. docs = ds.upload_documents(document_infos)
  105. doc = docs[0]
  106. ds.async_parse_documents([doc.id])
  107. def test_upload_and_parse_image_documents_with_general_parse_method(get_api_key_fixture):
  108. API_KEY = get_api_key_fixture
  109. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  110. ds = rag.create_dataset(name="test_image_document")
  111. with open("test_data/test.jpg", "rb") as file:
  112. blob = file.read()
  113. document_infos = [{"display_name": "test.jpg", "blob": blob}]
  114. docs = ds.upload_documents(document_infos)
  115. doc = docs[0]
  116. ds.async_parse_documents([doc.id])
  117. def test_upload_and_parse_txt_documents_with_general_parse_method(get_api_key_fixture):
  118. API_KEY = get_api_key_fixture
  119. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  120. ds = rag.create_dataset(name="test_txt_document")
  121. with open("test_data/test.txt", "rb") as file:
  122. blob = file.read()
  123. document_infos = [{"display_name": "test.txt", "blob": blob}]
  124. docs = ds.upload_documents(document_infos)
  125. doc = docs[0]
  126. ds.async_parse_documents([doc.id])
  127. def test_upload_and_parse_md_documents_with_general_parse_method(get_api_key_fixture):
  128. API_KEY = get_api_key_fixture
  129. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  130. ds = rag.create_dataset(name="test_md_document")
  131. with open("test_data/test.md", "rb") as file:
  132. blob = file.read()
  133. document_infos = [{"display_name": "test.md", "blob": blob}]
  134. docs = ds.upload_documents(document_infos)
  135. doc = docs[0]
  136. ds.async_parse_documents([doc.id])
  137. def test_upload_and_parse_json_documents_with_general_parse_method(get_api_key_fixture):
  138. API_KEY = get_api_key_fixture
  139. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  140. ds = rag.create_dataset(name="test_json_document")
  141. with open("test_data/test.json", "rb") as file:
  142. blob = file.read()
  143. document_infos = [{"display_name": "test.json", "blob": blob}]
  144. docs = ds.upload_documents(document_infos)
  145. doc = docs[0]
  146. ds.async_parse_documents([doc.id])
  147. @pytest.mark.skip(reason="")
  148. def test_upload_and_parse_eml_documents_with_general_parse_method(get_api_key_fixture):
  149. API_KEY = get_api_key_fixture
  150. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  151. ds = rag.create_dataset(name="test_eml_document")
  152. with open("test_data/test.eml", "rb") as file:
  153. blob = file.read()
  154. document_infos = [{"display_name": "test.eml", "blob": blob}]
  155. docs = ds.upload_documents(document_infos)
  156. doc = docs[0]
  157. ds.async_parse_documents([doc.id])
  158. def test_upload_and_parse_html_documents_with_general_parse_method(get_api_key_fixture):
  159. API_KEY = get_api_key_fixture
  160. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  161. ds = rag.create_dataset(name="test_html_document")
  162. with open("test_data/test.html", "rb") as file:
  163. blob = file.read()
  164. document_infos = [{"display_name": "test.html", "blob": blob}]
  165. docs = ds.upload_documents(document_infos)
  166. doc = docs[0]
  167. ds.async_parse_documents([doc.id])