# NOTE(review): removed non-Python copy-paste residue that preceded this module
# (GitHub topic-picker UI text and a run of concatenated line numbers).
  1. import os
  2. from ragflow_sdk import RAGFlow, DataSet, Document, Chunk
  3. HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')
  4. def test_upload_document_with_success(get_api_key_fixture):
  5. """
  6. Test ingesting a document into a dataset with success.
  7. """
  8. # Initialize RAGFlow instance
  9. API_KEY = get_api_key_fixture
  10. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  11. # Step 1: Create a new dataset
  12. ds = rag.create_dataset(name="God")
  13. # Ensure dataset creation was successful
  14. assert isinstance(ds, DataSet), f"Failed to create dataset, error: {ds}"
  15. assert ds.name == "God", "Dataset name does not match."
  16. # Step 2: Create a new document
  17. # The blob is the actual file content or a placeholder in this case
  18. blob = b"Sample document content for ingestion test."
  19. blob_2 = b"test_2."
  20. list_1 = []
  21. list_1.append({"name": "Test_1.txt",
  22. "blob": blob})
  23. list_1.append({"name": "Test_2.txt",
  24. "blob": blob_2})
  25. res = ds.upload_documents(list_1)
  26. # Ensure document ingestion was successful
  27. assert res is None, f"Failed to create document, error: {res}"
  28. def test_update_document_with_success(get_api_key_fixture):
  29. """
  30. Test updating a document with success.
  31. Update name or chunk_method are supported
  32. """
  33. API_KEY = get_api_key_fixture
  34. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  35. ds = rag.list_datasets(name="God")
  36. ds = ds[0]
  37. doc = ds.list_documents()
  38. doc = doc[0]
  39. if isinstance(doc, Document):
  40. res = doc.update({"chunk_method": "manual", "name": "manual.txt"})
  41. assert res is None, f"Failed to update document, error: {res}"
  42. else:
  43. assert False, f"Failed to get document, error: {doc}"
  44. def test_download_document_with_success(get_api_key_fixture):
  45. """
  46. Test downloading a document with success.
  47. """
  48. API_KEY = get_api_key_fixture
  49. # Initialize RAGFlow instance
  50. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  51. # Retrieve a document
  52. ds = rag.list_datasets(name="God")
  53. ds = ds[0]
  54. doc = ds.list_documents(name="manual.txt")
  55. doc = doc[0]
  56. # Check if the retrieved document is of type Document
  57. if isinstance(doc, Document):
  58. # Download the document content and save it to a file
  59. with open("ragflow.txt", "wb+") as file:
  60. file.write(doc.download())
  61. # Print the document object for debugging
  62. print(doc)
  63. # Assert that the download was successful
  64. assert True, f"Failed to download document, error: {doc}"
  65. else:
  66. # If the document retrieval fails, assert failure
  67. assert False, f"Failed to get document, error: {doc}"
  68. def test_list_documents_in_dataset_with_success(get_api_key_fixture):
  69. """
  70. Test list all documents into a dataset with success.
  71. """
  72. API_KEY = get_api_key_fixture
  73. # Initialize RAGFlow instance
  74. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  75. # Step 1: Create a new dataset
  76. ds = rag.create_dataset(name="God2")
  77. # Ensure dataset creation was successful
  78. assert isinstance(ds, DataSet), f"Failed to create dataset, error: {ds}"
  79. assert ds.name == "God2", "Dataset name does not match."
  80. # Step 2: Create a new document
  81. # The blob is the actual file content or a placeholder in this case
  82. name1 = "Test Document111.txt"
  83. blob1 = b"Sample document content for ingestion test111."
  84. name2 = "Test Document222.txt"
  85. blob2 = b"Sample document content for ingestion test222."
  86. list_1 = [{"name": name1, "blob": blob1}, {"name": name2, "blob": blob2}]
  87. ds.upload_documents(list_1)
  88. for d in ds.list_documents(keywords="test", offset=0, limit=12):
  89. assert isinstance(d, Document), "Failed to upload documents"
  90. def test_delete_documents_in_dataset_with_success(get_api_key_fixture):
  91. """
  92. Test list all documents into a dataset with success.
  93. """
  94. API_KEY = get_api_key_fixture
  95. # Initialize RAGFlow instance
  96. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  97. # Step 1: Create a new dataset
  98. ds = rag.create_dataset(name="God3")
  99. # Ensure dataset creation was successful
  100. assert isinstance(ds, DataSet), f"Failed to create dataset, error: {ds}"
  101. assert ds.name == "God3", "Dataset name does not match."
  102. # Step 2: Create a new document
  103. # The blob is the actual file content or a placeholder in this case
  104. name1 = "Test Document333.txt"
  105. blob1 = b"Sample document content for ingestion test333."
  106. name2 = "Test Document444.txt"
  107. blob2 = b"Sample document content for ingestion test444."
  108. ds.upload_documents([{"name": name1, "blob": blob1}, {"name": name2, "blob": blob2}])
  109. for d in ds.list_documents(keywords="document", offset=0, limit=12):
  110. assert isinstance(d, Document)
  111. ds.delete_documents([d.id])
  112. remaining_docs = ds.list_documents(keywords="rag", offset=0, limit=12)
  113. assert len(remaining_docs) == 0, "Documents were not properly deleted."
  114. def test_parse_and_cancel_document(get_api_key_fixture):
  115. API_KEY = get_api_key_fixture
  116. # Initialize RAGFlow with API key and host address
  117. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  118. # Create a dataset with a specific name
  119. ds = rag.create_dataset(name="God4")
  120. # Define the document name and path
  121. name3 = 'westworld.pdf'
  122. path = 'test_data/westworld.pdf'
  123. # Create a document in the dataset using the file path
  124. ds.upload_documents({"name": name3, "blob": open(path, "rb").read()})
  125. # Retrieve the document by name
  126. doc = rag.list_documents(name="westworld.pdf")
  127. doc = doc[0]
  128. ds.async_parse_documents(document_ids=[])
  129. # Print message to confirm asynchronous parsing has been initiated
  130. print("Async parsing initiated")
  131. # Use join to wait for parsing to complete and get progress updates
  132. for progress, msg in doc.join(interval=5, timeout=10):
  133. print(progress, msg)
  134. # Assert that the progress is within the valid range (0 to 100)
  135. assert 0 <= progress <= 100, f"Invalid progress: {progress}"
  136. # Assert that the message is not empty
  137. assert msg, "Message should not be empty"
  138. # Test cancelling the parsing operation
  139. doc.cancel()
  140. # Print message to confirm parsing has been cancelled successfully
  141. print("Parsing cancelled successfully")
  142. def test_bulk_parse_and_cancel_documents(get_api_key_fixture):
  143. API_KEY = get_api_key_fixture
  144. # Initialize RAGFlow with API key and host address
  145. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  146. # Create a dataset
  147. ds = rag.create_dataset(name="God5")
  148. assert ds is not None, "Dataset creation failed"
  149. assert ds.name == "God5", "Dataset name does not match"
  150. # Prepare a list of file names and paths
  151. documents = [
  152. {'name': 'test1.txt', 'path': 'test_data/test1.txt'},
  153. {'name': 'test2.txt', 'path': 'test_data/test2.txt'},
  154. {'name': 'test3.txt', 'path': 'test_data/test3.txt'}
  155. ]
  156. # Create documents in bulk
  157. for doc_info in documents:
  158. with open(doc_info['path'], "rb") as file:
  159. created_doc = rag.create_document(ds, name=doc_info['name'], blob=file.read())
  160. assert created_doc is not None, f"Failed to create document {doc_info['name']}"
  161. # Retrieve document objects in bulk
  162. docs = [rag.get_document(name=doc_info['name']) for doc_info in documents]
  163. ids = [doc.id for doc in docs]
  164. assert len(docs) == len(documents), "Mismatch between created documents and fetched documents"
  165. # Initiate asynchronous parsing for all documents
  166. rag.async_parse_documents(ids)
  167. print("Async bulk parsing initiated")
  168. # Wait for all documents to finish parsing and check progress
  169. for doc in docs:
  170. for progress, msg in doc.join(interval=5, timeout=10):
  171. print(f"{doc.name}: Progress: {progress}, Message: {msg}")
  172. # Assert that progress is within the valid range
  173. assert 0 <= progress <= 100, f"Invalid progress: {progress} for document {doc.name}"
  174. # Assert that the message is not empty
  175. assert msg, f"Message should not be empty for document {doc.name}"
  176. # If progress reaches 100%, assert that parsing is completed successfully
  177. if progress == 100:
  178. assert "completed" in msg.lower(), f"Document {doc.name} did not complete successfully"
  179. # Cancel parsing for all documents in bulk
  180. cancel_result = rag.async_cancel_parse_documents(ids)
  181. assert cancel_result is None or isinstance(cancel_result, type(None)), "Failed to cancel document parsing"
  182. print("Async bulk parsing cancelled")
  183. def test_parse_document_and_chunk_list(get_api_key_fixture):
  184. API_KEY = get_api_key_fixture
  185. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  186. ds = rag.create_dataset(name="God7")
  187. name = 'story.txt'
  188. path = 'test_data/story.txt'
  189. # name = "Test Document rag.txt"
  190. # blob = " Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps."
  191. rag.create_document(ds, name=name, blob=open(path, "rb").read())
  192. doc = rag.get_document(name=name)
  193. doc.async_parse()
  194. # Wait for parsing to complete and get progress updates using join
  195. for progress, msg in doc.join(interval=5, timeout=30):
  196. print(progress, msg)
  197. # Assert that progress is within 0 to 100
  198. assert 0 <= progress <= 100, f"Invalid progress: {progress}"
  199. # Assert that the message is not empty
  200. assert msg, "Message should not be empty"
  201. for c in doc.list_chunks(keywords="rag", offset=0, limit=12):
  202. print(c)
  203. assert c is not None, "Chunk is None"
  204. assert "rag" in c['content_with_weight'].lower(), f"Keyword 'rag' not found in chunk content: {c.content}"
  205. def test_add_chunk_to_chunk_list(get_api_key_fixture):
  206. API_KEY = get_api_key_fixture
  207. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  208. doc = rag.get_document(name='story.txt')
  209. chunk = doc.add_chunk(content="assssdd")
  210. assert chunk is not None, "Chunk is None"
  211. assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
  212. def test_delete_chunk_of_chunk_list(get_api_key_fixture):
  213. API_KEY = get_api_key_fixture
  214. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  215. doc = rag.get_document(name='story.txt')
  216. chunk = doc.add_chunk(content="assssdd")
  217. assert chunk is not None, "Chunk is None"
  218. assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
  219. doc = rag.get_document(name='story.txt')
  220. chunk_count_before = doc.chunk_count
  221. chunk.delete()
  222. doc = rag.get_document(name='story.txt')
  223. assert doc.chunk_count == chunk_count_before - 1, "Chunk was not deleted"
  224. def test_update_chunk_content(get_api_key_fixture):
  225. API_KEY = get_api_key_fixture
  226. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  227. doc = rag.get_document(name='story.txt')
  228. chunk = doc.add_chunk(content="assssddd")
  229. assert chunk is not None, "Chunk is None"
  230. assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
  231. chunk.content = "ragflow123"
  232. res = chunk.save()
  233. assert res is True, f"Failed to update chunk content, error: {res}"
  234. def test_update_chunk_available(get_api_key_fixture):
  235. API_KEY = get_api_key_fixture
  236. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  237. doc = rag.get_document(name='story.txt')
  238. chunk = doc.add_chunk(content="ragflow")
  239. assert chunk is not None, "Chunk is None"
  240. assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
  241. chunk.available = 0
  242. res = chunk.save()
  243. assert res is True, f"Failed to update chunk status, error: {res}"
  244. def test_retrieval_chunks(get_api_key_fixture):
  245. API_KEY = get_api_key_fixture
  246. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  247. ds = rag.create_dataset(name="God8")
  248. name = 'ragflow_test.txt'
  249. path = 'test_data/ragflow_test.txt'
  250. rag.create_document(ds, name=name, blob=open(path, "rb").read())
  251. doc = rag.get_document(name=name)
  252. doc.async_parse()
  253. # Wait for parsing to complete and get progress updates using join
  254. for progress, msg in doc.join(interval=5, timeout=30):
  255. print(progress, msg)
  256. assert 0 <= progress <= 100, f"Invalid progress: {progress}"
  257. assert msg, "Message should not be empty"
  258. for c in rag.retrieval(question="What's ragflow?",
  259. datasets=[ds.id], documents=[doc],
  260. offset=0, limit=6, similarity_threshold=0.1,
  261. vector_similarity_weight=0.3,
  262. top_k=1024
  263. ):
  264. print(c)
  265. assert c is not None, "Chunk is None"
  266. assert "ragflow" in c.content.lower(), f"Keyword 'rag' not found in chunk content: {c.content}"