
t_chunk.py 7.1KB

from ragflow_sdk import RAGFlow
from common import HOST_ADDRESS
from time import sleep


def test_parse_document_with_txt(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_parse_document")
    name = 'ragflow_test.txt'
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    docs = ds.upload_documents([{"displayed_name": name, "blob": blob}])
    doc = docs[0]
    ds.async_parse_documents(document_ids=[doc.id])
    '''
    for n in range(100):
        if doc.progress == 1:
            break
        sleep(1)
    else:
        raise Exception("Run time ERROR: Document parsing did not complete in time.")
    '''
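

# A minimal sketch of how the commented-out polling loop above could be made to
# work: the local `doc` object is never refreshed, so its `progress` stays at its
# upload-time value. The helper assumes `DataSet.list_documents(id=...)` can
# re-fetch a document with its current progress; it is illustrative only and is
# not called by the tests in this file.
def _wait_for_parsing(ds, doc_id, timeout=100):
    for _ in range(timeout):
        doc = ds.list_documents(id=doc_id)[0]
        if doc.progress == 1:
            return
        sleep(1)
    raise Exception("Run time ERROR: Document parsing did not complete in time.")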


def test_parse_and_cancel_document(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_parse_and_cancel_document")
    name = 'ragflow_test.txt'
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    docs = ds.upload_documents([{"displayed_name": name, "blob": blob}])
    doc = docs[0]
    ds.async_parse_documents(document_ids=[doc.id])
    sleep(1)
    # Cancel only if parsing is still in flight; note that `doc` is not re-fetched
    # here, so `progress` reflects its value at upload time.
    if 0 < doc.progress < 1:
        ds.async_cancel_parse_documents(document_ids=[doc.id])


def test_bulk_parse_documents(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_bulk_parse_and_cancel_documents")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    documents = [
        {'displayed_name': 'test1.txt', 'blob': blob},
        {'displayed_name': 'test2.txt', 'blob': blob},
        {'displayed_name': 'test3.txt', 'blob': blob}
    ]
    docs = ds.upload_documents(documents)
    ids = [doc.id for doc in docs]
    ds.async_parse_documents(ids)
    '''
    for n in range(100):
        all_completed = all(doc.progress == 1 for doc in docs)
        if all_completed:
            break
        sleep(1)
    else:
        raise Exception("Run time ERROR: Bulk document parsing did not complete in time.")
    '''


def test_list_chunks_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_list_chunks_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"displayed_name": "test_list_chunks_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    ids = [doc.id for doc in docs]
    ds.async_parse_documents(ids)
    '''
    for n in range(100):
        all_completed = all(doc.progress == 1 for doc in docs)
        if all_completed:
            break
        sleep(1)
    else:
        raise Exception("Run time ERROR: Chunk document parsing did not complete in time.")
    '''
    doc = docs[0]
    doc.list_chunks()


def test_add_chunk_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_add_chunk_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"displayed_name": "test_add_chunk_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    doc.add_chunk(content="This is a chunk addition test")
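    # Optional follow-up (sketch): verify the chunk is visible once indexed. This
    # assumes `Document.list_chunks(keywords=...)` and `Chunk.content` behave as
    # elsewhere in this file; treat the exact parameters as assumptions.
    # sleep(3)
    # chunks = doc.list_chunks(keywords="chunk addition")
    # assert any("chunk addition" in c.content for c in chunks)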


def test_delete_chunk_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_delete_chunk_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"displayed_name": "test_delete_chunk_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    chunk = doc.add_chunk(content="This is a chunk addition test")
    # Give the search engine a moment to index the new chunk before deleting it.
    sleep(5)
    doc.delete_chunks([chunk.id])


def test_update_chunk_content(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_update_chunk_content_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"displayed_name": "test_update_chunk_content_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    chunk = doc.add_chunk(content="This is a chunk addition test")
    # For Elasticsearch, the chunk is not searchable for a short time (~2 s).
    sleep(3)
    chunk.update({"content": "This is an updated content"})


def test_update_chunk_available(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_update_chunk_available_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"displayed_name": "test_update_chunk_available_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    chunk = doc.add_chunk(content="This is a chunk addition test")
    # For Elasticsearch, the chunk is not searchable for a short time (~2 s).
    sleep(3)
    chunk.update({"available": 0})


def test_retrieve_chunks(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="retrieval")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"displayed_name": "test_retrieve_chunks.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    doc.add_chunk(content="This is a chunk addition test")
    rag.retrieve(dataset_ids=[ds.id], document_ids=[doc.id])
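    # A fuller retrieval call (sketch): retrieve() also accepts a question and
    # similarity settings and returns a list of chunks. Treat these parameters as
    # assumptions to check against the installed ragflow_sdk version.
    # results = rag.retrieve(
    #     question="chunk addition test",
    #     dataset_ids=[ds.id],
    #     document_ids=[doc.id],
    #     similarity_threshold=0.2,
    # )
    # assert isinstance(results, list)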