
t_chunk.py

from ragflow_sdk import RAGFlow
from common import HOST_ADDRESS
from time import sleep


def test_parse_document_with_txt(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_parse_document")
    name = 'ragflow_test.txt'
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    docs = ds.upload_documents([{"displayed_name": name, "blob": blob}])
    doc = docs[0]
    ds.async_parse_documents(document_ids=[doc.id])
    '''
    for n in range(100):
        if doc.progress == 1:
            break
        sleep(1)
    else:
        raise Exception("Run time ERROR: Document parsing did not complete in time.")
    '''
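

# Hedged helper sketch (not part of the original suite): the commented-out
# wait loops in this file poll `doc.progress`, but the Document objects they
# check are local snapshots that are never refreshed, so the loops could spin
# until the timeout even after parsing finishes. Re-fetching the document on
# each iteration avoids that. This assumes DataSet.list_documents(id=...) is
# available in ragflow_sdk; verify against the SDK version you run.
def wait_for_parsing(dataset, document_id, timeout_seconds=100):
    for _ in range(timeout_seconds):
        fresh = dataset.list_documents(id=document_id)  # refresh server-side state
        if fresh and fresh[0].progress == 1:
            return
        sleep(1)
    raise Exception("Run time ERROR: Document parsing did not complete in time.")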


def test_parse_and_cancel_document(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_parse_and_cancel_document")
    name = 'ragflow_test.txt'
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    docs = ds.upload_documents([{"displayed_name": name, "blob": blob}])
    doc = docs[0]
    ds.async_parse_documents(document_ids=[doc.id])
    sleep(1)
    # Note: `doc` is the snapshot returned by upload_documents and its
    # `progress` field is not refreshed here, so this condition may never
    # hold; re-fetch the document to observe live parsing progress.
    if 0 < doc.progress < 1:
        ds.async_cancel_parse_documents(document_ids=[doc.id])


def test_bulk_parse_documents(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_bulk_parse_and_cancel_documents")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    documents = [
        {'displayed_name': 'test1.txt', 'blob': blob},
        {'displayed_name': 'test2.txt', 'blob': blob},
        {'displayed_name': 'test3.txt', 'blob': blob}
    ]
    docs = ds.upload_documents(documents)
    ids = [doc.id for doc in docs]
    ds.async_parse_documents(ids)
    '''
    for n in range(100):
        all_completed = all(doc.progress == 1 for doc in docs)
        if all_completed:
            break
        sleep(1)
    else:
        raise Exception("Run time ERROR: Bulk document parsing did not complete in time.")
    '''


def test_list_chunks_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_list_chunks_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"displayed_name": "test_list_chunks_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    ids = [doc.id for doc in docs]
    ds.async_parse_documents(ids)
    '''
    for n in range(100):
        all_completed = all(doc.progress == 1 for doc in docs)
        if all_completed:
            break
        sleep(1)
    else:
        raise Exception("Run time ERROR: Chunk document parsing did not complete in time.")
    '''
    doc = docs[0]
    doc.list_chunks()


def test_add_chunk_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_add_chunk_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"displayed_name": "test_add_chunk_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    doc.add_chunk(content="This is a chunk addition test")


def test_delete_chunk_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_delete_chunk_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"displayed_name": "test_delete_chunk_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    chunk = doc.add_chunk(content="This is a chunk addition test")
    doc.delete_chunks([chunk.id])


def test_update_chunk_content(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_update_chunk_content_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"displayed_name": "test_update_chunk_content_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    chunk = doc.add_chunk(content="This is a chunk addition test")
    chunk.update({"content": "This is an updated content"})


def test_update_chunk_available(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_update_chunk_available_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"displayed_name": "test_update_chunk_available_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    chunk = doc.add_chunk(content="This is a chunk addition test")
    chunk.update({"available": False})


def test_retrieve_chunks(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="retrieval")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"displayed_name": "test_retrieve_chunks.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    doc.add_chunk(content="This is a chunk addition test")
    rag.retrieve(dataset_ids=[ds.id], document_ids=[doc.id])
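

# Hedged variant (not in the original file): the retrieve() call above passes
# no query, so it only checks that the endpoint responds. This sketch issues a
# real question and inspects the result; it assumes RAGFlow.retrieve() accepts
# a `question` keyword argument and returns a list of chunk objects, as
# described in the Python SDK documentation. Adjust if your version differs.
def test_retrieve_chunks_with_question(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="retrieval_with_question")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    docs = ds.upload_documents([{"displayed_name": "test_retrieve_chunks_with_question.txt", "blob": blob}])
    doc = docs[0]
    doc.add_chunk(content="This is a chunk addition test")
    # The manually added chunk should be retrievable for a matching query.
    chunks = rag.retrieve(question="chunk addition test",
                          dataset_ids=[ds.id], document_ids=[doc.id])
    assert isinstance(chunks, list)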