Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

t_document.py 6.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. from ragflow_sdk import RAGFlow
  2. from common import HOST_ADDRESS
  3. import pytest
  4. def test_upload_document_with_success(get_api_key_fixture):
  5. API_KEY = get_api_key_fixture
  6. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  7. ds = rag.create_dataset(name="test_upload_document")
  8. blob = b"Sample document content for test."
  9. with open("ragflow.txt","rb") as file:
  10. blob_2=file.read()
  11. document_infos = []
  12. document_infos.append({"displayed_name": "test_1.txt","blob": blob})
  13. document_infos.append({"displayed_name": "test_2.txt","blob": blob_2})
  14. ds.upload_documents(document_infos)
  15. def test_update_document_with_success(get_api_key_fixture):
  16. API_KEY = get_api_key_fixture
  17. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  18. ds = rag.create_dataset(name="test_update_document")
  19. blob = b"Sample document content for test."
  20. document_infos=[{"displayed_name":"test.txt","blob":blob}]
  21. docs=ds.upload_documents(document_infos)
  22. doc = docs[0]
  23. doc.update({"chunk_method": "manual", "name": "manual.txt"})
  24. def test_download_document_with_success(get_api_key_fixture):
  25. API_KEY = get_api_key_fixture
  26. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  27. ds = rag.create_dataset(name="test_download_document")
  28. blob = b"Sample document content for test."
  29. document_infos=[{"displayed_name": "test_1.txt","blob": blob}]
  30. docs=ds.upload_documents(document_infos)
  31. doc = docs[0]
  32. with open("test_download.txt","wb+") as file:
  33. file.write(doc.download())
  34. def test_list_documents_in_dataset_with_success(get_api_key_fixture):
  35. API_KEY = get_api_key_fixture
  36. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  37. ds = rag.create_dataset(name="test_list_documents")
  38. blob = b"Sample document content for test."
  39. document_infos = [{"displayed_name": "test.txt","blob":blob}]
  40. ds.upload_documents(document_infos)
  41. ds.list_documents(keywords="test", page=1, page_size=12)
  42. def test_delete_documents_in_dataset_with_success(get_api_key_fixture):
  43. API_KEY = get_api_key_fixture
  44. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  45. ds = rag.create_dataset(name="test_delete_documents")
  46. name = "test_delete_documents.txt"
  47. blob = b"Sample document content for test."
  48. document_infos=[{"displayed_name": name, "blob": blob}]
  49. docs = ds.upload_documents(document_infos)
  50. ds.delete_documents([docs[0].id])
# Upload and parse documents of different file types with the general parse method.
  52. def test_upload_and_parse_pdf_documents_with_general_parse_method(get_api_key_fixture):
  53. API_KEY = get_api_key_fixture
  54. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  55. ds = rag.create_dataset(name="test_pdf_document")
  56. with open("test_data/test.pdf","rb") as file:
  57. blob=file.read()
  58. document_infos = [{"displayed_name": "test.pdf","blob": blob}]
  59. docs=ds.upload_documents(document_infos)
  60. doc = docs[0]
  61. ds.async_parse_documents([doc.id])
  62. def test_upload_and_parse_docx_documents_with_general_parse_method(get_api_key_fixture):
  63. API_KEY = get_api_key_fixture
  64. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  65. ds = rag.create_dataset(name="test_docx_document")
  66. with open("test_data/test.docx","rb") as file:
  67. blob=file.read()
  68. document_infos = [{"displayed_name": "test.docx","blob": blob}]
  69. docs=ds.upload_documents(document_infos)
  70. doc = docs[0]
  71. ds.async_parse_documents([doc.id])
  72. def test_upload_and_parse_excel_documents_with_general_parse_method(get_api_key_fixture):
  73. API_KEY = get_api_key_fixture
  74. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  75. ds = rag.create_dataset(name="test_excel_document")
  76. with open("test_data/test.xlsx","rb") as file:
  77. blob=file.read()
  78. document_infos = [{"displayed_name": "test.xlsx","blob": blob}]
  79. docs=ds.upload_documents(document_infos)
  80. doc = docs[0]
  81. ds.async_parse_documents([doc.id])
  82. def test_upload_and_parse_ppt_documents_with_general_parse_method(get_api_key_fixture):
  83. API_KEY = get_api_key_fixture
  84. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  85. ds = rag.create_dataset(name="test_ppt_document")
  86. with open("test_data/test.ppt","rb") as file:
  87. blob=file.read()
  88. document_infos = [{"displayed_name": "test.ppt","blob": blob}]
  89. docs=ds.upload_documents(document_infos)
  90. doc = docs[0]
  91. ds.async_parse_documents([doc.id])
  92. def test_upload_and_parse_image_documents_with_general_parse_method(get_api_key_fixture):
  93. API_KEY = get_api_key_fixture
  94. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  95. ds = rag.create_dataset(name="test_image_document")
  96. with open("test_data/test.jpg","rb") as file:
  97. blob=file.read()
  98. document_infos = [{"displayed_name": "test.jpg","blob": blob}]
  99. docs=ds.upload_documents(document_infos)
  100. doc = docs[0]
  101. ds.async_parse_documents([doc.id])
  102. def test_upload_and_parse_txt_documents_with_general_parse_method(get_api_key_fixture):
  103. API_KEY = get_api_key_fixture
  104. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  105. ds = rag.create_dataset(name="test_txt_document")
  106. with open("test_data/test.txt","rb") as file:
  107. blob=file.read()
  108. document_infos = [{"displayed_name": "test.txt","blob": blob}]
  109. docs=ds.upload_documents(document_infos)
  110. doc = docs[0]
  111. ds.async_parse_documents([doc.id])
  112. def test_upload_and_parse_md_documents_with_general_parse_method(get_api_key_fixture):
  113. API_KEY = get_api_key_fixture
  114. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  115. ds = rag.create_dataset(name="test_md_document")
  116. with open("test_data/test.md","rb") as file:
  117. blob=file.read()
  118. document_infos = [{"displayed_name": "test.md","blob": blob}]
  119. docs=ds.upload_documents(document_infos)
  120. doc = docs[0]
  121. ds.async_parse_documents([doc.id])
  122. def test_upload_and_parse_json_documents_with_general_parse_method(get_api_key_fixture):
  123. API_KEY = get_api_key_fixture
  124. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  125. ds = rag.create_dataset(name="test_json_document")
  126. with open("test_data/test.json","rb") as file:
  127. blob=file.read()
  128. document_infos = [{"displayed_name": "test.json","blob": blob}]
  129. docs=ds.upload_documents(document_infos)
  130. doc = docs[0]
  131. ds.async_parse_documents([doc.id])
  132. @pytest.mark.skip(reason="")
  133. def test_upload_and_parse_eml_documents_with_general_parse_method(get_api_key_fixture):
  134. API_KEY = get_api_key_fixture
  135. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  136. ds = rag.create_dataset(name="test_eml_document")
  137. with open("test_data/test.eml","rb") as file:
  138. blob=file.read()
  139. document_infos = [{"displayed_name": "test.eml","blob": blob}]
  140. docs=ds.upload_documents(document_infos)
  141. doc = docs[0]
  142. ds.async_parse_documents([doc.id])
  143. def test_upload_and_parse_html_documents_with_general_parse_method(get_api_key_fixture):
  144. API_KEY = get_api_key_fixture
  145. rag = RAGFlow(API_KEY, HOST_ADDRESS)
  146. ds = rag.create_dataset(name="test_html_document")
  147. with open("test_data/test.html","rb") as file:
  148. blob=file.read()
  149. document_infos = [{"displayed_name": "test.html","blob": blob}]
  150. docs=ds.upload_documents(document_infos)
  151. doc = docs[0]
  152. ds.async_parse_documents([doc.id])