You may select up to 25 topics. Topics must start with a letter or number, may include hyphens (-), and must be no longer than 35 characters.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. #
  2. # Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. from pathlib import Path
  17. import pytest
  18. from common import (
  19. batch_create_datasets,
  20. bulk_upload_documents,
  21. )
  22. from configs import HOST_ADDRESS, VERSION
  23. from pytest import FixtureRequest
  24. from ragflow_sdk import Chunk, DataSet, Document, RAGFlow
  25. from utils import wait_for
  26. from utils.file_utils import (
  27. create_docx_file,
  28. create_eml_file,
  29. create_excel_file,
  30. create_html_file,
  31. create_image_file,
  32. create_json_file,
  33. create_md_file,
  34. create_pdf_file,
  35. create_ppt_file,
  36. create_txt_file,
  37. )
  38. @wait_for(30, 1, "Document parsing timeout")
  39. def condition(_dataset: DataSet):
  40. documents = _dataset.list_documents(page_size=1000)
  41. for document in documents:
  42. if document.run != "DONE":
  43. return False
  44. return True
  45. @pytest.fixture
  46. def generate_test_files(request: FixtureRequest, tmp_path: Path):
  47. file_creators = {
  48. "docx": (tmp_path / "ragflow_test.docx", create_docx_file),
  49. "excel": (tmp_path / "ragflow_test.xlsx", create_excel_file),
  50. "ppt": (tmp_path / "ragflow_test.pptx", create_ppt_file),
  51. "image": (tmp_path / "ragflow_test.png", create_image_file),
  52. "pdf": (tmp_path / "ragflow_test.pdf", create_pdf_file),
  53. "txt": (tmp_path / "ragflow_test.txt", create_txt_file),
  54. "md": (tmp_path / "ragflow_test.md", create_md_file),
  55. "json": (tmp_path / "ragflow_test.json", create_json_file),
  56. "eml": (tmp_path / "ragflow_test.eml", create_eml_file),
  57. "html": (tmp_path / "ragflow_test.html", create_html_file),
  58. }
  59. files = {}
  60. for file_type, (file_path, creator_func) in file_creators.items():
  61. if request.param in ["", file_type]:
  62. creator_func(file_path)
  63. files[file_type] = file_path
  64. return files
  65. @pytest.fixture(scope="class")
  66. def ragflow_tmp_dir(request: FixtureRequest, tmp_path_factory: Path) -> Path:
  67. class_name = request.cls.__name__
  68. return tmp_path_factory.mktemp(class_name)
  69. @pytest.fixture(scope="session")
  70. def client(token: str) -> RAGFlow:
  71. return RAGFlow(api_key=token, base_url=HOST_ADDRESS, version=VERSION)
  72. @pytest.fixture(scope="function")
  73. def clear_datasets(request: FixtureRequest, client: RAGFlow):
  74. def cleanup():
  75. client.delete_datasets(ids=None)
  76. request.addfinalizer(cleanup)
  77. @pytest.fixture(scope="class")
  78. def add_dataset(request: FixtureRequest, client: RAGFlow):
  79. def cleanup():
  80. client.delete_datasets(ids=None)
  81. request.addfinalizer(cleanup)
  82. return batch_create_datasets(client, 1)[0]
  83. @pytest.fixture(scope="function")
  84. def add_dataset_func(request: FixtureRequest, client: RAGFlow) -> DataSet:
  85. def cleanup():
  86. client.delete_datasets(ids=None)
  87. request.addfinalizer(cleanup)
  88. return batch_create_datasets(client, 1)[0]
  89. @pytest.fixture(scope="class")
  90. def add_document(add_dataset: DataSet, ragflow_tmp_dir: Path) -> tuple[DataSet, Document]:
  91. return add_dataset, bulk_upload_documents(add_dataset, 1, ragflow_tmp_dir)[0]
  92. @pytest.fixture(scope="class")
  93. def add_chunks(request: FixtureRequest, add_document: tuple[DataSet, Document]) -> tuple[DataSet, Document, list[Chunk]]:
  94. dataset, document = add_document
  95. dataset.async_parse_documents([document.id])
  96. condition(dataset)
  97. chunks = []
  98. for i in range(4):
  99. chunk = document.add_chunk(content=f"chunk test {i}")
  100. chunks.append(chunk)
  101. # issues/6487
  102. from time import sleep
  103. sleep(1)
  104. def cleanup():
  105. try:
  106. document.delete_chunks(ids=[])
  107. except Exception:
  108. pass
  109. request.addfinalizer(cleanup)
  110. return dataset, document, chunks