#
#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
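"""Shared pytest fixtures for the web API test suite.

Covers authentication, temporary test-file generation, and dataset/document/
chunk setup with teardown helpers.
"""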
from time import sleep

import pytest
from common import (
    batch_add_chunks,
    batch_create_datasets,
    bulk_upload_documents,
    delete_chunks,
    delete_dialogs,
    list_chunks,
    list_documents,
    list_kbs,
    parse_documents,
    rm_kb,
)
from libs.auth import RAGFlowWebApiAuth
from pytest import FixtureRequest
from utils import wait_for
from utils.file_utils import (
    create_docx_file,
    create_eml_file,
    create_excel_file,
    create_html_file,
    create_image_file,
    create_json_file,
    create_md_file,
    create_pdf_file,
    create_ppt_file,
    create_txt_file,
)


@wait_for(30, 1, "Document parsing timeout")
def condition(_auth, _kb_id):
    """Return True once every document in the knowledge base reports run status "3" (parsing finished)."""
    res = list_documents(_auth, {"kb_id": _kb_id})
    for doc in res["data"]["docs"]:
        if doc["run"] != "3":
            return False
    return True


@pytest.fixture
def generate_test_files(request: FixtureRequest, tmp_path):
    """Create the test file(s) selected via indirect parametrization and return a mapping of file type to path."""
    file_creators = {
        "docx": (tmp_path / "ragflow_test.docx", create_docx_file),
        "excel": (tmp_path / "ragflow_test.xlsx", create_excel_file),
        "ppt": (tmp_path / "ragflow_test.pptx", create_ppt_file),
        "image": (tmp_path / "ragflow_test.png", create_image_file),
        "pdf": (tmp_path / "ragflow_test.pdf", create_pdf_file),
        "txt": (tmp_path / "ragflow_test.txt", create_txt_file),
        "md": (tmp_path / "ragflow_test.md", create_md_file),
        "json": (tmp_path / "ragflow_test.json", create_json_file),
        "eml": (tmp_path / "ragflow_test.eml", create_eml_file),
        "html": (tmp_path / "ragflow_test.html", create_html_file),
    }

    files = {}
    for file_type, (file_path, creator_func) in file_creators.items():
        # An empty param ("") selects every file type; otherwise only the requested one.
        if request.param in ["", file_type]:
            creator_func(file_path)
            files[file_type] = file_path
    return files
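# `generate_test_files` reads `request.param`, so tests choose the file type
# through indirect parametrization (an empty string selects every type).
# Hypothetical usage sketch:
#
#     @pytest.mark.parametrize("generate_test_files", ["docx", "pdf"], indirect=True)
#     def test_upload(generate_test_files):
#         for file_type, path in generate_test_files.items():
#             assert path.exists()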


@pytest.fixture(scope="class")
def ragflow_tmp_dir(request, tmp_path_factory):
    """Class-scoped temporary directory named after the requesting test class."""
    class_name = request.cls.__name__
    return tmp_path_factory.mktemp(class_name)


@pytest.fixture(scope="session")
def WebApiAuth(auth):
    """Session-scoped web API auth wrapper around the shared auth credential."""
    return RAGFlowWebApiAuth(auth)


@pytest.fixture(scope="function")
def clear_datasets(request: FixtureRequest, WebApiAuth: RAGFlowWebApiAuth):
    """Delete every knowledge base once the test finishes."""

    def cleanup():
        res = list_kbs(WebApiAuth, params={"page_size": 1000})
        for kb in res["data"]["kbs"]:
            rm_kb(WebApiAuth, {"kb_id": kb["id"]})

    request.addfinalizer(cleanup)


@pytest.fixture(scope="function")
def clear_dialogs(request, WebApiAuth):
    """Delete every dialog once the test finishes."""

    def cleanup():
        delete_dialogs(WebApiAuth)

    request.addfinalizer(cleanup)


@pytest.fixture(scope="class")
def add_dataset(request: FixtureRequest, WebApiAuth: RAGFlowWebApiAuth) -> str:
    """Create one dataset for the test class and delete all knowledge bases on teardown; returns the dataset id."""

    def cleanup():
        res = list_kbs(WebApiAuth, params={"page_size": 1000})
        for kb in res["data"]["kbs"]:
            rm_kb(WebApiAuth, {"kb_id": kb["id"]})

    request.addfinalizer(cleanup)
    return batch_create_datasets(WebApiAuth, 1)[0]


@pytest.fixture(scope="function")
def add_dataset_func(request: FixtureRequest, WebApiAuth: RAGFlowWebApiAuth) -> str:
    """Function-scoped variant of add_dataset."""

    def cleanup():
        res = list_kbs(WebApiAuth, params={"page_size": 1000})
        for kb in res["data"]["kbs"]:
            rm_kb(WebApiAuth, {"kb_id": kb["id"]})

    request.addfinalizer(cleanup)
    return batch_create_datasets(WebApiAuth, 1)[0]


@pytest.fixture(scope="class")
def add_document(request, WebApiAuth, add_dataset, ragflow_tmp_dir):
    """Upload a single document into the class-scoped dataset; returns (dataset_id, document_id)."""
    #     def cleanup():
    #         res = list_documents(WebApiAuth, {"kb_id": dataset_id})
    #         for doc in res["data"]["docs"]:
    #             delete_document(WebApiAuth, {"doc_id": doc["id"]})

    #     request.addfinalizer(cleanup)

    dataset_id = add_dataset
    return dataset_id, bulk_upload_documents(WebApiAuth, dataset_id, 1, ragflow_tmp_dir)[0]


@pytest.fixture(scope="class")
def add_chunks(request, WebApiAuth, add_document):
    """Parse the uploaded document, add four chunks, and return (kb_id, document_id, chunk_ids).

    On teardown, all chunks listed for the document are deleted.
    """

    def cleanup():
        res = list_chunks(WebApiAuth, {"doc_id": document_id})
        if res["code"] == 0:
            chunk_ids = [chunk["chunk_id"] for chunk in res["data"]["chunks"]]
            delete_chunks(WebApiAuth, {"doc_id": document_id, "chunk_ids": chunk_ids})

    request.addfinalizer(cleanup)

    kb_id, document_id = add_document
    parse_documents(WebApiAuth, {"doc_ids": [document_id], "run": "1"})
    condition(WebApiAuth, kb_id)
    chunk_ids = batch_add_chunks(WebApiAuth, document_id, 4)
    # Brief pause after adding chunks; see issues/6487.
    sleep(1)
    return kb_id, document_id, chunk_ids
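# Hypothetical usage sketch (not part of this conftest): a test consuming the
# `add_chunks` fixture; helper names mirror the imports above.
#
#     class TestListChunks:
#         def test_added_chunks_are_listed(self, WebApiAuth, add_chunks):
#             _, doc_id, chunk_ids = add_chunks
#             res = list_chunks(WebApiAuth, {"doc_id": doc_id})
#             assert res["code"] == 0
#             listed = {c["chunk_id"] for c in res["data"]["chunks"]}
#             assert set(chunk_ids) <= listed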
 
 