### What problem does this PR solve?

- Extended embedding model timeout from 3 to 10 seconds in api_utils.py
- Added more time for large file batches and concurrent parsing operations to prevent test flakiness
- Import from #8940
- https://github.com/infiniflow/ragflow/actions/runs/16422052652

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
`api_utils.py` (import reordering, context for the timeout change below):

```diff
 from functools import wraps
 from hmac import HMAC
 from io import BytesIO
-from typing import Any, Optional, Union, Callable, Coroutine, Type
+from typing import Any, Callable, Coroutine, Optional, Type, Union
 from urllib.parse import quote, urlencode
 from uuid import uuid1
-import trio
-from rag.utils.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions
 import requests
+import trio
 from flask import (
     Response,
     jsonify,
@@ ... @@
 from api.db.db_models import APIToken
 from api.db.services.llm_service import LLMService, TenantLLMService
 from api.utils import CustomJSONEncoder, get_uuid, json_dumps
+from rag.utils.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions

 requests.models.complexjson.dumps = functools.partial(json.dumps, cls=CustomJSONEncoder)
```
`api_utils.py` (embedding health check, timeout raised from 3 to 10 seconds):

```diff
 async def _is_strong_enough():
     nonlocal chat_model, embedding_model
     if embedding_model:
-        with trio.fail_after(3):
+        with trio.fail_after(10):
             _ = await trio.to_thread.run_sync(lambda: embedding_model.encode(["Are you strong enough!?"]))
     if chat_model:
         with trio.fail_after(30):
```
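For context, `trio.fail_after` puts a hard deadline on the block: if the embedding probe has not returned by then, `trio.TooSlowError` propagates and the strength check fails. Below is a minimal, self-contained sketch of that pattern; `slow_encode` is a stand-in for `embedding_model.encode`, not RAGFlow code, and the 4-second sleep is just an assumed illustration of a cold model or a large batch.

```python
import time

import trio


async def check_model(timeout_s: float) -> bool:
    """Probe a (stand-in) model call under a hard deadline, mirroring _is_strong_enough."""

    def slow_encode():
        # Stand-in for embedding_model.encode(); a cold model or large batch
        # can easily exceed a 3-second budget, which is why the PR widens it.
        time.sleep(4)
        return [0.0]

    try:
        with trio.fail_after(timeout_s):
            # Run the blocking call in a worker thread so the event loop stays responsive.
            _ = await trio.to_thread.run_sync(slow_encode)
        return True
    except trio.TooSlowError:
        return False


if __name__ == "__main__":
    print(trio.run(check_model, 3))   # False: the 4-second call misses a 3-second deadline
    print(trio.run(check_model, 10))  # True: the widened deadline tolerates the same call
```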
HTTP API tests (parsing budget for the 100-file and concurrent cases raised to 200 seconds):

```diff
 @pytest.mark.p3
 def test_parse_100_files(HttpApiAuth, add_dataset_func, tmp_path):
-    @wait_for(100, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_auth, _dataset_id, _document_num):
         res = list_documents(_auth, _dataset_id, {"page_size": _document_num})
         for doc in res["data"]["docs"]:
@@ ... @@
 @pytest.mark.p3
 def test_concurrent_parse(HttpApiAuth, add_dataset_func, tmp_path):
-    @wait_for(120, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_auth, _dataset_id, _document_num):
         res = list_documents(_auth, _dataset_id, {"page_size": _document_num})
         for doc in res["data"]["docs"]:
```
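`wait_for(timeout, interval, message)` is a project-local test helper whose implementation is not part of this diff. The sketch below is only an assumption of how such a polling decorator typically behaves (call the wrapped condition until it returns truthy, fail with the given message once the budget is spent), to make the 100/120 → 200 change easier to read:

```python
import time
from functools import wraps
from typing import Any, Callable


def wait_for(timeout: float, interval: float, error_message: str) -> Callable:
    """Hypothetical stand-in for the test helper used above: poll a condition
    until it returns a truthy value, or raise after `timeout` seconds."""

    def decorator(condition: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(condition)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            deadline = time.monotonic() + timeout
            while time.monotonic() < deadline:
                result = condition(*args, **kwargs)
                if result:
                    return result
                time.sleep(interval)  # the tests above keep a 1-second poll interval
            raise TimeoutError(error_message)

        return wrapper

    return decorator
```

Under that reading, the PR keeps the 1-second poll interval and only widens the overall budget to 200 seconds, so a slow CI runner gets roughly twice the headroom without polling the API any harder.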
Python SDK tests (same budgets raised to 200 seconds):

```diff
 @pytest.mark.p3
 def test_parse_100_files(add_dataset_func, tmp_path):
-    @wait_for(100, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_dataset: DataSet, _count: int):
         documents = _dataset.list_documents(page_size=_count * 2)
         for document in documents:
@@ ... @@
 @pytest.mark.p3
 def test_concurrent_parse(add_dataset_func, tmp_path):
-    @wait_for(120, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_dataset: DataSet, _count: int):
         documents = _dataset.list_documents(page_size=_count * 2)
         for document in documents:
```
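To exercise only these long-running cases against the new budget, a selector along the following lines should work; the `p3` marker comes from the tests above, while the `-k` expression is just a convenience and assumes no other tests share these names:

```python
# Roughly equivalent to: pytest -m p3 -k "parse_100_files or concurrent_parse" -q
import pytest

if __name__ == "__main__":
    raise SystemExit(pytest.main(["-m", "p3", "-k", "parse_100_files or concurrent_parse", "-q"]))
```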