
Fix: Increase timeouts for document parsing and model checks (#8996)

### What problem does this PR solve?

- Extended the embedding model health-check timeout from 3 to 10 seconds in api_utils.py (a sketch of the timeout semantics follows this list)
- Raised the polling budget for large file batches and concurrent parsing operations to prevent test flakiness
- Imported from #8940
- https://github.com/infiniflow/ragflow/actions/runs/16422052652
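
For context, a minimal sketch of the timeout mechanism being tuned, using only the public `trio` API: `trio.fail_after` raises `trio.TooSlowError` when its block overruns the deadline, and `trio.to_thread.run_sync` moves a blocking call off the event loop. The `slow_encode` stand-in below is hypothetical; the real call is `embedding_model.encode`:

```python
import time

import trio


def slow_encode(texts):
    # Hypothetical stand-in for embedding_model.encode(): a blocking
    # call whose latency depends on the backend and host load.
    time.sleep(5)
    return [[0.0] * 8 for _ in texts]


async def probe():
    try:
        # With the old 3-second budget this raises trio.TooSlowError on
        # a slow backend; the PR widens the budget to 10 seconds.
        with trio.fail_after(3):
            await trio.to_thread.run_sync(slow_encode, ["Are you strong enough!?"])
    except trio.TooSlowError:
        print("embedding health check timed out")


trio.run(probe)
```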

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
tags/v0.20.0
Liu An · 3 months ago · parent commit b4b6d296ea

api/utils/api_utils.py (+4 −6)

```diff
 from functools import wraps
 from hmac import HMAC
 from io import BytesIO
-from typing import Any, Optional, Union, Callable, Coroutine, Type
+from typing import Any, Callable, Coroutine, Optional, Type, Union
 from urllib.parse import quote, urlencode
 from uuid import uuid1
 
-import trio
-from rag.utils.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions
-
 import requests
+import trio
 from flask import (
     Response,
     jsonify,
@@ ... @@
 from api.db.db_models import APIToken
 from api.db.services.llm_service import LLMService, TenantLLMService
 from api.utils import CustomJSONEncoder, get_uuid, json_dumps
+from rag.utils.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions
 
 requests.models.complexjson.dumps = functools.partial(json.dumps, cls=CustomJSONEncoder)
@@ ... @@
 async def _is_strong_enough():
     nonlocal chat_model, embedding_model
     if embedding_model:
-        with trio.fail_after(3):
+        with trio.fail_after(10):
             _ = await trio.to_thread.run_sync(lambda: embedding_model.encode(["Are you strong enough!?"]))
     if chat_model:
         with trio.fail_after(30):
```

test/testcases/test_http_api/test_file_management_within_dataset/test_parse_documents.py (+2 −2)



```diff
 @pytest.mark.p3
 def test_parse_100_files(HttpApiAuth, add_dataset_func, tmp_path):
-    @wait_for(100, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_auth, _dataset_id, _document_num):
         res = list_documents(_auth, _dataset_id, {"page_size": _document_num})
         for doc in res["data"]["docs"]:
@@ ... @@
 @pytest.mark.p3
 def test_concurrent_parse(HttpApiAuth, add_dataset_func, tmp_path):
-    @wait_for(120, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_auth, _dataset_id, _document_num):
         res = list_documents(_auth, _dataset_id, {"page_size": _document_num})
         for doc in res["data"]["docs"]:
```
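
The `wait_for` helper itself is not part of this diff. A minimal sketch of a polling decorator with the same shape, assuming the signature is `wait_for(timeout, interval, error_message)` and that the wrapped `condition` returns truthy once every document has finished parsing (these semantics are an assumption, not the repo's actual helper):

```python
import functools
import time


def wait_for(timeout, interval, error_message):
    # Assumed semantics: re-run the wrapped condition every `interval`
    # seconds until it returns truthy, failing after `timeout` seconds.
    def decorator(condition):
        @functools.wraps(condition)
        def wrapper(*args, **kwargs):
            deadline = time.monotonic() + timeout
            while time.monotonic() < deadline:
                if condition(*args, **kwargs):
                    return True
                time.sleep(interval)
            raise TimeoutError(error_message)
        return wrapper
    return decorator
```

Under that reading, bumping the first argument from 100 (or 120) to 200 doubles the overall wait budget without changing the one-second retry interval.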

test/testcases/test_sdk_api/test_file_management_within_dataset/test_parse_documents.py (+2 −2)



```diff
 @pytest.mark.p3
 def test_parse_100_files(add_dataset_func, tmp_path):
-    @wait_for(100, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_dataset: DataSet, _count: int):
         documents = _dataset.list_documents(page_size=_count * 2)
         for document in documents:
@@ ... @@
 @pytest.mark.p3
 def test_concurrent_parse(add_dataset_func, tmp_path):
-    @wait_for(120, 1, "Document parsing timeout")
+    @wait_for(200, 1, "Document parsing timeout")
     def condition(_dataset: DataSet, _count: int):
         documents = _dataset.list_documents(page_size=_count * 2)
         for document in documents:
```
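
Why the concurrent test needs the extra headroom: when many parse requests are fired at once, documents queue server-side, and the slowest one bounds the wall-clock time the condition must wait. A hypothetical client-side sketch (the `parse_document` helper and worker count are made up for illustration):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed


def parse_document(doc_id):
    # Hypothetical stand-in for the SDK/HTTP call that kicks off
    # parsing for a single document.
    ...


def parse_all(doc_ids, workers=5):
    # All requests land at roughly the same time, so server-side
    # queueing, not client concurrency, dictates total completion
    # time; the polling budget must cover the slowest document.
    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = [pool.submit(parse_document, doc_id) for doc_id in doc_ids]
        for future in as_completed(futures):
            future.result()
```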
