### What problem does this PR solve?

Adds a shared, timeout-guarded `is_strong_enough` pressure test that probes the chat and embedding models before heavy pipelines run. GraphRAG's inline check is replaced by the shared helper, RAPTOR gains the same up-front check, and vision figure parsing gets a per-figure timeout, so weak or unresponsive model endpoints fail fast instead of stalling long-running tasks.

### Type of change

- [x] Performance Improvement
The new helper lands in `api/utils/api_utils.py`, the module that already defines the `timeout` decorator. Note that the inner closure takes no arguments (it captures the models), so the nursery starts it without extra parameters:

```diff
@@ -670,3 +670,18 @@ def timeout(
         return wrapper
     return decorator
 
+
+
+async def is_strong_enough(chat_model, embedding_model):
+    @timeout(30, 2)
+    async def _is_strong_enough():
+        nonlocal chat_model, embedding_model
+        _ = await trio.to_thread.run_sync(lambda: embedding_model.encode(["Are you strong enough!?"]))
+        res = await trio.to_thread.run_sync(lambda: chat_model.chat("Nothing special.", [{"role": "user", "content": "Are you strong enough!?"}], {}))
+        if res.find("**ERROR**") >= 0:
+            raise Exception(res)
+
+    # Pressure test: 12 concurrent probes against both models
+    async with trio.open_nursery() as nursery:
+        for _ in range(12):
+            nursery.start_soon(_is_strong_enough)
```
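The `timeout(seconds, attempts)` decorator itself sits earlier in `api_utils.py` and is not shown in this hunk. A minimal sketch of a compatible async-only variant, assuming the second argument is a retry count (the name `attempts` and the retry semantics are assumptions, not the real implementation):

```python
import trio

def timeout(seconds, attempts=1):
    # Hypothetical sketch: cancel each attempt after `seconds` and retry
    # up to `attempts` times. The real decorator in api_utils.py must also
    # handle sync callables (see the @timeout(30, 3) on process() below).
    def decorator(func):
        async def wrapper(*args, **kwargs):
            for attempt in range(attempts):
                try:
                    with trio.fail_after(seconds):  # raises TooSlowError on timeout
                        return await func(*args, **kwargs)
                except trio.TooSlowError:
                    if attempt == attempts - 1:  # out of retries: propagate
                        raise
        return wrapper
    return decorator
```

Under this reading, `@timeout(30, 2)` gives each probe 30 seconds with one retry, and the nursery runs 12 probes concurrently, so a sluggish endpoint surfaces within roughly a minute rather than hanging the whole task.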
The vision figure parser now bounds each per-figure description call:

```diff
@@ -17,6 +17,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 
 from PIL import Image
 
+from api.utils.api_utils import timeout
 from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk
 from rag.prompts import vision_llm_figure_describe_prompt
 
@@ -80,6 +81,7 @@ class VisionFigureParser:
     def __call__(self, **kwargs):
         callback = kwargs.get("callback", lambda prog, msg: None)
 
+        @timeout(30, 3)
         def process(figure_idx, figure_binary):
             description_text = picture_vision_llm_chunk(
                 binary=figure_binary,
```
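Unlike the async probe, `process` is a plain function dispatched to a thread pool (note the `ThreadPoolExecutor, as_completed` import), so the decorator must also support sync callables. A hedged sketch of one way to bound a sync call, again assuming retry-count semantics for the second argument (`timeout_sync` is an illustrative name, not RAGFlow's API):

```python
import functools
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeout

_pool = ThreadPoolExecutor()  # shared pool; a timed-out call's thread keeps running

def timeout_sync(seconds, attempts=1):
    # Hypothetical sync counterpart to the async sketch above. OS threads
    # cannot be cancelled, so a timed-out attempt is abandoned, not killed.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(attempts):
                future = _pool.submit(func, *args, **kwargs)
                try:
                    return future.result(timeout=seconds)
                except FutureTimeout:
                    if attempt == attempts - 1:
                        raise
        return wrapper
    return decorator
```

With this shape, `@timeout(30, 3)` would retry a slow vision-LLM call on a figure up to three times, 30 seconds each, instead of letting one stuck figure block the whole batch.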
`graphrag/general/index.py` drops its private copy of the check in favor of the shared helper:

```diff
@@ -20,7 +20,7 @@ import trio
 
 from api import settings
 from api.utils import get_uuid
-from api.utils.api_utils import timeout
+from api.utils.api_utils import timeout, is_strong_enough
 from graphrag.light.graph_extractor import GraphExtractor as LightKGExt
 from graphrag.general.graph_extractor import GraphExtractor as GeneralKGExt
 from graphrag.general.community_reports_extractor import CommunityReportsExtractor
@@ -39,13 +39,6 @@ from rag.nlp import rag_tokenizer, search
 from rag.utils.redis_conn import RedisDistributedLock
 
 
-@timeout(30, 2)
-async def _is_strong_enough(chat_model, embedding_model):
-    _ = await trio.to_thread.run_sync(lambda: embedding_model.encode(["Are you strong enough!?"]))
-    res = await trio.to_thread.run_sync(lambda: chat_model.chat("Nothing special.", [{"role": "user", "content": "Are you strong enough!?"}], {}))
-    if res.find("**ERROR**") >= 0:
-        raise Exception(res)
-
 
 async def run_graphrag(
     row: dict,
@@ -57,9 +50,7 @@ async def run_graphrag(
     callback,
 ):
     # Pressure test for GraphRAG task
-    async with trio.open_nursery() as nursery:
-        for _ in range(12):
-            nursery.start_soon(_is_strong_enough, chat_model, embedding_model)
+    await is_strong_enough(chat_model, embedding_model)
 
     start = trio.current_time()
     tenant_id, kb_id, doc_id = row["tenant_id"], str(row["kb_id"]), row["doc_id"]
```
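Both call sites pass the tenant's bound model handles. The probe only relies on the two duck-typed methods visible in the hunks above, `embedding_model.encode(texts)` and `chat_model.chat(system, history, gen_conf)`, so a standalone exercise might look like this (the stub classes are illustrative, not RAGFlow types):

```python
import trio
from api.utils.api_utils import is_strong_enough

class StubChat:
    def chat(self, system, history, gen_conf):
        # A healthy model returns text; RAGFlow models prefix failures with "**ERROR**".
        return "Strong enough."

class StubEmbedding:
    def encode(self, texts):
        return [[0.0] * 8 for _ in texts]  # one dummy vector per input text

# Runs the 12 concurrent probes; raises if any probe errors or exceeds its timeout.
trio.run(is_strong_enough, StubChat(), StubEmbedding())
```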
Finally, the task executor imports the helper and runs the same fail-fast check at the top of RAPTOR, before the task starts spending its one-hour budget:

```diff
@@ -21,7 +21,7 @@ import sys
 import threading
 import time
 
-from api.utils.api_utils import timeout
+from api.utils.api_utils import timeout, is_strong_enough
 from api.utils.log_utils import init_root_logger, get_project_base_directory
 from graphrag.general.index import run_graphrag
 from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache
@@ -466,6 +466,8 @@ async def embedding(docs, mdl, parser_config=None, callback=None):
 
 @timeout(3600)
 async def run_raptor(row, chat_mdl, embd_mdl, vector_size, callback=None):
+    # Pressure test for RAPTOR task
+    await is_strong_enough(chat_mdl, embd_mdl)
     chunks = []
     vctr_nm = "q_%d_vec" % vector_size
     for d in settings.retrievaler.chunk_list(row["doc_id"], row["tenant_id"], [str(row["kb_id"])],
```