    except Exception:
        logging.exception(f"set_progress({task_id}), progress: {prog}, progress_msg: {msg}, got exception")
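
# Swallowing the exception above is deliberate: progress reporting fires on
# every tick of a long parse, and a transient hiccup while persisting one
# update should not abort the document-processing task that triggered it.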


async def collect():
    global CONSUMER_NAME, DONE_TASKS, FAILED_TASKS
    global UNACKED_ITERATOR
    ...
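
# The globals above read like queue-consumer bookkeeping: a consumer identity,
# done/failed counters, and an iterator over messages that were delivered but
# not yet acknowledged (so they can be re-claimed after a crash). That reading
# is inferred from the names; collect()'s loop body is elided in this excerpt.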


# Helper signature reconstructed from its surviving body line.
async def get_storage_binary(bucket, name):
    # Push the blocking object-store read onto a worker thread so the trio
    # event loop keeps servicing other tasks in the meantime.
    return await trio.to_thread.run_sync(lambda: STORAGE_IMPL.get(bucket, name))
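
# Usage sketch (hypothetical arguments, assuming an S3/MinIO-style STORAGE_IMPL):
#
#   blob = await get_storage_binary(task["kb_id"], task["location"])
#
# The lambda defers STORAGE_IMPL.get(...) so run_sync can invoke it on the
# worker thread with both arguments already bound.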


@timeout(60*40, 1)  # presumably a 40-minute (2400 s) cap on a single run
async def build_chunks(task, progress_callback):
    if task["size"] > DOC_MAXIMUM_SIZE:
        set_progress(task["id"], prog=-1, msg="File size exceeds( <= %dMb )" %
                     (int(DOC_MAXIMUM_SIZE / 1024 / 1024)))  # operand reconstructed: limit rendered in MB
        return []  # early exit reconstructed; oversized files are never parsed
    # ... (rest of build_chunks elided) ...
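
# Worked example of the guard (assuming DOC_MAXIMUM_SIZE = 128 * 1024 * 1024):
# int(DOC_MAXIMUM_SIZE / 1024 / 1024) == 128, so an oversized upload fails with
# progress -1 and the message "File size exceeds( <= 128Mb )".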

# ... (later in the module, inside the async task handler, where task_tenant_id,
# task_embedding_id, task_llm_id, task_language and progress_callback are in
# scope; surrounding code elided) ...

try:
    # bind embedding model
    embedding_model = LLMBundle(task_tenant_id, LLMType.EMBEDDING, llm_name=task_embedding_id, lang=task_language)
    await is_strong_enough(None, embedding_model)
    # Embed a dummy string once to discover the model's vector dimension.
    vts, _ = embedding_model.encode(["ok"])
    vector_size = len(vts[0])
except Exception as e:
    # Handler body reconstructed (assumption): report the failure, then re-raise.
    progress_callback(-1, msg=f"Fail to bind embedding model: {e}")
    raise
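
# Probing with encode(["ok"]) sidesteps any per-model configuration lookup:
# whatever embedding model the tenant has bound, vector_size is measured from
# a real vector (e.g. a 1024-dimensional model gives vector_size == 1024, a
# value the index schema needs before any chunks are stored; example
# dimension assumed).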

if task.get("task_type", "") == "raptor":
    # bind LLM for raptor
    chat_model = LLMBundle(task_tenant_id, LLMType.CHAT, llm_name=task_llm_id, lang=task_language)
    await is_strong_enough(chat_model, None)
    # run RAPTOR
    async with kg_limiter:
        chunks, token_count = await run_raptor(task, chat_model, embedding_model, vector_size, progress_callback)
elif task.get("task_type", "") == "graphrag":  # branch opener reconstructed from the graphrag config lookups below
    graphrag_conf = task["kb_parser_config"].get("graphrag", {})
    start_ts = timer()
    chat_model = LLMBundle(task_tenant_id, LLMType.CHAT, llm_name=task_llm_id, lang=task_language)
    await is_strong_enough(chat_model, None)
    with_resolution = graphrag_conf.get("resolution", False)
    with_community = graphrag_conf.get("community", False)
    async with kg_limiter:
        # ... (the GraphRAG run itself is truncated here) ...
        ...
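
# kg_limiter is entered as an async context manager in both branches to bound
# how many knowledge-graph builds run concurrently. Its definition is elided
# here; a trio-style sketch (assumed value, not the module's actual one):
#
#   kg_limiter = trio.CapacityLimiter(2)  # at most two concurrent KG builds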