
Fix 'SCORE' not found bug (#4178)

### What problem does this PR solve?

The Infinity search path sorted results with `pl.col("SCORE")`, but the result dataframe exposes the relevance score under the column name `score()`, so the sort failed with a "'SCORE' not found" error. This PR fixes the column name, and also renames the task-list variables in `task_service.py` (`tsks` → `parse_task_array`, `t` → `unfinished_task`) for readability.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Signed-off-by: jinhai <haijin.chn@gmail.com>
Tag: v0.15.1
Author: Jin Hai · 10 months ago
Commit: 8f070c3d56
3 files changed, 12 insertions(+), 12 deletions(-)

1. api/db/services/dialog_service.py (+1, −1)
2. api/db/services/task_service.py (+10, −10)
3. rag/utils/infinity_conn.py (+1, −1)

### api/db/services/dialog_service.py (+1, −1)

```diff
         retrieval_time_cost = (retrieval_ts - generate_keyword_ts) * 1000
         generate_result_time_cost = (finish_chat_ts - retrieval_ts) * 1000

-        prompt = f"{prompt} ### Elapsed\n - Total: {total_time_cost:.1f}ms\n - Check LLM: {check_llm_time_cost:.1f}ms\n - Create retriever: {create_retriever_time_cost:.1f}ms\n - Bind embedding: {bind_embedding_time_cost:.1f}ms\n - Bind LLM: {bind_llm_time_cost:.1f}ms\n - Tune question: {refine_question_time_cost:.1f}ms\n - Bind reranker: {bind_reranker_time_cost:.1f}ms\n - Generate keyword: {generate_keyword_time_cost:.1f}ms\n - Retrieval: {retrieval_time_cost:.1f}ms\n - Generate answer: {generate_result_time_cost:.1f}ms"
+        prompt = f"{prompt}\n\n - Total: {total_time_cost:.1f}ms\n - Check LLM: {check_llm_time_cost:.1f}ms\n - Create retriever: {create_retriever_time_cost:.1f}ms\n - Bind embedding: {bind_embedding_time_cost:.1f}ms\n - Bind LLM: {bind_llm_time_cost:.1f}ms\n - Tune question: {refine_question_time_cost:.1f}ms\n - Bind reranker: {bind_reranker_time_cost:.1f}ms\n - Generate keyword: {generate_keyword_time_cost:.1f}ms\n - Retrieval: {retrieval_time_cost:.1f}ms\n - Generate answer: {generate_result_time_cost:.1f}ms"
         return {"answer": answer, "reference": refs, "prompt": prompt}

     if stream:
```
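
For context, the timing block appended to `prompt` is built from per-stage timestamps captured during the chat flow; the old version glued ` ### Elapsed` onto the same line as the prompt tail (which markdown would not render as a heading), while the new version separates the list with a blank line. A minimal, self-contained sketch of the pattern, keeping only a few of the stages (the `start_ts` name and the stage comments are illustrative, not from this PR):

```python
import time

start_ts = time.time()
# ... check LLM, bind embedding/reranker, generate keywords ...
generate_keyword_ts = time.time()
# ... run retrieval ...
retrieval_ts = time.time()
# ... generate the answer ...
finish_chat_ts = time.time()

# Deltas are converted to milliseconds, as in the diff above.
retrieval_time_cost = (retrieval_ts - generate_keyword_ts) * 1000
generate_result_time_cost = (finish_chat_ts - retrieval_ts) * 1000
total_time_cost = (finish_chat_ts - start_ts) * 1000

prompt = "..."  # the prompt assembled earlier in the function
prompt = f"{prompt}\n\n - Total: {total_time_cost:.1f}ms\n - Retrieval: {retrieval_time_cost:.1f}ms\n - Generate answer: {generate_result_time_cost:.1f}ms"
print(prompt)
```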

### api/db/services/task_service.py (+10, −10)

```diff
     def new_task():
         return {"id": get_uuid(), "doc_id": doc["id"], "progress": 0.0, "from_page": 0, "to_page": 100000000}

-    tsks = []
+    parse_task_array = []

     if doc["type"] == FileType.PDF.value:
         file_bin = STORAGE_IMPL.get(bucket, name)
                 task = new_task()
                 task["from_page"] = p
                 task["to_page"] = min(p + page_size, e)
-                tsks.append(task)
+                parse_task_array.append(task)

     elif doc["parser_id"] == "table":
         file_bin = STORAGE_IMPL.get(bucket, name)
             task = new_task()
             task["from_page"] = i
             task["to_page"] = min(i + 3000, rn)
-            tsks.append(task)
+            parse_task_array.append(task)
     else:
-        tsks.append(new_task())
+        parse_task_array.append(new_task())

     chunking_config = DocumentService.get_chunking_config(doc["id"])
-    for task in tsks:
+    for task in parse_task_array:
         hasher = xxhash.xxh64()
         for field in sorted(chunking_config.keys()):
             hasher.update(str(chunking_config[field]).encode("utf-8"))
     prev_tasks = TaskService.get_tasks(doc["id"])
     ck_num = 0
     if prev_tasks:
-        for task in tsks:
+        for task in parse_task_array:
             ck_num += reuse_prev_task_chunks(task, prev_tasks, chunking_config)
         TaskService.filter_delete([Task.doc_id == doc["id"]])
         chunk_ids = []
                                          chunking_config["kb_id"])
     DocumentService.update_by_id(doc["id"], {"chunk_num": ck_num})

-    bulk_insert_into_db(Task, tsks, True)
+    bulk_insert_into_db(Task, parse_task_array, True)
     DocumentService.begin2parse(doc["id"])

-    tsks = [task for task in tsks if task["progress"] < 1.0]
-    for t in tsks:
+    unfinished_task_array = [task for task in parse_task_array if task["progress"] < 1.0]
+    for unfinished_task in unfinished_task_array:
         assert REDIS_CONN.queue_product(
-            SVR_QUEUE_NAME, message=t
+            SVR_QUEUE_NAME, message=unfinished_task
         ), "Can't access Redis. Please check the Redis' status."
```

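The `xxhash` loop in the hunk above fingerprints the chunking configuration, presumably so that `reuse_prev_task_chunks` can match new tasks against previous ones and skip re-chunking when nothing changed. A minimal sketch of that digest, assuming the goal is a deterministic hash over the config fields (the `chunking_config_digest` wrapper is illustrative):

```python
import xxhash

def chunking_config_digest(chunking_config: dict) -> str:
    # Iterate keys in sorted order so the digest does not depend on dict ordering.
    hasher = xxhash.xxh64()
    for field in sorted(chunking_config.keys()):
        hasher.update(str(chunking_config[field]).encode("utf-8"))
    return hasher.hexdigest()

print(chunking_config_digest({"parser_id": "naive", "chunk_token_num": 128}))
```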
### rag/utils/infinity_conn.py (+1, −1)

```diff
         self.connPool.release_conn(inf_conn)
         res = concat_dataframes(df_list, selectFields)
         if matchExprs:
-            res = res.sort(pl.col("SCORE") + pl.col("pagerank_fea"), descending=True, maintain_order=True)
+            res = res.sort(pl.col("score()") + pl.col("pagerank_fea"), descending=True, maintain_order=True)
             res = res.limit(limit)
         logger.debug(f"INFINITY search final result: {str(res)}")
         return res, total_hits_count
```
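
This is the actual "'SCORE' not found" fix: the diff indicates Infinity exposes the fused relevance score under the literal column name `score()`, so `pl.col("SCORE")` referenced a column that does not exist. A minimal polars repro of the fixed sort, with made-up data:

```python
import polars as pl

# "score()" is the literal column name in the result set; referencing "SCORE"
# raised the column-not-found error this PR fixes.
res = pl.DataFrame({
    "chunk_id": ["a", "b", "c"],
    "score()": [0.72, 0.91, 0.55],
    "pagerank_fea": [0.1, 0.0, 0.3],
})

# Rank by relevance score plus pagerank feature, highest first, then truncate.
res = res.sort(pl.col("score()") + pl.col("pagerank_fea"), descending=True, maintain_order=True)
res = res.limit(2)
print(res["chunk_id"].to_list())  # ['b', 'c']  (0.91 and 0.85 beat 0.82)
```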
