浏览代码

Perf: reduce upload to minio limiter scope (#7878)

### What problem does this PR solve?
reduce upload_to_minio limter scope

### Type of change
- [x] Performance Improvement
tags/v0.19.1
Stephen Hu 5 个月前
父节点
当前提交
273f36cc54
没有帐户链接到提交者的电子邮件
共有 1 个文件被更改,包括 19 次插入20 次删除
  1. 19
    20
      rag/svr/task_executor.py

+ 19
- 20
rag/svr/task_executor.py 查看文件

@@ -277,28 +277,27 @@ async def build_chunks(task, progress_callback):

async def upload_to_minio(document, chunk):
try:
d = copy.deepcopy(document)
d.update(chunk)
d["id"] = xxhash.xxh64((chunk["content_with_weight"] + str(d["doc_id"])).encode("utf-8")).hexdigest()
d["create_time"] = str(datetime.now()).replace("T", " ")[:19]
d["create_timestamp_flt"] = datetime.now().timestamp()
if not d.get("image"):
_ = d.pop("image", None)
d["img_id"] = ""
docs.append(d)
return

output_buffer = BytesIO()
if isinstance(d["image"], bytes):
output_buffer = BytesIO(d["image"])
else:
d["image"].save(output_buffer, format='JPEG')
async with minio_limiter:
d = copy.deepcopy(document)
d.update(chunk)
d["id"] = xxhash.xxh64((chunk["content_with_weight"] + str(d["doc_id"])).encode("utf-8")).hexdigest()
d["create_time"] = str(datetime.now()).replace("T", " ")[:19]
d["create_timestamp_flt"] = datetime.now().timestamp()
if not d.get("image"):
_ = d.pop("image", None)
d["img_id"] = ""
docs.append(d)
return

output_buffer = BytesIO()
if isinstance(d["image"], bytes):
output_buffer = BytesIO(d["image"])
else:
d["image"].save(output_buffer, format='JPEG')
await trio.to_thread.run_sync(lambda: STORAGE_IMPL.put(task["kb_id"], d["id"], output_buffer.getvalue()))

d["img_id"] = "{}-{}".format(task["kb_id"], d["id"])
del d["image"]
docs.append(d)
d["img_id"] = "{}-{}".format(task["kb_id"], d["id"])
del d["image"]
docs.append(d)
except Exception:
logging.exception(
"Saving image of chunk {}/{}/{} got exception".format(task["location"], task["name"], d["id"]))

正在加载...
取消
保存