@@ -277,28 +277,27 @@ async def build_chunks(task, progress_callback):
 
     async def upload_to_minio(document, chunk):
         try:
+            d = copy.deepcopy(document)
+            d.update(chunk)
+            d["id"] = xxhash.xxh64((chunk["content_with_weight"] + str(d["doc_id"])).encode("utf-8")).hexdigest()
+            d["create_time"] = str(datetime.now()).replace("T", " ")[:19]
+            d["create_timestamp_flt"] = datetime.now().timestamp()
+            if not d.get("image"):
+                _ = d.pop("image", None)
+                d["img_id"] = ""
+                docs.append(d)
+                return
+
+            output_buffer = BytesIO()
+            if isinstance(d["image"], bytes):
+                output_buffer = BytesIO(d["image"])
+            else:
+                d["image"].save(output_buffer, format='JPEG')
             async with minio_limiter:
-                d = copy.deepcopy(document)
-                d.update(chunk)
-                d["id"] = xxhash.xxh64((chunk["content_with_weight"] + str(d["doc_id"])).encode("utf-8")).hexdigest()
-                d["create_time"] = str(datetime.now()).replace("T", " ")[:19]
-                d["create_timestamp_flt"] = datetime.now().timestamp()
-                if not d.get("image"):
-                    _ = d.pop("image", None)
-                    d["img_id"] = ""
-                    docs.append(d)
-                    return
-
-                output_buffer = BytesIO()
-                if isinstance(d["image"], bytes):
-                    output_buffer = BytesIO(d["image"])
-                else:
-                    d["image"].save(output_buffer, format='JPEG')
                 await trio.to_thread.run_sync(lambda: STORAGE_IMPL.put(task["kb_id"], d["id"], output_buffer.getvalue()))
-
-                d["img_id"] = "{}-{}".format(task["kb_id"], d["id"])
-                del d["image"]
-                docs.append(d)
+            d["img_id"] = "{}-{}".format(task["kb_id"], d["id"])
+            del d["image"]
+            docs.append(d)
         except Exception:
             logging.exception(
                 "Saving image of chunk {}/{}/{} got exception".format(task["location"], task["name"], d["id"]))