Bläddra i källkod

chore: improve opendal storage and ensure closing file after reading files in `load_stream` method (#25874)

tags/1.9.0
Bowen Liang 1 månad sedan
förälder
incheckning
b2e4107c17
Inget konto är kopplat till bidragsgivarens mejladress

+ 14
- 12
api/extensions/storage/opendal_storage.py Visa fil

@@ -3,8 +3,9 @@ import os
from collections.abc import Generator
from pathlib import Path

import opendal # type: ignore[import]
from dotenv import dotenv_values
from opendal import Operator
from opendal.layers import RetryLayer

from extensions.storage.base_storage import BaseStorage

@@ -34,10 +35,9 @@ class OpenDALStorage(BaseStorage):
root = kwargs.get("root", "storage")
Path(root).mkdir(parents=True, exist_ok=True)

self.op = opendal.Operator(scheme=scheme, **kwargs) # type: ignore
retry_layer = RetryLayer(max_times=3, factor=2.0, jitter=True)
self.op = Operator(scheme=scheme, **kwargs).layer(retry_layer)
logger.debug("opendal operator created with scheme %s", scheme)
retry_layer = opendal.layers.RetryLayer(max_times=3, factor=2.0, jitter=True)
self.op = self.op.layer(retry_layer)
logger.debug("added retry layer to opendal operator")

def save(self, filename: str, data: bytes):
@@ -57,22 +57,24 @@ class OpenDALStorage(BaseStorage):
raise FileNotFoundError("File not found")

batch_size = 4096
file = self.op.open(path=filename, mode="rb")
while chunk := file.read(batch_size):
yield chunk
with self.op.open(
path=filename,
mode="rb",
chunck=batch_size,
) as file:
while chunk := file.read(batch_size):
yield chunk
logger.debug("file %s loaded as stream", filename)

def download(self, filename: str, target_filepath: str):
if not self.exists(filename):
raise FileNotFoundError("File not found")

with Path(target_filepath).open("wb") as f:
f.write(self.op.read(path=filename))
Path(target_filepath).write_bytes(self.op.read(path=filename))
logger.debug("file %s downloaded to %s", filename, target_filepath)

def exists(self, filename: str) -> bool:
res: bool = self.op.exists(path=filename)
return res
return self.op.exists(path=filename)

def delete(self, filename: str):
if self.exists(filename):
@@ -85,7 +87,7 @@ class OpenDALStorage(BaseStorage):
if not self.exists(path):
raise FileNotFoundError("Path not found")

all_files = self.op.scan(path=path)
all_files = self.op.list(path=path)
if files and directories:
logger.debug("files and directories on %s scanned", path)
return [f.path for f in all_files]

+ 1
- 1
api/pyproject.toml Visa fil

@@ -183,7 +183,7 @@ storage = [
"cos-python-sdk-v5==1.9.30",
"esdk-obs-python==3.24.6.1",
"google-cloud-storage==2.16.0",
"opendal~=0.45.16",
"opendal~=0.46.0",
"oss2==2.18.5",
"supabase~=2.18.1",
"tos~=2.7.1",

+ 5
- 2
api/tests/unit_tests/oss/__mock/base.py Visa fil

@@ -21,8 +21,11 @@ def get_example_filename() -> str:
return "test.txt"


def get_example_data() -> bytes:
return b"test"
def get_example_data(length: int = 4) -> bytes:
chars = "test"
result = "".join(chars[i % len(chars)] for i in range(length)).encode()
assert len(result) == length
return result


def get_example_filepath() -> str:

+ 9
- 2
api/tests/unit_tests/oss/opendal/test_opendal.py Visa fil

@@ -57,12 +57,19 @@ class TestOpenDAL:
def test_load_stream(self):
"""Test loading data as a stream."""
filename = get_example_filename()
data = get_example_data()
chunks = 5
chunk_size = 4096
data = get_example_data(length=chunk_size * chunks)

self.storage.save(filename, data)
generator = self.storage.load_stream(filename)
assert isinstance(generator, Generator)
assert next(generator) == data
for i in range(chunks):
fetched = next(generator)
assert len(fetched) == chunk_size
assert fetched == data[i * chunk_size : (i + 1) * chunk_size]
with pytest.raises(StopIteration):
next(generator)

def test_download(self):
"""Test downloading data to a file."""

+ 13
- 13
api/uv.lock Visa fil

@@ -1647,7 +1647,7 @@ storage = [
{ name = "cos-python-sdk-v5", specifier = "==1.9.30" },
{ name = "esdk-obs-python", specifier = "==3.24.6.1" },
{ name = "google-cloud-storage", specifier = "==2.16.0" },
{ name = "opendal", specifier = "~=0.45.16" },
{ name = "opendal", specifier = "~=0.46.0" },
{ name = "oss2", specifier = "==2.18.5" },
{ name = "supabase", specifier = "~=2.18.1" },
{ name = "tos", specifier = "~=2.7.1" },
@@ -3825,18 +3825,18 @@ wheels = [

[[package]]
name = "opendal"
version = "0.45.20"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/2f/3f/927dfe1349ae58b9238b8eafba747af648d660a9425f486dda01a10f0b78/opendal-0.45.20.tar.gz", hash = "sha256:9f6f90d9e9f9d6e9e5a34aa7729169ef34d2f1869ad1e01ddc39b1c0ce0c9405", size = 990267, upload-time = "2025-05-26T07:02:11.819Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/84/77/6427e16b8630f0cc71f4a1b01648ed3264f1e04f1f6d9b5d09e5c6a4dd2f/opendal-0.45.20-cp311-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:35acdd8001e4a741532834fdbff3020ffb10b40028bb49fbe93c4f8197d66d8c", size = 26910966, upload-time = "2025-05-26T07:01:24.987Z" },
{ url = "https://files.pythonhosted.org/packages/12/1f/83e415334739f1ab4dba55cdd349abf0b66612249055afb422a354b96ac8/opendal-0.45.20-cp311-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:629bfe8d384364bced6cbeb01f49b99779fa5151c68048a1869ff645ddcfcb25", size = 13002770, upload-time = "2025-05-26T07:01:30.385Z" },
{ url = "https://files.pythonhosted.org/packages/49/94/c5de6ed54a02d7413636c2ccefa71d8dd09c2ada1cd6ecab202feb1fdeda/opendal-0.45.20-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d12cc5ac7e441fb93d86d1673112d9fb08580fc3226f864434f4a56a72efec53", size = 14387218, upload-time = "2025-05-26T07:01:33.017Z" },
{ url = "https://files.pythonhosted.org/packages/c6/83/713a1e1de8cbbd69af50e26644bbdeef3c1068b89f442417376fa3c0f591/opendal-0.45.20-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:45a3adae1f473052234fc4054a6f210df3ded9aff10db8d545d0a37eff3b13cc", size = 13424302, upload-time = "2025-05-26T07:01:36.417Z" },
{ url = "https://files.pythonhosted.org/packages/c7/78/c9651e753aaf6eb61887ca372a3f9c2ae57dae03c3159d24deaf018c26dc/opendal-0.45.20-cp311-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d8947857052c85a4b0e251d50e23f5f68f0cdd9e509e32e614a5e4b2fc7424c4", size = 13622483, upload-time = "2025-05-26T07:01:38.886Z" },
{ url = "https://files.pythonhosted.org/packages/3c/9d/5d8c20c0fc93df5e349e5694167de30afdc54c5755704cc64764a6cbb309/opendal-0.45.20-cp311-abi3-musllinux_1_1_armv7l.whl", hash = "sha256:891d2f9114efeef648973049ed15e56477e8feb9e48b540bd8d6105ea22a253c", size = 13320229, upload-time = "2025-05-26T07:01:41.965Z" },
{ url = "https://files.pythonhosted.org/packages/21/39/05262f748a2085522e0c85f03eab945589313dc9caedc002872c39162776/opendal-0.45.20-cp311-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:539de9b825f6783d6289d88c0c9ac5415daa4d892d761e3540c565bda51e8997", size = 14574280, upload-time = "2025-05-26T07:01:44.413Z" },
{ url = "https://files.pythonhosted.org/packages/74/83/cc7c6de29b0a7585cd445258d174ca204d37729c3874ad08e515b0bf331c/opendal-0.45.20-cp311-abi3-win_amd64.whl", hash = "sha256:145efd56aa33b493d5b652c3e4f5ae5097ab69d38c132d80f108e9f5c1e4d863", size = 14929888, upload-time = "2025-05-26T07:01:46.929Z" },
version = "0.46.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/33/db/9c37efe16afe6371d66a0be94fa701c281108820198f18443dc997fbf3d8/opendal-0.46.0.tar.gz", hash = "sha256:334aa4c5b3cc0776598ef8d3c154f074f6a9d87981b951d70db1407efed3b06c", size = 989391, upload-time = "2025-07-17T06:58:52.913Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6c/05/a8d9c6a935a181d38b55c2cb7121394a6bdd819909ff453a17e78f45672a/opendal-0.46.0-cp311-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:8cd4db71694c93e99055349714c7f7c7177e4767428e9e4bc592e4055edb6dba", size = 26502380, upload-time = "2025-07-17T06:58:16.173Z" },
{ url = "https://files.pythonhosted.org/packages/57/8d/cf684b246fa38ab946f3d11671230d07b5b14d2aeb152b68bd51f4b2210b/opendal-0.46.0-cp311-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3019f923a7e1c5db86a36cee95d0c899ca7379e355bda9eb37e16d076c1f42f3", size = 12684482, upload-time = "2025-07-17T06:58:18.462Z" },
{ url = "https://files.pythonhosted.org/packages/ad/71/36a97a8258cd0f0dd902561d0329a339f5a39a9896f0380763f526e9af89/opendal-0.46.0-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e202ded0be5410546193f563258e9a78a57337f5c2bb553b8802a420c2ef683", size = 14114685, upload-time = "2025-07-17T06:58:20.728Z" },
{ url = "https://files.pythonhosted.org/packages/b7/fa/9a30c17428a12246c6ae17b406e7214a9a3caecec37af6860d27e99f9b66/opendal-0.46.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:7db426ba8171d665953836653a596ef1bad3732a1c4dd2e3fa68bc20beee7afc", size = 13191783, upload-time = "2025-07-17T06:58:23.181Z" },
{ url = "https://files.pythonhosted.org/packages/f8/32/4f7351ee242b63c817896afb373e5d5f28e1d9ca4e51b69a7b2e934694cf/opendal-0.46.0-cp311-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:898444dc072201044ed8c1dcce0929ebda8b10b92ba9c95248cf7fcbbc9dc1d7", size = 13358943, upload-time = "2025-07-17T06:58:25.281Z" },
{ url = "https://files.pythonhosted.org/packages/77/e5/f650cf79ffbf7c7c8d7466fe9b4fa04cda97d950f915b8b3e2ced29f0f3e/opendal-0.46.0-cp311-abi3-musllinux_1_1_armv7l.whl", hash = "sha256:998e7a80a3468fd3f8604873aec6777fd25d3101fdbb1b63a4dc5fef14797086", size = 13015627, upload-time = "2025-07-17T06:58:27.28Z" },
{ url = "https://files.pythonhosted.org/packages/c4/d1/77b731016edd494514447322d6b02a2a49c41ad6deeaa824dd2958479574/opendal-0.46.0-cp311-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:093098658482e7b87d16bf2931b5ef0ee22ed6a695f945874c696da72a6d057a", size = 14314675, upload-time = "2025-07-17T06:58:29.622Z" },
{ url = "https://files.pythonhosted.org/packages/1e/93/328f7c72ccf04b915ab88802342d8f79322b7fba5509513b509681651224/opendal-0.46.0-cp311-abi3-win_amd64.whl", hash = "sha256:f5e58abc86db005879340a9187372a8c105c456c762943139a48dde63aad790d", size = 14904045, upload-time = "2025-07-17T06:58:31.692Z" },
]

[[package]]

Laddar…
Avbryt
Spara