Signed-off-by: -LAN- <laipz8200@outlook.com>
@@ -56,20 +56,36 @@ DB_DATABASE=dify
# Storage configuration
# used to store uploaded files, private keys...
# storage type: local, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
STORAGE_TYPE=local
STORAGE_LOCAL_PATH=storage
# storage type: opendal, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
STORAGE_TYPE=opendal
# Apache OpenDAL storage configuration, refer to https://github.com/apache/opendal
STORAGE_OPENDAL_SCHEME=fs
# OpenDAL FS
OPENDAL_FS_ROOT=storage
# OpenDAL S3
OPENDAL_S3_ROOT=/
OPENDAL_S3_BUCKET=your-bucket-name
OPENDAL_S3_ENDPOINT=https://s3.amazonaws.com
OPENDAL_S3_ACCESS_KEY_ID=your-access-key
OPENDAL_S3_SECRET_ACCESS_KEY=your-secret-key
OPENDAL_S3_REGION=your-region
OPENDAL_S3_SERVER_SIDE_ENCRYPTION=
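For context, the `fs` and `s3` values of `STORAGE_OPENDAL_SCHEME` map directly onto Apache OpenDAL operator schemes. Here is a minimal sketch (not part of the diff) of how these variables translate into an operator, assuming the `opendal` Python binding pinned later in this PR (`~0.45.12`); the keyword calls mirror the ones `OpenDALStorage` uses below, and all values are the placeholders from the env file:

```python
import opendal

# STORAGE_OPENDAL_SCHEME=fs with OPENDAL_FS_ROOT=storage:
fs_op = opendal.Operator(scheme="fs", root="storage")
fs_op.write(path="hello.txt", bs=b"hello")       # lands in storage/hello.txt
assert fs_op.read(path="hello.txt") == b"hello"

# STORAGE_OPENDAL_SCHEME=s3 with the OPENDAL_S3_* variables:
s3_op = opendal.Operator(
    scheme="s3",
    root="/",
    bucket="your-bucket-name",
    endpoint="https://s3.amazonaws.com",
    access_key_id="your-access-key",
    secret_access_key="your-secret-key",
    region="your-region",
)
```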
# S3 Storage configuration
S3_USE_AWS_MANAGED_IAM=false
S3_ENDPOINT=https://your-bucket-name.storage.s3.cloudflare.com
S3_BUCKET_NAME=your-bucket-name
S3_ACCESS_KEY=your-access-key
S3_SECRET_KEY=your-secret-key
S3_REGION=your-region
# Azure Blob Storage configuration
AZURE_BLOB_ACCOUNT_NAME=your-account-name
AZURE_BLOB_ACCOUNT_KEY=your-account-key
AZURE_BLOB_CONTAINER_NAME=your-container-name
AZURE_BLOB_ACCOUNT_URL=https://<your_account_name>.blob.core.windows.net
# Aliyun OSS Storage configuration
ALIYUN_OSS_BUCKET_NAME=your-bucket-name
ALIYUN_OSS_ACCESS_KEY=your-access-key
@@ -79,6 +95,7 @@ ALIYUN_OSS_AUTH_VERSION=v1
ALIYUN_OSS_REGION=your-region
# Don't start with '/'. OSS doesn't support leading slash in object names.
ALIYUN_OSS_PATH=your-path
# Google Storage configuration
GOOGLE_STORAGE_BUCKET_NAME=your-bucket-name
GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64=your-google-service-account-json-base64-string
@@ -125,8 +142,8 @@ SUPABASE_URL=your-server-url
WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
# Vector database configuration
# support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
VECTOR_STORE=weaviate
# Weaviate configuration
@@ -1,54 +1,69 @@
from typing import Any, Optional
from typing import Any, Literal, Optional
from urllib.parse import quote_plus

from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
from pydantic_settings import BaseSettings

from configs.middleware.cache.redis_config import RedisConfig
from configs.middleware.storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
from configs.middleware.storage.amazon_s3_storage_config import S3StorageConfig
from configs.middleware.storage.azure_blob_storage_config import AzureBlobStorageConfig
from configs.middleware.storage.baidu_obs_storage_config import BaiduOBSStorageConfig
from configs.middleware.storage.google_cloud_storage_config import GoogleCloudStorageConfig
from configs.middleware.storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
from configs.middleware.storage.oci_storage_config import OCIStorageConfig
from configs.middleware.storage.supabase_storage_config import SupabaseStorageConfig
from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
from configs.middleware.vdb.baidu_vector_config import BaiduVectorDBConfig
from configs.middleware.vdb.chroma_config import ChromaConfig
from configs.middleware.vdb.couchbase_config import CouchbaseConfig
from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig
from configs.middleware.vdb.lindorm_config import LindormConfig
from configs.middleware.vdb.milvus_config import MilvusConfig
from configs.middleware.vdb.myscale_config import MyScaleConfig
from configs.middleware.vdb.oceanbase_config import OceanBaseVectorConfig
from configs.middleware.vdb.opensearch_config import OpenSearchConfig
from configs.middleware.vdb.oracle_config import OracleConfig
from configs.middleware.vdb.pgvector_config import PGVectorConfig
from configs.middleware.vdb.pgvectors_config import PGVectoRSConfig
from configs.middleware.vdb.qdrant_config import QdrantConfig
from configs.middleware.vdb.relyt_config import RelytConfig
from configs.middleware.vdb.tencent_vector_config import TencentVectorDBConfig
from configs.middleware.vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
from configs.middleware.vdb.tidb_vector_config import TiDBVectorConfig
from configs.middleware.vdb.upstash_config import UpstashConfig
from configs.middleware.vdb.vikingdb_config import VikingDBConfig
from configs.middleware.vdb.weaviate_config import WeaviateConfig
from .cache.redis_config import RedisConfig
from .storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
from .storage.amazon_s3_storage_config import S3StorageConfig
from .storage.azure_blob_storage_config import AzureBlobStorageConfig
from .storage.baidu_obs_storage_config import BaiduOBSStorageConfig
from .storage.google_cloud_storage_config import GoogleCloudStorageConfig
from .storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
from .storage.oci_storage_config import OCIStorageConfig
from .storage.opendal_storage_config import OpenDALStorageConfig
from .storage.supabase_storage_config import SupabaseStorageConfig
from .storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
from .vdb.analyticdb_config import AnalyticdbConfig
from .vdb.baidu_vector_config import BaiduVectorDBConfig
from .vdb.chroma_config import ChromaConfig
from .vdb.couchbase_config import CouchbaseConfig
from .vdb.elasticsearch_config import ElasticsearchConfig
from .vdb.lindorm_config import LindormConfig
from .vdb.milvus_config import MilvusConfig
from .vdb.myscale_config import MyScaleConfig
from .vdb.oceanbase_config import OceanBaseVectorConfig
from .vdb.opensearch_config import OpenSearchConfig
from .vdb.oracle_config import OracleConfig
from .vdb.pgvector_config import PGVectorConfig
from .vdb.pgvectors_config import PGVectoRSConfig
from .vdb.qdrant_config import QdrantConfig
from .vdb.relyt_config import RelytConfig
from .vdb.tencent_vector_config import TencentVectorDBConfig
from .vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
from .vdb.tidb_vector_config import TiDBVectorConfig
from .vdb.upstash_config import UpstashConfig
from .vdb.vikingdb_config import VikingDBConfig
from .vdb.weaviate_config import WeaviateConfig
class StorageConfig(BaseSettings):
    STORAGE_TYPE: str = Field(
    STORAGE_TYPE: Literal[
        "opendal",
        "s3",
        "aliyun-oss",
        "azure-blob",
        "baidu-obs",
        "google-storage",
        "huawei-obs",
        "oci-storage",
        "tencent-cos",
        "volcengine-tos",
        "supabase",
        "local",
    ] = Field(
        description="Type of storage to use."
        " Options: 'local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', 'huawei-obs', "
        "'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'local'.",
        default="local",
        " Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', "
        "'huawei-obs', 'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'opendal'.",
        default="opendal",
    )

    STORAGE_LOCAL_PATH: str = Field(
        description="Path for local storage when STORAGE_TYPE is set to 'local'.",
        default="storage",
        deprecated=True,
    )
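Narrowing `STORAGE_TYPE` from `str` to a `Literal` means a misconfigured value now fails when settings load rather than deep inside the storage factory. A quick sketch of that behavior (hypothetical call site, assuming pydantic v2 semantics):

```python
from pydantic import ValidationError

StorageConfig(STORAGE_TYPE="opendal")   # accepted

try:
    StorageConfig(STORAGE_TYPE="ftp")   # not among the allowed literals
except ValidationError as e:
    print(e)  # reports the invalid STORAGE_TYPE and lists the permitted options
```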
@@ -235,6 +250,7 @@ class MiddlewareConfig(
    GoogleCloudStorageConfig,
    HuaweiCloudOBSStorageConfig,
    OCIStorageConfig,
    OpenDALStorageConfig,
    S3StorageConfig,
    SupabaseStorageConfig,
    TencentCloudCOSStorageConfig,
@@ -0,0 +1,51 @@
from enum import StrEnum
from typing import Literal

from pydantic import Field
from pydantic_settings import BaseSettings


class OpenDALScheme(StrEnum):
    FS = "fs"
    S3 = "s3"


class OpenDALStorageConfig(BaseSettings):
    STORAGE_OPENDAL_SCHEME: str = Field(
        default=OpenDALScheme.FS.value,
        description="OpenDAL scheme.",
    )

    # FS
    OPENDAL_FS_ROOT: str = Field(
        default="storage",
        description="Root path for local storage.",
    )

    # S3
    OPENDAL_S3_ROOT: str = Field(
        default="/",
        description="Root path for S3 storage.",
    )
    OPENDAL_S3_BUCKET: str = Field(
        default="",
        description="S3 bucket name.",
    )
    OPENDAL_S3_ENDPOINT: str = Field(
        default="https://s3.amazonaws.com",
        description="S3 endpoint URL.",
    )
    OPENDAL_S3_ACCESS_KEY_ID: str = Field(
        default="",
        description="S3 access key ID.",
    )
    OPENDAL_S3_SECRET_ACCESS_KEY: str = Field(
        default="",
        description="S3 secret access key.",
    )
    OPENDAL_S3_REGION: str = Field(
        default="",
        description="S3 region.",
    )
    OPENDAL_S3_SERVER_SIDE_ENCRYPTION: Literal["aws:kms", ""] = Field(
        default="",
        description="S3 server-side encryption.",
    )
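Note that `OpenDALScheme` is a `StrEnum`, so its members are themselves strings; that is what lets the storage extension below parse the env value with `OpenDALScheme(...)` and hand the member straight to `opendal.Operator(scheme=...)` without calling `.value`. A small self-contained sketch:

```python
from enum import StrEnum

class OpenDALScheme(StrEnum):
    FS = "fs"
    S3 = "s3"

assert OpenDALScheme("fs") is OpenDALScheme.FS  # parse the env var value
assert OpenDALScheme.FS == "fs"                 # usable wherever a str is expected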
@@ -34,7 +34,6 @@ else
  --workers ${SERVER_WORKER_AMOUNT:-1} \
  --worker-class ${SERVER_WORKER_CLASS:-gevent} \
  --timeout ${GUNICORN_TIMEOUT:-200} \
  --preload \
  app:app
fi
fi
@@ -1,31 +1,43 @@
import logging
from collections.abc import Generator
from collections.abc import Callable, Generator, Mapping
from typing import Union

from flask import Flask

from configs import dify_config
from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from dify_app import DifyApp
from extensions.storage.base_storage import BaseStorage
from extensions.storage.storage_type import StorageType

logger = logging.getLogger(__name__)


class Storage:
    def __init__(self):
        self.storage_runner = None


class Storage:
    def init_app(self, app: Flask):
        storage_factory = self.get_storage_factory(dify_config.STORAGE_TYPE)
        with app.app_context():
            self.storage_runner = storage_factory()

    @staticmethod
    def get_storage_factory(storage_type: str) -> type[BaseStorage]:
    def get_storage_factory(storage_type: str) -> Callable[[], BaseStorage]:
        match storage_type:
            case StorageType.S3:
                from extensions.storage.aws_s3_storage import AwsS3Storage
                from extensions.storage.opendal_storage import OpenDALStorage

                kwargs = _load_s3_storage_kwargs()
                return lambda: OpenDALStorage(scheme=OpenDALScheme.S3, **kwargs)
            case StorageType.OPENDAL:
                from extensions.storage.opendal_storage import OpenDALStorage

                scheme = OpenDALScheme(dify_config.STORAGE_OPENDAL_SCHEME)
                kwargs = _load_opendal_storage_kwargs(scheme)
                return lambda: OpenDALStorage(scheme=scheme, **kwargs)
            case StorageType.LOCAL:
                from extensions.storage.opendal_storage import OpenDALStorage

                return AwsS3Storage
                kwargs = _load_local_storage_kwargs()
                return lambda: OpenDALStorage(scheme=OpenDALScheme.FS, **kwargs)
            case StorageType.AZURE_BLOB:
                from extensions.storage.azure_blob_storage import AzureBlobStorage
@@ -62,16 +74,14 @@ class Storage:
                from extensions.storage.supabase_storage import SupabaseStorage

                return SupabaseStorage
            case StorageType.LOCAL | _:
                from extensions.storage.local_fs_storage import LocalFsStorage

                return LocalFsStorage
            case _:
                raise ValueError(f"Unsupported storage type {storage_type}")
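The return type changes from `type[BaseStorage]` to `Callable[[], BaseStorage]` because the OpenDAL-backed branches must bind constructor kwargs, while the untouched branches keep returning bare classes; both satisfy a zero-argument callable. A usage sketch (hypothetical call site, not from the diff):

```python
factory = Storage.get_storage_factory(StorageType.OPENDAL)
runner = factory()  # e.g. OpenDALStorage(scheme=OpenDALScheme.FS, root="storage")
runner.save("greeting.txt", b"hi")

# Unknown types now fail fast instead of silently falling back to local storage:
try:
    Storage.get_storage_factory("ftp")
except ValueError as e:
    print(e)  # Unsupported storage type ftp
```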
    def save(self, filename, data):
        try:
            self.storage_runner.save(filename, data)
        except Exception as e:
            logging.exception(f"Failed to save file {filename}")
            logger.exception(f"Failed to save file {filename}")
            raise e

    def load(self, filename: str, /, *, stream: bool = False) -> Union[bytes, Generator]:
@@ -81,45 +91,120 @@ class Storage:
            else:
                return self.load_once(filename)
        except Exception as e:
            logging.exception(f"Failed to load file {filename}")
            logger.exception(f"Failed to load file {filename}")
            raise e

    def load_once(self, filename: str) -> bytes:
        try:
            return self.storage_runner.load_once(filename)
        except Exception as e:
            logging.exception(f"Failed to load_once file {filename}")
            logger.exception(f"Failed to load_once file {filename}")
            raise e

    def load_stream(self, filename: str) -> Generator:
        try:
            return self.storage_runner.load_stream(filename)
        except Exception as e:
            logging.exception(f"Failed to load_stream file {filename}")
            logger.exception(f"Failed to load_stream file {filename}")
            raise e

    def download(self, filename, target_filepath):
        try:
            self.storage_runner.download(filename, target_filepath)
        except Exception as e:
            logging.exception(f"Failed to download file {filename}")
            logger.exception(f"Failed to download file {filename}")
            raise e

    def exists(self, filename):
        try:
            return self.storage_runner.exists(filename)
        except Exception as e:
            logging.exception(f"Failed to check file exists {filename}")
            logger.exception(f"Failed to check file exists {filename}")
            raise e

    def delete(self, filename):
        try:
            return self.storage_runner.delete(filename)
        except Exception as e:
            logging.exception(f"Failed to delete file {filename}")
            logger.exception(f"Failed to delete file {filename}")
            raise e
def _load_s3_storage_kwargs() -> Mapping[str, str]:
    """
    Load the kwargs for S3 storage based on dify_config.
    Handles special cases like AWS managed IAM and R2.
    """
    kwargs = {
        "root": "/",
        "bucket": dify_config.S3_BUCKET_NAME,
        "endpoint": dify_config.S3_ENDPOINT,
        "access_key_id": dify_config.S3_ACCESS_KEY,
        "secret_access_key": dify_config.S3_SECRET_KEY,
        "region": dify_config.S3_REGION,
    }
    kwargs = {k: v for k, v in kwargs.items() if isinstance(v, str)}

    # For AWS managed IAM
    if dify_config.S3_USE_AWS_MANAGED_IAM:
        from extensions.storage.opendal_storage import S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS

        logger.debug("Using AWS managed IAM role for S3")
        kwargs = {**kwargs, **{k: v for k, v in S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS.items() if k not in kwargs}}

    # For Cloudflare R2
    if kwargs.get("endpoint"):
        from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint

        if is_r2_endpoint(kwargs["endpoint"]):
            logger.debug("Using R2 for OpenDAL S3")
            kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}

    return kwargs
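The `{**kwargs, **{k: v for ... if k not in kwargs}}` idiom applies the R2/IAM presets without clobbering anything already in the operator kwargs, so explicit configuration always wins. A minimal illustration with hypothetical values:

```python
presets = {"region": "auto", "delete_max_size": "700"}   # e.g. S3_R2_COMPATIBLE_KWARGS
kwargs = {"region": "us-east-1", "bucket": "my-bucket"}

merged = {**kwargs, **{k: v for k, v in presets.items() if k not in kwargs}}
assert merged == {"region": "us-east-1", "bucket": "my-bucket", "delete_max_size": "700"}
```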
def _load_local_storage_kwargs() -> Mapping[str, str]:
    """
    Load the kwargs for local storage based on dify_config.
    """
    return {
        "root": dify_config.STORAGE_LOCAL_PATH,
    }


def _load_opendal_storage_kwargs(scheme: OpenDALScheme) -> Mapping[str, str]:
    """
    Load the kwargs for OpenDAL storage based on the given scheme.
    """
    match scheme:
        case OpenDALScheme.FS:
            kwargs = {
                "root": dify_config.OPENDAL_FS_ROOT,
            }
        case OpenDALScheme.S3:
            # Load OpenDAL S3-related configs
            kwargs = {
                "root": dify_config.OPENDAL_S3_ROOT,
                "bucket": dify_config.OPENDAL_S3_BUCKET,
                "endpoint": dify_config.OPENDAL_S3_ENDPOINT,
                "access_key_id": dify_config.OPENDAL_S3_ACCESS_KEY_ID,
                "secret_access_key": dify_config.OPENDAL_S3_SECRET_ACCESS_KEY,
                "region": dify_config.OPENDAL_S3_REGION,
            }

            # For Cloudflare R2
            if kwargs.get("endpoint"):
                from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint

                if is_r2_endpoint(kwargs["endpoint"]):
                    logger.debug("Using R2 for OpenDAL S3")
                    kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}
        case _:
            logger.warning(f"Unrecognized OpenDAL scheme: {scheme}, will fall back to default.")
            kwargs = {}

    return kwargs


storage = Storage()
@@ -7,9 +7,6 @@ from collections.abc import Generator
class BaseStorage(ABC):
    """Interface for file storage."""

    def __init__(self):  # noqa: B027
        pass

    @abstractmethod
    def save(self, filename, data):
        raise NotImplementedError
@@ -1,62 +0,0 @@
import os
import shutil
from collections.abc import Generator
from pathlib import Path

from flask import current_app

from configs import dify_config
from extensions.storage.base_storage import BaseStorage


class LocalFsStorage(BaseStorage):
    """Implementation for local filesystem storage."""

    def __init__(self):
        super().__init__()
        folder = dify_config.STORAGE_LOCAL_PATH
        if not os.path.isabs(folder):
            folder = os.path.join(current_app.root_path, folder)
        self.folder = folder

    def _build_filepath(self, filename: str) -> str:
        """Build the full file path based on the folder and filename."""
        if not self.folder or self.folder.endswith("/"):
            return self.folder + filename
        else:
            return self.folder + "/" + filename

    def save(self, filename, data):
        filepath = self._build_filepath(filename)
        folder = os.path.dirname(filepath)
        os.makedirs(folder, exist_ok=True)
        Path(os.path.join(os.getcwd(), filepath)).write_bytes(data)

    def load_once(self, filename: str) -> bytes:
        filepath = self._build_filepath(filename)
        if not os.path.exists(filepath):
            raise FileNotFoundError("File not found")
        return Path(filepath).read_bytes()

    def load_stream(self, filename: str) -> Generator:
        filepath = self._build_filepath(filename)
        if not os.path.exists(filepath):
            raise FileNotFoundError("File not found")
        with open(filepath, "rb") as f:
            while chunk := f.read(4096):  # Read in chunks of 4KB
                yield chunk

    def download(self, filename, target_filepath):
        filepath = self._build_filepath(filename)
        if not os.path.exists(filepath):
            raise FileNotFoundError("File not found")
        shutil.copyfile(filepath, target_filepath)

    def exists(self, filename):
        filepath = self._build_filepath(filename)
        return os.path.exists(filepath)

    def delete(self, filename):
        filepath = self._build_filepath(filename)
        if os.path.exists(filepath):
            os.remove(filepath)
@@ -0,0 +1,66 @@
from collections.abc import Generator
from pathlib import Path
from urllib.parse import urlparse

import opendal

from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from extensions.storage.base_storage import BaseStorage

S3_R2_HOSTNAME = "r2.cloudflarestorage.com"
S3_R2_COMPATIBLE_KWARGS = {
    "delete_max_size": "700",
    "disable_stat_with_override": "true",
    "region": "auto",
}
S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS = {
    "server_side_encryption": "aws:kms",
}


def is_r2_endpoint(endpoint: str) -> bool:
    if not endpoint:
        return False

    parsed_url = urlparse(endpoint)
    return bool(parsed_url.hostname and parsed_url.hostname.endswith(S3_R2_HOSTNAME))


class OpenDALStorage(BaseStorage):
    def __init__(self, scheme: OpenDALScheme, **kwargs):
        if scheme == OpenDALScheme.FS:
            Path(kwargs["root"]).mkdir(parents=True, exist_ok=True)

        self.op = opendal.Operator(scheme=scheme, **kwargs)

    def save(self, filename: str, data: bytes) -> None:
        self.op.write(path=filename, bs=data)

    def load_once(self, filename: str) -> bytes:
        if not self.exists(filename):
            raise FileNotFoundError("File not found")

        return self.op.read(path=filename)

    def load_stream(self, filename: str) -> Generator:
        if not self.exists(filename):
            raise FileNotFoundError("File not found")

        batch_size = 4096
        file = self.op.open(path=filename, mode="rb")
        while chunk := file.read(batch_size):
            yield chunk

    def download(self, filename: str, target_filepath: str):
        if not self.exists(filename):
            raise FileNotFoundError("File not found")

        with Path(target_filepath).open("wb") as f:
            f.write(self.op.read(path=filename))

    def exists(self, filename: str):
        return self.op.stat(path=filename).mode.is_file()

    def delete(self, filename: str):
        if self.exists(filename):
            self.op.delete(path=filename)
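Taken together, a rough end-to-end sketch of the new backend (illustrative only, assuming a writable local `storage/` directory, which `__init__` creates for the `fs` scheme; `exists()` relies on `op.stat()` succeeding):

```python
storage = OpenDALStorage(scheme=OpenDALScheme.FS, root="storage")

storage.save("docs/readme.txt", b"hello opendal")
assert storage.exists("docs/readme.txt")
assert storage.load_once("docs/readme.txt") == b"hello opendal"

for chunk in storage.load_stream("docs/readme.txt"):  # yielded in 4 KB chunks
    print(len(chunk))

storage.delete("docs/readme.txt")
```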
@@ -9,6 +9,7 @@ class StorageType(StrEnum):
    HUAWEI_OBS = "huawei-obs"
    LOCAL = "local"
    OCI_STORAGE = "oci-storage"
    OPENDAL = "opendal"
    S3 = "s3"
    TENCENT_COS = "tencent-cos"
    VOLCENGINE_TOS = "volcengine-tos"
@@ -134,6 +134,7 @@ bce-python-sdk = "~0.9.23"
cos-python-sdk-v5 = "1.9.30"
esdk-obs-python = "3.24.6.1"
google-cloud-storage = "2.16.0"
opendal = "~0.45.12"
oss2 = "2.18.5"
supabase = "~2.8.1"
tos = "~2.7.1"
@@ -0,0 +1,20 @@
import pytest

from extensions.storage.opendal_storage import is_r2_endpoint


@pytest.mark.parametrize(
    ("endpoint", "expected"),
    [
        ("https://bucket.r2.cloudflarestorage.com", True),
        ("https://custom-domain.r2.cloudflarestorage.com/", True),
        ("https://bucket.r2.cloudflarestorage.com/path", True),
        ("https://s3.amazonaws.com", False),
        ("https://storage.googleapis.com", False),
        ("http://localhost:9000", False),
        ("invalid-url", False),
        ("", False),
    ],
)
def test_is_r2_endpoint(endpoint: str, expected: bool):
    assert is_r2_endpoint(endpoint) == expected
@@ -6,7 +6,7 @@ from extensions.storage.base_storage import BaseStorage
def get_example_folder() -> str:
    return "/dify"
    return "~/dify"


def get_example_bucket() -> str:
@@ -22,14 +22,14 @@ def get_example_data() -> bytes:
def get_example_filepath() -> str:
    return "/test"
    return "~/test"


class BaseStorageTest:
    @pytest.fixture(autouse=True)
    def setup_method(self):
    def setup_method(self, *args, **kwargs):
        """Should be implemented in child classes to setup specific storage."""
        self.storage = BaseStorage()
        self.storage: BaseStorage

    def test_save(self):
        """Test saving data."""
@@ -1,18 +0,0 @@
from collections.abc import Generator

import pytest

from extensions.storage.local_fs_storage import LocalFsStorage
from tests.unit_tests.oss.__mock.base import (
    BaseStorageTest,
    get_example_folder,
)
from tests.unit_tests.oss.__mock.local import setup_local_fs_mock


class TestLocalFS(BaseStorageTest):
    @pytest.fixture(autouse=True)
    def setup_method(self, setup_local_fs_mock):
        """Executed before each test method."""
        self.storage = LocalFsStorage()
        self.storage.folder = get_example_folder()
@@ -0,0 +1,19 @@
import pytest

from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from extensions.storage.opendal_storage import OpenDALStorage
from tests.unit_tests.oss.__mock.base import (
    BaseStorageTest,
    get_example_folder,
)
from tests.unit_tests.oss.__mock.local import setup_local_fs_mock


class TestOpenDAL(BaseStorageTest):
    @pytest.fixture(autouse=True)
    def setup_method(self, *args, **kwargs):
        """Executed before each test method."""
        self.storage = OpenDALStorage(
            scheme=OpenDALScheme.FS,
            root=get_example_folder(),
        )
@@ -281,10 +281,23 @@ CONSOLE_CORS_ALLOW_ORIGINS=*
# ------------------------------
# The type of storage to use for storing user files.
# Supported values are `local`, `s3`, `azure-blob`, `google-storage`, `tencent-cos`, `huawei-obs`, `volcengine-tos`, `baidu-obs`, `supabase`
# Default: `local`
STORAGE_TYPE=local
STORAGE_LOCAL_PATH=storage
# Supported values are `opendal`, `s3`, `azure-blob`, `google-storage`, `tencent-cos`, `huawei-obs`, `volcengine-tos`, `baidu-obs`, `supabase`
# Default: `opendal`
STORAGE_TYPE=opendal
# Apache OpenDAL Configuration, refer to https://github.com/apache/opendal
# The scheme for the OpenDAL storage.
STORAGE_OPENDAL_SCHEME=fs
# OpenDAL FS
OPENDAL_FS_ROOT=storage
# OpenDAL S3
OPENDAL_S3_ROOT=/
OPENDAL_S3_BUCKET=your-bucket-name
OPENDAL_S3_ENDPOINT=https://s3.amazonaws.com
OPENDAL_S3_ACCESS_KEY_ID=your-access-key
OPENDAL_S3_SECRET_ACCESS_KEY=your-secret-key
OPENDAL_S3_REGION=your-region
OPENDAL_S3_SERVER_SIDE_ENCRYPTION=
# S3 Configuration
# Whether to use AWS managed IAM roles for authenticating with the S3 service.