### What problem does this PR solve? #8074 Oss support opendal(including mysql) ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>tags/v0.19.1
| @@ -9,6 +9,7 @@ mysql: | |||
| port: 5455 | |||
| max_connections: 900 | |||
| stale_timeout: 300 | |||
| max_allowed_packet: 1073741824 | |||
| minio: | |||
| user: 'rag_flow' | |||
| password: 'infini_rag_flow' | |||
| @@ -58,6 +59,11 @@ redis: | |||
| # secret: 'secret' | |||
| # tenant_id: 'tenant_id' | |||
| # container_name: 'container_name' | |||
| # The OSS object storage uses the MySQL configuration above by default. If you need to switch to another object storage service, please uncomment and configure the following parameters. | |||
| # opendal: | |||
| # schema: 'mysql' # Storage type, such as s3, oss, azure, etc. | |||
| # config: | |||
| # oss_table: 'your_table_name' | |||
| # user_default_llm: | |||
| # factory: 'Tongyi-Qianwen' | |||
| # api_key: 'sk-xxxxxxxxxxxxx' | |||
| @@ -62,6 +62,7 @@ dependencies = [ | |||
| "opencv-python==4.10.0.84", | |||
| "opencv-python-headless==4.10.0.84", | |||
| "openpyxl>=3.1.0,<4.0.0", | |||
| "opendal>=0.45.0,<0.46.0", | |||
| "ormsgpack==1.5.0", | |||
| "pandas>=2.2.0,<3.0.0", | |||
| "pdfplumber==0.10.4", | |||
| @@ -71,6 +72,7 @@ dependencies = [ | |||
| "psycopg2-binary==2.9.9", | |||
| "pyclipper==1.3.0.post5", | |||
| "pycryptodomex==3.20.0", | |||
| "pymysql>=1.1.1,<2.0.0", | |||
| "pypdf>=5.0.0,<6.0.0", | |||
| "python-dotenv==1.0.1", | |||
| "python-dateutil==2.8.2", | |||
| @@ -84,6 +86,7 @@ dependencies = [ | |||
| "replicate==0.31.0", | |||
| "roman-numbers==1.0.2", | |||
| "ruamel-base==1.0.0", | |||
| "ruamel-yaml>=0.18.6,<0.19.0", | |||
| "scholarly==1.7.11", | |||
| "scikit-learn==1.5.0", | |||
| "selenium==4.22.0", | |||
| @@ -0,0 +1,115 @@ | |||
| import opendal | |||
| import logging | |||
| import pymysql | |||
| import yaml | |||
| from rag.utils import singleton | |||
| SERVICE_CONF_PATH = "conf/service_conf.yaml" | |||
| CREATE_TABLE_SQL = """ | |||
| CREATE TABLE IF NOT EXISTS `{}` ( | |||
| `key` VARCHAR(255) PRIMARY KEY, | |||
| `value` LONGBLOB, | |||
| `created_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP, | |||
| `updated_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP | |||
| ); | |||
| """ | |||
| SET_MAX_ALLOWED_PACKET_SQL = """ | |||
| SET GLOBAL max_allowed_packet={} | |||
| """ | |||
| def get_opendal_config_from_yaml(yaml_path=SERVICE_CONF_PATH): | |||
| try: | |||
| with open(yaml_path, 'r') as f: | |||
| config = yaml.safe_load(f) | |||
| opendal_config = config.get('opendal', {}) | |||
| kwargs = {} | |||
| if opendal_config.get("schema") == 'mysql': | |||
| mysql_config = config.get('mysql', {}) | |||
| kwargs = { | |||
| "schema": "mysql", | |||
| "host": mysql_config.get("host", "127.0.0.1"), | |||
| "port": str(mysql_config.get("port", 3306)), | |||
| "user": mysql_config.get("user", "root"), | |||
| "password": mysql_config.get("password", ""), | |||
| "database": mysql_config.get("name", "test_open_dal"), | |||
| "table": opendal_config.get("config").get("table", "opendal_storage") | |||
| } | |||
| kwargs["connection_string"] = f"mysql://{kwargs['user']}:{kwargs['password']}@{kwargs['host']}:{kwargs['port']}/{kwargs['database']}" | |||
| else: | |||
| schema = opendal_config.get("schema") | |||
| config_data = opendal_config.get("config", {}) | |||
| kwargs = {"schema": schema, **config_data} | |||
| logging.info("Loaded OpenDAL configuration from yaml: %s", kwargs) | |||
| return kwargs | |||
| except Exception as e: | |||
| logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e)) | |||
| raise | |||
| @singleton | |||
| class OpenDALStorage: | |||
| def __init__(self): | |||
| self._kwargs = get_opendal_config_from_yaml() | |||
| self._schema = self._kwargs.get('schema', 'mysql') | |||
| if self._schema == 'mysql': | |||
| self.init_db_config() | |||
| self.init_opendal_mysql_table() | |||
| self._operator = opendal.Operator(self._schema, **self._kwargs) | |||
| logging.info("OpenDALStorage initialized successfully") | |||
| def put(self, bucket, fnm, binary): | |||
| self._operator.write(f"{bucket}/{fnm}", binary) | |||
| def get(self, bucket, fnm): | |||
| return self._operator.read(f"{bucket}/{fnm}") | |||
| def rm(self, bucket, fnm): | |||
| self._operator.delete(f"{bucket}/{fnm}") | |||
| self._operator.__init__() | |||
| def scan(self, bucket, fnm): | |||
| return self._operator.scan(f"{bucket}/{fnm}") | |||
| def obj_exist(self, bucket, fnm): | |||
| return self._operator.exists(f"{bucket}/{fnm}") | |||
| def init_db_config(self): | |||
| try: | |||
| conn = pymysql.connect( | |||
| host=self._kwargs['host'], | |||
| port=int(self._kwargs['port']), | |||
| user=self._kwargs['user'], | |||
| password=self._kwargs['password'], | |||
| database=self._kwargs['database'] | |||
| ) | |||
| cursor = conn.cursor() | |||
| max_packet = self._kwargs.get('max_allowed_packet', 4194304) # Default to 4MB if not specified | |||
| cursor.execute(SET_MAX_ALLOWED_PACKET_SQL.format(max_packet)) | |||
| conn.commit() | |||
| cursor.close() | |||
| conn.close() | |||
| logging.info(f"Database configuration initialized with max_allowed_packet={max_packet}") | |||
| except Exception as e: | |||
| logging.error(f"Failed to initialize database configuration: {str(e)}") | |||
| raise | |||
| def init_opendal_mysql_table(self): | |||
| conn = pymysql.connect( | |||
| host=self._kwargs['host'], | |||
| port=int(self._kwargs['port']), | |||
| user=self._kwargs['user'], | |||
| password=self._kwargs['password'], | |||
| database=self._kwargs['database'] | |||
| ) | |||
| cursor = conn.cursor() | |||
| cursor.execute(CREATE_TABLE_SQL.format(self._kwargs['table'])) | |||
| conn.commit() | |||
| cursor.close() | |||
| conn.close() | |||
| logging.info(f"Table `{self._kwargs['table']}` initialized.") | |||
| @@ -20,6 +20,7 @@ from enum import Enum | |||
| from rag.utils.azure_sas_conn import RAGFlowAzureSasBlob | |||
| from rag.utils.azure_spn_conn import RAGFlowAzureSpnBlob | |||
| from rag.utils.minio_conn import RAGFlowMinio | |||
| from rag.utils.opendal_conn import OpenDALStorage | |||
| from rag.utils.s3_conn import RAGFlowS3 | |||
| from rag.utils.oss_conn import RAGFlowOSS | |||
| @@ -30,6 +31,7 @@ class Storage(Enum): | |||
| AZURE_SAS = 3 | |||
| AWS_S3 = 4 | |||
| OSS = 5 | |||
| OPENDAL = 6 | |||
| class StorageFactory: | |||
| @@ -39,6 +41,7 @@ class StorageFactory: | |||
| Storage.AZURE_SAS: RAGFlowAzureSasBlob, | |||
| Storage.AWS_S3: RAGFlowS3, | |||
| Storage.OSS: RAGFlowOSS, | |||
| Storage.OPENDAL: OpenDALStorage | |||
| } | |||
| @classmethod | |||
| @@ -3662,6 +3662,30 @@ wheels = [ | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/26/d0/22f68eb23eea053a31655960f133c0be9726c6a881547e6e9e7e2a946c4f/opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl", hash = "sha256:afcf28bd1209dd58810d33defb622b325d3cbe49dcd7a43a902982c33e5fad05" }, | |||
| ] | |||
| [[package]] | |||
| name = "opendal" | |||
| version = "0.45.20" | |||
| source = { registry = "https://mirrors.aliyun.com/pypi/simple" } | |||
| sdist = { url = "https://mirrors.aliyun.com/pypi/packages/2f/3f/927dfe1349ae58b9238b8eafba747af648d660a9425f486dda01a10f0b78/opendal-0.45.20.tar.gz", hash = "sha256:9f6f90d9e9f9d6e9e5a34aa7729169ef34d2f1869ad1e01ddc39b1c0ce0c9405" } | |||
| wheels = [ | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/f7/d9/b74575762bd9178b0498125f270268e0fb122ee991188e053048da7f002c/opendal-0.45.20-cp310-cp310-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:d6069cef67f501eda221da63320bd1291aee967f5f8678ccee9e6e566ab37c78" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/56/f6/0af7d8a4afe5bae6222c4715f0563fa8c257f0525802da47120e28314353/opendal-0.45.20-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c52c4bf9433a3fa17d1f7b18f386a8f601c4b41e3fae9a839d0a861867d6086a" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/96/16/cf0cfc0838c7837f5642824738ad57f84cee658b4cfdd2b25fdfb52ca8a7/opendal-0.45.20-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:088bc9b20c5f07bbb19a9ff45c32dd3d42cf2d0b4ef40a2319ca27cdc635bf0f" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/b0/76/e903436877895fcf948e36aa728b4b56a3a600c4fd3297d8e4bc38a843be/opendal-0.45.20-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:55efb4388fa03f309de497bf9b9854377fc4045da069c72c9d2df21d24c686cb" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/34/10/7863a90a592ed6bfb2ddde104db23a00586004e2197f86a255ad9f8a9401/opendal-0.45.20-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:49c966cda40dc6b7b100ea6150d2f29e01ed7db694c5a5168c5fc451872ec77c" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/b4/a3/b77497101e320bcaebb7e99c43d61ca1886795c6a83001d4426cdbc3683d/opendal-0.45.20-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:e81af55e1d8c145119dfa4c9cacd1fd60c1c1fba2207ec5064cb6baae8c3c86b" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/fc/36/21495e4a405d47ece52df98c323ba9467f43e0641e04819ab5732bf0f370/opendal-0.45.20-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3bbdfcb6840ab8bbd29c36a2a329c1f691023b3cd6a26f8a285dc89f39526017" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/50/28/bb822cad3f3ef15836227751dad46554c499bbefcf0eb34b4cc7e9975e9b/opendal-0.45.20-cp310-cp310-win_amd64.whl", hash = "sha256:e3987c4766a3611ea8cb3a216f21d083ac3e7fa91eb2ff7c0ebe5dc6e6958cce" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/84/77/6427e16b8630f0cc71f4a1b01648ed3264f1e04f1f6d9b5d09e5c6a4dd2f/opendal-0.45.20-cp311-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:35acdd8001e4a741532834fdbff3020ffb10b40028bb49fbe93c4f8197d66d8c" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/12/1f/83e415334739f1ab4dba55cdd349abf0b66612249055afb422a354b96ac8/opendal-0.45.20-cp311-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:629bfe8d384364bced6cbeb01f49b99779fa5151c68048a1869ff645ddcfcb25" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/49/94/c5de6ed54a02d7413636c2ccefa71d8dd09c2ada1cd6ecab202feb1fdeda/opendal-0.45.20-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d12cc5ac7e441fb93d86d1673112d9fb08580fc3226f864434f4a56a72efec53" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/c6/83/713a1e1de8cbbd69af50e26644bbdeef3c1068b89f442417376fa3c0f591/opendal-0.45.20-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:45a3adae1f473052234fc4054a6f210df3ded9aff10db8d545d0a37eff3b13cc" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/c7/78/c9651e753aaf6eb61887ca372a3f9c2ae57dae03c3159d24deaf018c26dc/opendal-0.45.20-cp311-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d8947857052c85a4b0e251d50e23f5f68f0cdd9e509e32e614a5e4b2fc7424c4" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/3c/9d/5d8c20c0fc93df5e349e5694167de30afdc54c5755704cc64764a6cbb309/opendal-0.45.20-cp311-abi3-musllinux_1_1_armv7l.whl", hash = "sha256:891d2f9114efeef648973049ed15e56477e8feb9e48b540bd8d6105ea22a253c" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/21/39/05262f748a2085522e0c85f03eab945589313dc9caedc002872c39162776/opendal-0.45.20-cp311-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:539de9b825f6783d6289d88c0c9ac5415daa4d892d761e3540c565bda51e8997" }, | |||
| { url = "https://mirrors.aliyun.com/pypi/packages/74/83/cc7c6de29b0a7585cd445258d174ca204d37729c3874ad08e515b0bf331c/opendal-0.45.20-cp311-abi3-win_amd64.whl", hash = "sha256:145efd56aa33b493d5b652c3e4f5ae5097ab69d38c132d80f108e9f5c1e4d863" }, | |||
| ] | |||
| [[package]] | |||
| name = "openpyxl" | |||
| version = "3.1.5" | |||
| @@ -4891,6 +4915,7 @@ dependencies = [ | |||
| { name = "openai" }, | |||
| { name = "opencv-python" }, | |||
| { name = "opencv-python-headless" }, | |||
| { name = "opendal" }, | |||
| { name = "openpyxl" }, | |||
| { name = "opensearch-py" }, | |||
| { name = "ormsgpack" }, | |||
| @@ -5041,6 +5066,7 @@ requires-dist = [ | |||
| { name = "openai", specifier = "==1.45.0" }, | |||
| { name = "opencv-python", specifier = "==4.10.0.84" }, | |||
| { name = "opencv-python-headless", specifier = "==4.10.0.84" }, | |||
| { name = "opendal", specifier = ">=0.45.0,<0.46.0" }, | |||
| { name = "openpyxl", specifier = ">=3.1.0,<4.0.0" }, | |||
| { name = "opensearch-py", specifier = "==2.7.1" }, | |||
| { name = "ormsgpack", specifier = "==1.5.0" }, | |||