瀏覽代碼

Oss support opendal(including mysql) (#8204)

### What problem does this PR solve?

#8074
Oss support opendal(including mysql)

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
tags/v0.19.1
africa-worker 4 月之前
父節點
當前提交
44287fb05f
No account linked to committer's email address
共有 5 個文件被更改,包括 153 次插入0 次删除
  1. 6
    0
      conf/service_conf.yaml
  2. 3
    0
      pyproject.toml
  3. 115
    0
      rag/utils/opendal_conn.py
  4. 3
    0
      rag/utils/storage_factory.py
  5. 26
    0
      uv.lock

+ 6
- 0
conf/service_conf.yaml 查看文件

@@ -9,6 +9,7 @@ mysql:
port: 5455
max_connections: 900
stale_timeout: 300
max_allowed_packet: 1073741824
minio:
user: 'rag_flow'
password: 'infini_rag_flow'
@@ -58,6 +59,11 @@ redis:
# secret: 'secret'
# tenant_id: 'tenant_id'
# container_name: 'container_name'
# The OpenDAL object storage uses the MySQL configuration above by default. To switch to another storage backend, uncomment and configure the following parameters.
# opendal:
# schema: 'mysql' # Storage type, such as s3, oss, azure, etc.
# config:
# oss_table: 'your_table_name'
# user_default_llm:
# factory: 'Tongyi-Qianwen'
# api_key: 'sk-xxxxxxxxxxxxx'

+ 3
- 0
pyproject.toml 查看文件

@@ -62,6 +62,7 @@ dependencies = [
"opencv-python==4.10.0.84",
"opencv-python-headless==4.10.0.84",
"openpyxl>=3.1.0,<4.0.0",
"opendal>=0.45.0,<0.46.0",
"ormsgpack==1.5.0",
"pandas>=2.2.0,<3.0.0",
"pdfplumber==0.10.4",
@@ -71,6 +72,7 @@ dependencies = [
"psycopg2-binary==2.9.9",
"pyclipper==1.3.0.post5",
"pycryptodomex==3.20.0",
"pymysql>=1.1.1,<2.0.0",
"pypdf>=5.0.0,<6.0.0",
"python-dotenv==1.0.1",
"python-dateutil==2.8.2",
@@ -84,6 +86,7 @@ dependencies = [
"replicate==0.31.0",
"roman-numbers==1.0.2",
"ruamel-base==1.0.0",
"ruamel-yaml>=0.18.6,<0.19.0",
"scholarly==1.7.11",
"scikit-learn==1.5.0",
"selenium==4.22.0",

+ 115
- 0
rag/utils/opendal_conn.py 查看文件

@@ -0,0 +1,115 @@
import opendal
import logging
import pymysql
import yaml

from rag.utils import singleton

# Default location of the service configuration file read by
# get_opendal_config_from_yaml(). The path is relative, so it is resolved
# against the working directory of the running process.
SERVICE_CONF_PATH = "conf/service_conf.yaml"

# DDL template for the key/value table used when the storage schema is
# 'mysql'. The single `{}` placeholder is filled with the table name via
# str.format(); `key` holds the object path and `value` the object bytes.
CREATE_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS `{}` (
`key` VARCHAR(255) PRIMARY KEY,
`value` LONGBLOB,
`created_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
`updated_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
);
"""
# Statement template that sets the server-wide max_allowed_packet value.
# The `{}` placeholder is filled with the packet size via str.format().
# NOTE(review): SET GLOBAL normally needs elevated MySQL privileges — confirm
# the configured account has them.
SET_MAX_ALLOWED_PACKET_SQL = """
SET GLOBAL max_allowed_packet={}
"""


def get_opendal_config_from_yaml(yaml_path=SERVICE_CONF_PATH):
    """Build the OpenDAL operator settings from the service YAML file.

    The ``opendal`` section selects the backend through its ``schema`` key.
    When the section or the key is missing, the MySQL backend is used, as the
    shipped configuration file documents.

    * ``schema == 'mysql'``: connection details are taken from the top-level
      ``mysql`` section. The table name is read from ``opendal.config``
      (``oss_table`` as documented in the YAML, falling back to the legacy
      ``table`` key, then to ``'opendal_storage'``). If the ``mysql`` section
      defines ``max_allowed_packet`` it is forwarded as a string.
    * any other schema: ``opendal.config`` is passed through unchanged.

    Args:
        yaml_path: Path of the YAML file to read.

    Returns:
        dict: Settings containing at least a ``schema`` key. For MySQL it also
        contains ``host``, ``port``, ``user``, ``password``, ``database``,
        ``table``, ``connection_string`` and, when configured,
        ``max_allowed_packet``.

    Raises:
        Exception: Any error raised while reading or parsing the file is
        logged and re-raised unchanged.
    """
    # Function-scope import keeps the file-level import block untouched.
    from urllib.parse import quote

    try:
        with open(yaml_path, 'r', encoding='utf-8') as f:
            # An empty YAML document parses to None; treat it as "no settings".
            config = yaml.safe_load(f) or {}

        opendal_config = config.get('opendal') or {}
        # `config:` may be absent or left empty in the YAML; both mean "{}".
        backend_config = opendal_config.get("config") or {}
        schema = opendal_config.get("schema") or "mysql"

        if schema == 'mysql':
            mysql_config = config.get('mysql') or {}
            kwargs = {
                "schema": "mysql",
                "host": mysql_config.get("host", "127.0.0.1"),
                "port": str(mysql_config.get("port", 3306)),
                "user": mysql_config.get("user", "root"),
                "password": mysql_config.get("password", ""),
                "database": mysql_config.get("name", "test_open_dal"),
                # The YAML documents `oss_table`; `table` is kept as a fallback
                # for configurations written against the earlier key name.
                "table": backend_config.get(
                    "oss_table", backend_config.get("table", "opendal_storage")
                ),
            }
            max_packet = mysql_config.get("max_allowed_packet")
            if max_packet is not None:
                # Stored as a string, like `port`, because the whole dict is
                # later expanded into opendal.Operator(**kwargs).
                kwargs["max_allowed_packet"] = str(max_packet)
            # Percent-encode the credentials so characters such as '@', ':'
            # or '/' cannot break the URL structure.
            url_user = quote(str(kwargs['user']), safe='')
            url_password = quote(str(kwargs['password']), safe='')
            kwargs["connection_string"] = (
                f"mysql://{url_user}:{url_password}"
                f"@{kwargs['host']}:{kwargs['port']}/{kwargs['database']}"
            )
        else:
            kwargs = {"schema": schema, **backend_config}

        # Never write credentials to the log: mask every secret-looking entry.
        sensitive_marks = ("password", "secret", "token", "key", "connection_string")
        loggable = {
            name: "***" if any(mark in str(name).lower() for mark in sensitive_marks) else value
            for name, value in kwargs.items()
        }
        logging.info("Loaded OpenDAL configuration from yaml: %s", loggable)
        return kwargs
    except Exception as e:
        logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e))
        raise


@singleton
class OpenDALStorage:
    """Object-storage adapter built on an ``opendal.Operator``.

    Settings come from ``get_opendal_config_from_yaml()``. When the schema is
    ``'mysql'`` the constructor first adjusts ``max_allowed_packet`` on the
    server and makes sure the storage table exists, then creates the operator.
    Objects are addressed by the path ``"<bucket>/<fnm>"``.
    """

    def __init__(self):
        self._kwargs = get_opendal_config_from_yaml()
        self._schema = self._kwargs.get('schema', 'mysql')
        if self._schema == 'mysql':
            # The database must be prepared before the operator is created.
            self.init_db_config()
            self.init_opendal_mysql_table()
        self._operator = opendal.Operator(self._schema, **self._kwargs)

        logging.info("OpenDALStorage initialized successfully")

    def put(self, bucket, fnm, binary):
        """Write ``binary`` to the object ``"<bucket>/<fnm>"``."""
        self._operator.write(f"{bucket}/{fnm}", binary)

    def get(self, bucket, fnm):
        """Return whatever the operator reads from ``"<bucket>/<fnm>"``."""
        return self._operator.read(f"{bucket}/{fnm}")

    def rm(self, bucket, fnm):
        """Delete the object ``"<bucket>/<fnm>"``."""
        self._operator.delete(f"{bucket}/{fnm}")

    def scan(self, bucket, fnm):
        """Return the operator's scan result for the path ``"<bucket>/<fnm>"``."""
        return self._operator.scan(f"{bucket}/{fnm}")

    def obj_exist(self, bucket, fnm):
        """Return the operator's existence check for ``"<bucket>/<fnm>"``."""
        return self._operator.exists(f"{bucket}/{fnm}")

    def _connect(self):
        """Open a new pymysql connection from the loaded MySQL settings."""
        return pymysql.connect(
            host=self._kwargs['host'],
            port=int(self._kwargs['port']),
            user=self._kwargs['user'],
            password=self._kwargs['password'],
            database=self._kwargs['database']
        )

    def _execute_and_commit(self, sql):
        """Run one statement on a fresh connection, commit, and always clean up."""
        conn = self._connect()
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(sql)
                conn.commit()
            finally:
                cursor.close()
        finally:
            # Close even when execute/commit raised, so no connection leaks.
            conn.close()

    def init_db_config(self):
        """Set the server-wide ``max_allowed_packet`` value.

        Uses ``max_allowed_packet`` from the loaded settings, or 4 MB when it
        is not present.

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        try:
            # int() guarantees that only a number is interpolated into the SQL.
            max_packet = int(self._kwargs.get('max_allowed_packet', 4194304))  # Default to 4MB if not specified
            self._execute_and_commit(SET_MAX_ALLOWED_PACKET_SQL.format(max_packet))
            logging.info(f"Database configuration initialized with max_allowed_packet={max_packet}")
        except Exception as e:
            logging.error(f"Failed to initialize database configuration: {str(e)}")
            raise

    def init_opendal_mysql_table(self):
        """Create the key/value storage table if it does not exist yet."""
        # Identifiers cannot be bound as query parameters, so double any
        # backtick to keep the name inside its quoted identifier.
        quoted_table = str(self._kwargs['table']).replace('`', '``')
        self._execute_and_commit(CREATE_TABLE_SQL.format(quoted_table))
        logging.info(f"Table `{self._kwargs['table']}` initialized.")

+ 3
- 0
rag/utils/storage_factory.py 查看文件

@@ -20,6 +20,7 @@ from enum import Enum
from rag.utils.azure_sas_conn import RAGFlowAzureSasBlob
from rag.utils.azure_spn_conn import RAGFlowAzureSpnBlob
from rag.utils.minio_conn import RAGFlowMinio
from rag.utils.opendal_conn import OpenDALStorage
from rag.utils.s3_conn import RAGFlowS3
from rag.utils.oss_conn import RAGFlowOSS

@@ -30,6 +31,7 @@ class Storage(Enum):
AZURE_SAS = 3
AWS_S3 = 4
OSS = 5
OPENDAL = 6


class StorageFactory:
@@ -39,6 +41,7 @@ class StorageFactory:
Storage.AZURE_SAS: RAGFlowAzureSasBlob,
Storage.AWS_S3: RAGFlowS3,
Storage.OSS: RAGFlowOSS,
Storage.OPENDAL: OpenDALStorage
}

@classmethod

+ 26
- 0
uv.lock 查看文件

@@ -3662,6 +3662,30 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/26/d0/22f68eb23eea053a31655960f133c0be9726c6a881547e6e9e7e2a946c4f/opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl", hash = "sha256:afcf28bd1209dd58810d33defb622b325d3cbe49dcd7a43a902982c33e5fad05" },
]

[[package]]
name = "opendal"
version = "0.45.20"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/2f/3f/927dfe1349ae58b9238b8eafba747af648d660a9425f486dda01a10f0b78/opendal-0.45.20.tar.gz", hash = "sha256:9f6f90d9e9f9d6e9e5a34aa7729169ef34d2f1869ad1e01ddc39b1c0ce0c9405" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/f7/d9/b74575762bd9178b0498125f270268e0fb122ee991188e053048da7f002c/opendal-0.45.20-cp310-cp310-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:d6069cef67f501eda221da63320bd1291aee967f5f8678ccee9e6e566ab37c78" },
{ url = "https://mirrors.aliyun.com/pypi/packages/56/f6/0af7d8a4afe5bae6222c4715f0563fa8c257f0525802da47120e28314353/opendal-0.45.20-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c52c4bf9433a3fa17d1f7b18f386a8f601c4b41e3fae9a839d0a861867d6086a" },
{ url = "https://mirrors.aliyun.com/pypi/packages/96/16/cf0cfc0838c7837f5642824738ad57f84cee658b4cfdd2b25fdfb52ca8a7/opendal-0.45.20-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:088bc9b20c5f07bbb19a9ff45c32dd3d42cf2d0b4ef40a2319ca27cdc635bf0f" },
{ url = "https://mirrors.aliyun.com/pypi/packages/b0/76/e903436877895fcf948e36aa728b4b56a3a600c4fd3297d8e4bc38a843be/opendal-0.45.20-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:55efb4388fa03f309de497bf9b9854377fc4045da069c72c9d2df21d24c686cb" },
{ url = "https://mirrors.aliyun.com/pypi/packages/34/10/7863a90a592ed6bfb2ddde104db23a00586004e2197f86a255ad9f8a9401/opendal-0.45.20-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:49c966cda40dc6b7b100ea6150d2f29e01ed7db694c5a5168c5fc451872ec77c" },
{ url = "https://mirrors.aliyun.com/pypi/packages/b4/a3/b77497101e320bcaebb7e99c43d61ca1886795c6a83001d4426cdbc3683d/opendal-0.45.20-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:e81af55e1d8c145119dfa4c9cacd1fd60c1c1fba2207ec5064cb6baae8c3c86b" },
{ url = "https://mirrors.aliyun.com/pypi/packages/fc/36/21495e4a405d47ece52df98c323ba9467f43e0641e04819ab5732bf0f370/opendal-0.45.20-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3bbdfcb6840ab8bbd29c36a2a329c1f691023b3cd6a26f8a285dc89f39526017" },
{ url = "https://mirrors.aliyun.com/pypi/packages/50/28/bb822cad3f3ef15836227751dad46554c499bbefcf0eb34b4cc7e9975e9b/opendal-0.45.20-cp310-cp310-win_amd64.whl", hash = "sha256:e3987c4766a3611ea8cb3a216f21d083ac3e7fa91eb2ff7c0ebe5dc6e6958cce" },
{ url = "https://mirrors.aliyun.com/pypi/packages/84/77/6427e16b8630f0cc71f4a1b01648ed3264f1e04f1f6d9b5d09e5c6a4dd2f/opendal-0.45.20-cp311-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:35acdd8001e4a741532834fdbff3020ffb10b40028bb49fbe93c4f8197d66d8c" },
{ url = "https://mirrors.aliyun.com/pypi/packages/12/1f/83e415334739f1ab4dba55cdd349abf0b66612249055afb422a354b96ac8/opendal-0.45.20-cp311-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:629bfe8d384364bced6cbeb01f49b99779fa5151c68048a1869ff645ddcfcb25" },
{ url = "https://mirrors.aliyun.com/pypi/packages/49/94/c5de6ed54a02d7413636c2ccefa71d8dd09c2ada1cd6ecab202feb1fdeda/opendal-0.45.20-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d12cc5ac7e441fb93d86d1673112d9fb08580fc3226f864434f4a56a72efec53" },
{ url = "https://mirrors.aliyun.com/pypi/packages/c6/83/713a1e1de8cbbd69af50e26644bbdeef3c1068b89f442417376fa3c0f591/opendal-0.45.20-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:45a3adae1f473052234fc4054a6f210df3ded9aff10db8d545d0a37eff3b13cc" },
{ url = "https://mirrors.aliyun.com/pypi/packages/c7/78/c9651e753aaf6eb61887ca372a3f9c2ae57dae03c3159d24deaf018c26dc/opendal-0.45.20-cp311-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d8947857052c85a4b0e251d50e23f5f68f0cdd9e509e32e614a5e4b2fc7424c4" },
{ url = "https://mirrors.aliyun.com/pypi/packages/3c/9d/5d8c20c0fc93df5e349e5694167de30afdc54c5755704cc64764a6cbb309/opendal-0.45.20-cp311-abi3-musllinux_1_1_armv7l.whl", hash = "sha256:891d2f9114efeef648973049ed15e56477e8feb9e48b540bd8d6105ea22a253c" },
{ url = "https://mirrors.aliyun.com/pypi/packages/21/39/05262f748a2085522e0c85f03eab945589313dc9caedc002872c39162776/opendal-0.45.20-cp311-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:539de9b825f6783d6289d88c0c9ac5415daa4d892d761e3540c565bda51e8997" },
{ url = "https://mirrors.aliyun.com/pypi/packages/74/83/cc7c6de29b0a7585cd445258d174ca204d37729c3874ad08e515b0bf331c/opendal-0.45.20-cp311-abi3-win_amd64.whl", hash = "sha256:145efd56aa33b493d5b652c3e4f5ae5097ab69d38c132d80f108e9f5c1e4d863" },
]

[[package]]
name = "openpyxl"
version = "3.1.5"
@@ -4891,6 +4915,7 @@ dependencies = [
{ name = "openai" },
{ name = "opencv-python" },
{ name = "opencv-python-headless" },
{ name = "opendal" },
{ name = "openpyxl" },
{ name = "opensearch-py" },
{ name = "ormsgpack" },
@@ -5041,6 +5066,7 @@ requires-dist = [
{ name = "openai", specifier = "==1.45.0" },
{ name = "opencv-python", specifier = "==4.10.0.84" },
{ name = "opencv-python-headless", specifier = "==4.10.0.84" },
{ name = "opendal", specifier = ">=0.45.0,<0.46.0" },
{ name = "openpyxl", specifier = ">=3.1.0,<4.0.0" },
{ name = "opensearch-py", specifier = "==2.7.1" },
{ name = "ormsgpack", specifier = "==1.5.0" },

Loading…
取消
儲存