您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

opendal_conn.py 4.2KB

Fix: opendal storage health attribute not found & remove duplicate operator scheme initialization (#8265) ### What problem does this PR solve? This PR fixes two issues in the OpenDAL storage connector: 1. The `health` method was missing, which prevented health checks on the storage backend. 2. The initialization of the `opendal.Operator` object included a redundant scheme parameter, causing unnecessary duplication and potential confusion. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ### Background - The absence of a `health` method made it difficult to verify the availability and reliability of the storage service. - Initializing `opendal.Operator` with both `self._scheme` and unpacked `**self._kwargs` could lead to errors or unexpected behavior if the scheme was already included in the kwargs. ### What is changed and how it works? - Adds a `health` method that writes a test file to verify storage availability. - Removes the duplicate scheme parameter from the `opendal.Operator` initialization to ensure clarity and prevent conflicts. before: <img width="762" alt="企业微信截图_46be646f-2e99-4e5e-be67-b1483426e77c" src="https://github.com/user-attachments/assets/acecbb8c-4810-457f-8342-6355148551ba" /> <img width="767" alt="image" src="https://github.com/user-attachments/assets/147cd5a2-dde3-466b-a9c1-d1d4f0819e5d" /> after: <img width="1123" alt="企业微信截图_09d62997-8908-4985-b89f-7a78b5da55ac" src="https://github.com/user-attachments/assets/97dc88c9-0f4e-4d77-88b3-cd818e8da046" />
4 个月前
Fix: opendal storage health attribute not found & remove duplicate operator scheme initialization (#8265) ### What problem does this PR solve? This PR fixes two issues in the OpenDAL storage connector: 1. The `health` method was missing, which prevented health checks on the storage backend. 2. The initialization of the `opendal.Operator` object included a redundant scheme parameter, causing unnecessary duplication and potential confusion. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ### Background - The absence of a `health` method made it difficult to verify the availability and reliability of the storage service. - Initializing `opendal.Operator` with both `self._scheme` and unpacked `**self._kwargs` could lead to errors or unexpected behavior if the scheme was already included in the kwargs. ### What is changed and how it works? - Adds a `health` method that writes a test file to verify storage availability. - Removes the duplicate scheme parameter from the `opendal.Operator` initialization to ensure clarity and prevent conflicts. before: <img width="762" alt="企业微信截图_46be646f-2e99-4e5e-be67-b1483426e77c" src="https://github.com/user-attachments/assets/acecbb8c-4810-457f-8342-6355148551ba" /> <img width="767" alt="image" src="https://github.com/user-attachments/assets/147cd5a2-dde3-466b-a9c1-d1d4f0819e5d" /> after: <img width="1123" alt="企业微信截图_09d62997-8908-4985-b89f-7a78b5da55ac" src="https://github.com/user-attachments/assets/97dc88c9-0f4e-4d77-88b3-cd818e8da046" />
4 个月前
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. import opendal
  2. import logging
  3. import pymysql
  4. from api.utils import get_base_config
  5. from rag.utils import singleton
  # DDL template for the key/value table used by the MySQL scheme.
  # The `{}` placeholder is filled with the table name via str.format().
  CREATE_TABLE_SQL = """
  CREATE TABLE IF NOT EXISTS `{}` (
  `key` VARCHAR(255) PRIMARY KEY,
  `value` LONGBLOB,
  `created_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  `updated_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
  );
  """
  # Statement template that sets the server-wide max_allowed_packet.
  # The `{}` placeholder is filled with the packet size via str.format().
  SET_MAX_ALLOWED_PACKET_SQL = """
  SET GLOBAL max_allowed_packet={}
  """
  def get_opendal_config():
      """Build the keyword arguments used to construct the OpenDAL operator.

      Reads the ``opendal`` section of the base configuration through
      ``get_base_config``. When its ``scheme`` is ``'mysql'``, connection
      details come from the ``mysql`` section and a ``connection_string`` URL
      is assembled from them. For any other scheme, the ``config`` mapping of
      the ``opendal`` section is merged with the scheme unchanged.

      Returns:
          dict: Operator keyword arguments; a ``scheme`` key is always present.

      Raises:
          Exception: Any error raised while reading or assembling the
              configuration is logged and then re-raised unchanged.
      """
      # Function-scope import so this block does not depend on edits to the
      # module's import section.
      from urllib.parse import quote

      try:
          opendal_config = get_base_config('opendal', {})
          # `config` may be missing or explicitly null in the yaml; treat both
          # as an empty mapping so neither branch fails on a None value.
          config_data = opendal_config.get("config") or {}

          if opendal_config.get("scheme") == 'mysql':
              mysql_config = get_base_config('mysql', {})
              max_packet = mysql_config.get("max_allowed_packet", 134217728)
              kwargs = {
                  "scheme": "mysql",
                  "host": mysql_config.get("host", "127.0.0.1"),
                  "port": str(mysql_config.get("port", 3306)),
                  "user": mysql_config.get("user", "root"),
                  "password": mysql_config.get("password", ""),
                  "database": mysql_config.get("name", "test_open_dal"),
                  "table": config_data.get("oss_table", "opendal_storage"),
                  "max_allowed_packet": str(max_packet),
              }
              # Percent-encode the credentials: characters such as '@', ':',
              # '/' or '%' would otherwise change how the URL is parsed.
              url_user = quote(str(kwargs['user']), safe='')
              url_password = quote(str(kwargs['password']), safe='')
              kwargs["connection_string"] = (
                  f"mysql://{url_user}:{url_password}"
                  f"@{kwargs['host']}:{kwargs['port']}/{kwargs['database']}"
                  f"?max_allowed_packet={max_packet}"
              )
          else:
              scheme = opendal_config.get("scheme")
              kwargs = {"scheme": scheme, **config_data}

          # Keep credentials out of the log; only the logged copy is masked,
          # the returned kwargs are untouched.
          sensitive_markers = ("password", "secret", "token", "connection_string")
          loggable = {
              key: ("***" if any(m in str(key).lower() for m in sensitive_markers) else value)
              for key, value in kwargs.items()
          }
          logging.info("Loaded OpenDAL configuration from yaml: %s", loggable)
          return kwargs
      except Exception as e:
          logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e))
          raise
  @singleton
  class OpenDALStorage:
      """Object-storage facade backed by an ``opendal.Operator``.

      Objects are addressed on the operator as ``"{bucket}/{fnm}"`` paths.
      When the configured scheme is ``'mysql'``, the constructor first sets
      ``max_allowed_packet`` on the server and creates the backing table, and
      only then builds the operator.
      """

      def __init__(self):
          """Load the OpenDAL configuration and build the operator."""
          self._kwargs = get_opendal_config()
          self._scheme = self._kwargs.get('scheme', 'mysql')
          if self._scheme == 'mysql':
              self.init_db_config()
              self.init_opendal_mysql_table()
          # `scheme` is passed as part of the kwargs, not as a separate argument.
          self._operator = opendal.Operator(**self._kwargs)
          logging.info("OpenDALStorage initialized successfully")

      def health(self):
          """Probe the backend by writing a small fixed object.

          Returns:
              Whatever ``Operator.write`` returns; an exception from the write
              propagates to the caller.
          """
          bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
          r = self._operator.write(f"{bucket}/{fnm}", binary)
          return r

      def put(self, bucket, fnm, binary):
          """Write ``binary`` to the object ``{bucket}/{fnm}``."""
          self._operator.write(f"{bucket}/{fnm}", binary)

      def get(self, bucket, fnm):
          """Return the content of the object ``{bucket}/{fnm}``."""
          return self._operator.read(f"{bucket}/{fnm}")

      def rm(self, bucket, fnm):
          """Delete the object ``{bucket}/{fnm}``."""
          self._operator.delete(f"{bucket}/{fnm}")

      def scan(self, bucket, fnm):
          """Return the operator's scan result for the path ``{bucket}/{fnm}``."""
          return self._operator.scan(f"{bucket}/{fnm}")

      def obj_exist(self, bucket, fnm):
          """Return the operator's ``exists`` result for ``{bucket}/{fnm}``."""
          return self._operator.exists(f"{bucket}/{fnm}")

      def _connect(self):
          """Open a new pymysql connection from the loaded MySQL settings."""
          return pymysql.connect(
              host=self._kwargs['host'],
              port=int(self._kwargs['port']),
              user=self._kwargs['user'],
              password=self._kwargs['password'],
              database=self._kwargs['database'],
          )

      def init_db_config(self):
          """Set the server-wide ``max_allowed_packet`` for the MySQL backend.

          Raises:
              Exception: Any connection or execution error is logged and then
                  re-raised unchanged.
          """
          try:
              conn = self._connect()
              try:
                  max_packet = self._kwargs.get('max_allowed_packet', 4194304)  # Default to 4MB if not specified
                  with conn.cursor() as cursor:
                      cursor.execute(SET_MAX_ALLOWED_PACKET_SQL.format(max_packet))
                  conn.commit()
              finally:
                  # Release the connection even when execute/commit fails.
                  conn.close()
              logging.info(f"Database configuration initialized with max_allowed_packet={max_packet}")
          except Exception as e:
              logging.error(f"Failed to initialize database configuration: {str(e)}")
              raise

      def init_opendal_mysql_table(self):
          """Create the configured storage table if it does not exist yet."""
          conn = self._connect()
          try:
              with conn.cursor() as cursor:
                  cursor.execute(CREATE_TABLE_SQL.format(self._kwargs['table']))
              conn.commit()
          finally:
              # Release the connection even when execute/commit fails.
              conn.close()
          logging.info(f"Table `{self._kwargs['table']}` initialized.")