You may select up to 25 topics. A topic must start with a letter or number, may contain hyphens (-), and must not exceed 35 characters.

oss_conn.py 5.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. #
  2. # Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
import functools
import logging
import time
from io import BytesIO

import boto3
from botocore.config import Config
from botocore.exceptions import ClientError

from rag import settings
from rag.utils import singleton
  23. @singleton
  24. class RAGFlowOSS:
  25. def __init__(self):
  26. self.conn = None
  27. self.oss_config = settings.OSS
  28. self.access_key = self.oss_config.get('access_key', None)
  29. self.secret_key = self.oss_config.get('secret_key', None)
  30. self.endpoint_url = self.oss_config.get('endpoint_url', None)
  31. self.region = self.oss_config.get('region', None)
  32. self.bucket = self.oss_config.get('bucket', None)
  33. self.prefix_path = self.oss_config.get('prefix_path', None)
  34. self.__open__()
  35. @staticmethod
  36. def use_default_bucket(method):
  37. def wrapper(self, bucket, *args, **kwargs):
  38. # If there is a default bucket, use the default bucket
  39. actual_bucket = self.bucket if self.bucket else bucket
  40. return method(self, actual_bucket, *args, **kwargs)
  41. return wrapper
  42. @staticmethod
  43. def use_prefix_path(method):
  44. def wrapper(self, bucket, fnm, *args, **kwargs):
  45. # If the prefix path is set, use the prefix path
  46. fnm = f"{self.prefix_path}/{fnm}" if self.prefix_path else fnm
  47. return method(self, bucket, fnm, *args, **kwargs)
  48. return wrapper
  49. def __open__(self):
  50. try:
  51. if self.conn:
  52. self.__close__()
  53. except Exception:
  54. pass
  55. try:
  56. # Reference:https://help.aliyun.com/zh/oss/developer-reference/use-amazon-s3-sdks-to-access-oss
  57. self.conn = boto3.client(
  58. 's3',
  59. region_name=self.region,
  60. aws_access_key_id=self.access_key,
  61. aws_secret_access_key=self.secret_key,
  62. endpoint_url=self.endpoint_url,
  63. config=Config(s3={"addressing_style": "virtual"}, signature_version='v4')
  64. )
  65. except Exception:
  66. logging.exception(f"Fail to connect at region {self.region}")
  67. def __close__(self):
  68. del self.conn
  69. self.conn = None
  70. @use_default_bucket
  71. def bucket_exists(self, bucket):
  72. try:
  73. logging.debug(f"head_bucket bucketname {bucket}")
  74. self.conn.head_bucket(Bucket=bucket)
  75. exists = True
  76. except ClientError:
  77. logging.exception(f"head_bucket error {bucket}")
  78. exists = False
  79. return exists
  80. def health(self):
  81. bucket = self.bucket
  82. fnm = "txtxtxtxt1"
  83. fnm, binary = f"{self.prefix_path}/{fnm}" if self.prefix_path else fnm, b"_t@@@1"
  84. if not self.bucket_exists(bucket):
  85. self.conn.create_bucket(Bucket=bucket)
  86. logging.debug(f"create bucket {bucket} ********")
  87. r = self.conn.upload_fileobj(BytesIO(binary), bucket, fnm)
  88. return r
  89. def get_properties(self, bucket, key):
  90. return {}
  91. def list(self, bucket, dir, recursive=True):
  92. return []
  93. @use_prefix_path
  94. @use_default_bucket
  95. def put(self, bucket, fnm, binary):
  96. logging.debug(f"bucket name {bucket}; filename :{fnm}:")
  97. for _ in range(1):
  98. try:
  99. if not self.bucket_exists(bucket):
  100. self.conn.create_bucket(Bucket=bucket)
  101. logging.info(f"create bucket {bucket} ********")
  102. r = self.conn.upload_fileobj(BytesIO(binary), bucket, fnm)
  103. return r
  104. except Exception:
  105. logging.exception(f"Fail put {bucket}/{fnm}")
  106. self.__open__()
  107. time.sleep(1)
  108. @use_prefix_path
  109. @use_default_bucket
  110. def rm(self, bucket, fnm):
  111. try:
  112. self.conn.delete_object(Bucket=bucket, Key=fnm)
  113. except Exception:
  114. logging.exception(f"Fail rm {bucket}/{fnm}")
  115. @use_prefix_path
  116. @use_default_bucket
  117. def get(self, bucket, fnm):
  118. for _ in range(1):
  119. try:
  120. r = self.conn.get_object(Bucket=bucket, Key=fnm)
  121. object_data = r['Body'].read()
  122. return object_data
  123. except Exception:
  124. logging.exception(f"fail get {bucket}/{fnm}")
  125. self.__open__()
  126. time.sleep(1)
  127. return
  128. @use_prefix_path
  129. @use_default_bucket
  130. def obj_exist(self, bucket, fnm):
  131. try:
  132. if self.conn.head_object(Bucket=bucket, Key=fnm):
  133. return True
  134. except ClientError as e:
  135. if e.response['Error']['Code'] == '404':
  136. return False
  137. else:
  138. raise
  139. @use_prefix_path
  140. @use_default_bucket
  141. def get_presigned_url(self, bucket, fnm, expires):
  142. for _ in range(10):
  143. try:
  144. r = self.conn.generate_presigned_url('get_object',
  145. Params={'Bucket': bucket,
  146. 'Key': fnm},
  147. ExpiresIn=expires)
  148. return r
  149. except Exception:
  150. logging.exception(f"fail get url {bucket}/{fnm}")
  151. self.__open__()
  152. time.sleep(1)
  153. return