Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

s3_conn.py 6.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. #
  2. # Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
import functools
import logging
import time
from io import BytesIO

import boto3
from botocore.config import Config
from botocore.exceptions import ClientError

from rag.utils import singleton
from rag import settings


  24. @singleton
  25. class RAGFlowS3:
  26. def __init__(self):
  27. self.conn = None
  28. self.s3_config = settings.S3
  29. self.access_key = self.s3_config.get('access_key', None)
  30. self.secret_key = self.s3_config.get('secret_key', None)
  31. self.region = self.s3_config.get('region', None)
  32. self.endpoint_url = self.s3_config.get('endpoint_url', None)
  33. self.signature_version = self.s3_config.get('signature_version', None)
  34. self.addressing_style = self.s3_config.get('addressing_style', None)
  35. self.bucket = self.s3_config.get('bucket', None)
  36. self.prefix_path = self.s3_config.get('prefix_path', None)
  37. self.__open__()
  38. @staticmethod
  39. def use_default_bucket(method):
  40. def wrapper(self, bucket, *args, **kwargs):
  41. # If there is a default bucket, use the default bucket
  42. actual_bucket = self.bucket if self.bucket else bucket
  43. return method(self, actual_bucket, *args, **kwargs)
  44. return wrapper
  45. @staticmethod
  46. def use_prefix_path(method):
  47. def wrapper(self, bucket, fnm, *args, **kwargs):
  48. # If the prefix path is set, use the prefix path.
  49. # The bucket passed from the upstream call is
  50. # used as the file prefix. This is especially useful when you're using the default bucket
  51. if self.prefix_path:
  52. fnm = f"{self.prefix_path}/{bucket}/{fnm}"
  53. return method(self, bucket, fnm, *args, **kwargs)
  54. return wrapper
  55. def __open__(self):
  56. try:
  57. if self.conn:
  58. self.__close__()
  59. except Exception:
  60. pass
  61. try:
  62. s3_params = {}
  63. config_kwargs = {}
  64. # if not set ak/sk, boto3 s3 client would try several ways to do the authentication
  65. # see doc: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#configuring-credentials
  66. if self.access_key and self.secret_key:
  67. s3_params = {
  68. 'aws_access_key_id': self.access_key,
  69. 'aws_secret_access_key': self.secret_key,
  70. }
  71. if self.region in self.s3_config:
  72. s3_params['region_name'] = self.region
  73. if 'endpoint_url' in self.s3_config:
  74. s3_params['endpoint_url'] = self.endpoint_url
  75. if 'signature_version' in self.s3_config:
  76. config_kwargs['signature_version'] = self.signature_version
  77. if 'addressing_style' in self.s3_config:
  78. config_kwargs['addressing_style'] = self.addressing_style
  79. if config_kwargs:
  80. s3_params['config'] = Config(**config_kwargs)
  81. self.conn = boto3.client('s3', **s3_params)
  82. except Exception:
  83. logging.exception(f"Fail to connect at region {self.region} or endpoint {self.endpoint_url}")
  84. def __close__(self):
  85. del self.conn
  86. self.conn = None
  87. @use_default_bucket
  88. def bucket_exists(self, bucket):
  89. try:
  90. logging.debug(f"head_bucket bucketname {bucket}")
  91. self.conn.head_bucket(Bucket=bucket)
  92. exists = True
  93. except ClientError:
  94. logging.exception(f"head_bucket error {bucket}")
  95. exists = False
  96. return exists
  97. def health(self):
  98. bucket = self.bucket
  99. fnm = "txtxtxtxt1"
  100. fnm, binary = f"{self.prefix_path}/{fnm}" if self.prefix_path else fnm, b"_t@@@1"
  101. if not self.bucket_exists(bucket):
  102. self.conn.create_bucket(Bucket=bucket)
  103. logging.debug(f"create bucket {bucket} ********")
  104. r = self.conn.upload_fileobj(BytesIO(binary), bucket, fnm)
  105. return r
  106. def get_properties(self, bucket, key):
  107. return {}
  108. def list(self, bucket, dir, recursive=True):
  109. return []
  110. @use_prefix_path
  111. @use_default_bucket
  112. def put(self, bucket, fnm, binary):
  113. logging.debug(f"bucket name {bucket}; filename :{fnm}:")
  114. for _ in range(1):
  115. try:
  116. if not self.bucket_exists(bucket):
  117. self.conn.create_bucket(Bucket=bucket)
  118. logging.info(f"create bucket {bucket} ********")
  119. r = self.conn.upload_fileobj(BytesIO(binary), bucket, fnm)
  120. return r
  121. except Exception:
  122. logging.exception(f"Fail put {bucket}/{fnm}")
  123. self.__open__()
  124. time.sleep(1)
  125. @use_prefix_path
  126. @use_default_bucket
  127. def rm(self, bucket, fnm):
  128. try:
  129. self.conn.delete_object(Bucket=bucket, Key=fnm)
  130. except Exception:
  131. logging.exception(f"Fail rm {bucket}/{fnm}")
  132. @use_prefix_path
  133. @use_default_bucket
  134. def get(self, bucket, fnm):
  135. for _ in range(1):
  136. try:
  137. r = self.conn.get_object(Bucket=bucket, Key=fnm)
  138. object_data = r['Body'].read()
  139. return object_data
  140. except Exception:
  141. logging.exception(f"fail get {bucket}/{fnm}")
  142. self.__open__()
  143. time.sleep(1)
  144. return
  145. @use_prefix_path
  146. @use_default_bucket
  147. def obj_exist(self, bucket, fnm):
  148. try:
  149. if self.conn.head_object(Bucket=bucket, Key=fnm):
  150. return True
  151. except ClientError as e:
  152. if e.response['Error']['Code'] == '404':
  153. return False
  154. else:
  155. raise
  156. @use_prefix_path
  157. @use_default_bucket
  158. def get_presigned_url(self, bucket, fnm, expires):
  159. for _ in range(10):
  160. try:
  161. r = self.conn.generate_presigned_url('get_object',
  162. Params={'Bucket': bucket,
  163. 'Key': fnm},
  164. ExpiresIn=expires)
  165. return r
  166. except Exception:
  167. logging.exception(f"fail get url {bucket}/{fnm}")
  168. self.__open__()
  169. time.sleep(1)
  170. return