Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. #
  2. # Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import logging
  17. import boto3
  18. from botocore.exceptions import ClientError
  19. from botocore.config import Config
  20. import time
  21. from io import BytesIO
  22. from rag.utils import singleton
  23. from rag import settings
  24. @singleton
  25. class RAGFlowS3:
  26. def __init__(self):
  27. self.conn = None
  28. self.s3_config = settings.S3
  29. self.access_key = self.s3_config.get('access_key', None)
  30. self.secret_key = self.s3_config.get('secret_key', None)
  31. self.region = self.s3_config.get('region', None)
  32. self.endpoint_url = self.s3_config.get('endpoint_url', None)
  33. self.signature_version = self.s3_config.get('signature_version', None)
  34. self.addressing_style = self.s3_config.get('addressing_style', None)
  35. self.bucket = self.s3_config.get('bucket', None)
  36. self.prefix_path = self.s3_config.get('prefix_path', None)
  37. self.__open__()
  38. @staticmethod
  39. def use_default_bucket(method):
  40. def wrapper(self, bucket, *args, **kwargs):
  41. # If there is a default bucket, use the default bucket
  42. actual_bucket = self.bucket if self.bucket else bucket
  43. return method(self, actual_bucket, *args, **kwargs)
  44. return wrapper
  45. @staticmethod
  46. def use_prefix_path(method):
  47. def wrapper(self, bucket, fnm, *args, **kwargs):
  48. # If the prefix path is set, use the prefix path.
  49. # The bucket passed from the upstream call is
  50. # used as the file prefix. This is especially useful when you're using the default bucket
  51. if self.prefix_path:
  52. fnm = f"{self.prefix_path}/{bucket}/{fnm}"
  53. return method(self, bucket, fnm, *args, **kwargs)
  54. return wrapper
  55. def __open__(self):
  56. try:
  57. if self.conn:
  58. self.__close__()
  59. except Exception:
  60. pass
  61. try:
  62. s3_params = {}
  63. # if not set ak/sk, boto3 s3 client would try several ways to do the authentication
  64. # see doc: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#configuring-credentials
  65. if self.access_key and self.secret_key:
  66. s3_params = {
  67. 'aws_access_key_id': self.access_key,
  68. 'aws_secret_access_key': self.secret_key,
  69. }
  70. if self.region in self.s3_config:
  71. s3_params['region_name'] = self.region
  72. if 'endpoint_url' in self.s3_config:
  73. s3_params['endpoint_url'] = self.endpoint_url
  74. if 'signature_version' in self.s3_config:
  75. s3_params['config'] = Config(s3={"signature_version": self.signature_version})
  76. if 'addressing_style' in self.s3_config:
  77. s3_params['config'] = Config(s3={"addressing_style": self.addressing_style})
  78. self.conn = boto3.client('s3', **s3_params)
  79. except Exception:
  80. logging.exception(f"Fail to connect at region {self.region} or endpoint {self.endpoint_url}")
  81. def __close__(self):
  82. del self.conn
  83. self.conn = None
  84. @use_default_bucket
  85. def bucket_exists(self, bucket):
  86. try:
  87. logging.debug(f"head_bucket bucketname {bucket}")
  88. self.conn.head_bucket(Bucket=bucket)
  89. exists = True
  90. except ClientError:
  91. logging.exception(f"head_bucket error {bucket}")
  92. exists = False
  93. return exists
  94. def health(self):
  95. bucket = self.bucket
  96. fnm = "txtxtxtxt1"
  97. fnm, binary = f"{self.prefix_path}/{fnm}" if self.prefix_path else fnm, b"_t@@@1"
  98. if not self.bucket_exists(bucket):
  99. self.conn.create_bucket(Bucket=bucket)
  100. logging.debug(f"create bucket {bucket} ********")
  101. r = self.conn.upload_fileobj(BytesIO(binary), bucket, fnm)
  102. return r
  103. def get_properties(self, bucket, key):
  104. return {}
  105. def list(self, bucket, dir, recursive=True):
  106. return []
  107. @use_prefix_path
  108. @use_default_bucket
  109. def put(self, bucket, fnm, binary):
  110. logging.debug(f"bucket name {bucket}; filename :{fnm}:")
  111. for _ in range(1):
  112. try:
  113. if not self.bucket_exists(bucket):
  114. self.conn.create_bucket(Bucket=bucket)
  115. logging.info(f"create bucket {bucket} ********")
  116. r = self.conn.upload_fileobj(BytesIO(binary), bucket, fnm)
  117. return r
  118. except Exception:
  119. logging.exception(f"Fail put {bucket}/{fnm}")
  120. self.__open__()
  121. time.sleep(1)
  122. @use_prefix_path
  123. @use_default_bucket
  124. def rm(self, bucket, fnm):
  125. try:
  126. self.conn.delete_object(Bucket=bucket, Key=fnm)
  127. except Exception:
  128. logging.exception(f"Fail rm {bucket}/{fnm}")
  129. @use_prefix_path
  130. @use_default_bucket
  131. def get(self, bucket, fnm):
  132. for _ in range(1):
  133. try:
  134. r = self.conn.get_object(Bucket=bucket, Key=fnm)
  135. object_data = r['Body'].read()
  136. return object_data
  137. except Exception:
  138. logging.exception(f"fail get {bucket}/{fnm}")
  139. self.__open__()
  140. time.sleep(1)
  141. return
  142. @use_prefix_path
  143. @use_default_bucket
  144. def obj_exist(self, bucket, fnm):
  145. try:
  146. if self.conn.head_object(Bucket=bucket, Key=fnm):
  147. return True
  148. except ClientError as e:
  149. if e.response['Error']['Code'] == '404':
  150. return False
  151. else:
  152. raise
  153. @use_prefix_path
  154. @use_default_bucket
  155. def get_presigned_url(self, bucket, fnm, expires):
  156. for _ in range(10):
  157. try:
  158. r = self.conn.generate_presigned_url('get_object',
  159. Params={'Bucket': bucket,
  160. 'Key': fnm},
  161. ExpiresIn=expires)
  162. return r
  163. except Exception:
  164. logging.exception(f"fail get url {bucket}/{fnm}")
  165. self.__open__()
  166. time.sleep(1)
  167. return