Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import json
  17. import os
  18. import secrets
  19. from datetime import date
  20. from enum import Enum, IntEnum
  21. import rag.utils
  22. import rag.utils.es_conn
  23. import rag.utils.infinity_conn
  24. import rag.utils.opensearch_conn
  25. from api.constants import RAG_FLOW_SERVICE_NAME
  26. from api.utils import decrypt_database_config, get_base_config
  27. from api.utils.file_utils import get_project_base_directory
  28. from rag.nlp import search
  29. LIGHTEN = int(os.environ.get("LIGHTEN", "0"))
  30. LLM = None
  31. LLM_FACTORY = None
  32. LLM_BASE_URL = None
  33. CHAT_MDL = ""
  34. EMBEDDING_MDL = ""
  35. RERANK_MDL = ""
  36. ASR_MDL = ""
  37. IMAGE2TEXT_MDL = ""
  38. CHAT_CFG = ""
  39. EMBEDDING_CFG = ""
  40. RERANK_CFG = ""
  41. ASR_CFG = ""
  42. IMAGE2TEXT_CFG = ""
  43. API_KEY = None
  44. PARSERS = None
  45. HOST_IP = None
  46. HOST_PORT = None
  47. SECRET_KEY = None
  48. FACTORY_LLM_INFOS = None
  49. DATABASE_TYPE = os.getenv("DB_TYPE", "mysql")
  50. DATABASE = decrypt_database_config(name=DATABASE_TYPE)
  51. # authentication
  52. AUTHENTICATION_CONF = None
  53. # client
  54. CLIENT_AUTHENTICATION = None
  55. HTTP_APP_KEY = None
  56. GITHUB_OAUTH = None
  57. FEISHU_OAUTH = None
  58. OAUTH_CONFIG = None
  59. DOC_ENGINE = None
  60. docStoreConn = None
  61. retrievaler = None
  62. kg_retrievaler = None
  63. # user registration switch
  64. REGISTER_ENABLED = 1
  65. # sandbox-executor-manager
  66. SANDBOX_ENABLED = 0
  67. SANDBOX_HOST = None
  68. STRONG_TEST_COUNT = int(os.environ.get("STRONG_TEST_COUNT", "8"))
  69. BUILTIN_EMBEDDING_MODELS = ["BAAI/bge-large-zh-v1.5@BAAI", "maidalun1020/bce-embedding-base_v1@Youdao"]
  70. SMTP_CONF = None
  71. MAIL_SERVER = ""
  72. MAIL_PORT = 000
  73. MAIL_USE_SSL= True
  74. MAIL_USE_TLS = False
  75. MAIL_USERNAME = ""
  76. MAIL_PASSWORD = ""
  77. MAIL_DEFAULT_SENDER = ()
  78. MAIL_FRONTEND_URL = ""
  79. def get_or_create_secret_key():
  80. secret_key = os.environ.get("RAGFLOW_SECRET_KEY")
  81. if secret_key and len(secret_key) >= 32:
  82. return secret_key
  83. # Check if there's a configured secret key
  84. configured_key = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("secret_key")
  85. if configured_key and configured_key != str(date.today()) and len(configured_key) >= 32:
  86. return configured_key
  87. # Generate a new secure key and warn about it
  88. import logging
  89. new_key = secrets.token_hex(32)
  90. logging.warning(f"SECURITY WARNING: Using auto-generated SECRET_KEY. Generated key: {new_key}")
  91. return new_key
  92. def init_settings():
  93. global LLM, LLM_FACTORY, LLM_BASE_URL, LIGHTEN, DATABASE_TYPE, DATABASE, FACTORY_LLM_INFOS, REGISTER_ENABLED
  94. LIGHTEN = int(os.environ.get("LIGHTEN", "0"))
  95. DATABASE_TYPE = os.getenv("DB_TYPE", "mysql")
  96. DATABASE = decrypt_database_config(name=DATABASE_TYPE)
  97. LLM = get_base_config("user_default_llm", {}) or {}
  98. LLM_DEFAULT_MODELS = LLM.get("default_models", {}) or {}
  99. LLM_FACTORY = LLM.get("factory", "") or ""
  100. LLM_BASE_URL = LLM.get("base_url", "") or ""
  101. try:
  102. REGISTER_ENABLED = int(os.environ.get("REGISTER_ENABLED", "1"))
  103. except Exception:
  104. pass
  105. try:
  106. with open(os.path.join(get_project_base_directory(), "conf", "llm_factories.json"), "r") as f:
  107. FACTORY_LLM_INFOS = json.load(f)["factory_llm_infos"]
  108. except Exception:
  109. FACTORY_LLM_INFOS = []
  110. global CHAT_MDL, EMBEDDING_MDL, RERANK_MDL, ASR_MDL, IMAGE2TEXT_MDL
  111. global CHAT_CFG, EMBEDDING_CFG, RERANK_CFG, ASR_CFG, IMAGE2TEXT_CFG
  112. if not LIGHTEN:
  113. EMBEDDING_MDL = BUILTIN_EMBEDDING_MODELS[0]
  114. global API_KEY, PARSERS, HOST_IP, HOST_PORT, SECRET_KEY
  115. API_KEY = LLM.get("api_key")
  116. PARSERS = LLM.get(
  117. "parsers", "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag"
  118. )
  119. chat_entry = _parse_model_entry(LLM_DEFAULT_MODELS.get("chat_model", CHAT_MDL))
  120. embedding_entry = _parse_model_entry(LLM_DEFAULT_MODELS.get("embedding_model", EMBEDDING_MDL))
  121. rerank_entry = _parse_model_entry(LLM_DEFAULT_MODELS.get("rerank_model", RERANK_MDL))
  122. asr_entry = _parse_model_entry(LLM_DEFAULT_MODELS.get("asr_model", ASR_MDL))
  123. image2text_entry = _parse_model_entry(LLM_DEFAULT_MODELS.get("image2text_model", IMAGE2TEXT_MDL))
  124. CHAT_CFG = _resolve_per_model_config(chat_entry, LLM_FACTORY, API_KEY, LLM_BASE_URL)
  125. EMBEDDING_CFG = _resolve_per_model_config(embedding_entry, LLM_FACTORY, API_KEY, LLM_BASE_URL)
  126. RERANK_CFG = _resolve_per_model_config(rerank_entry, LLM_FACTORY, API_KEY, LLM_BASE_URL)
  127. ASR_CFG = _resolve_per_model_config(asr_entry, LLM_FACTORY, API_KEY, LLM_BASE_URL)
  128. IMAGE2TEXT_CFG = _resolve_per_model_config(image2text_entry, LLM_FACTORY, API_KEY, LLM_BASE_URL)
  129. CHAT_MDL = CHAT_CFG.get("model", "") or ""
  130. EMBEDDING_MDL = EMBEDDING_CFG.get("model", "") or ""
  131. RERANK_MDL = RERANK_CFG.get("model", "") or ""
  132. ASR_MDL = ASR_CFG.get("model", "") or ""
  133. IMAGE2TEXT_MDL = IMAGE2TEXT_CFG.get("model", "") or ""
  134. HOST_IP = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1")
  135. HOST_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port")
  136. SECRET_KEY = get_or_create_secret_key()
  137. global AUTHENTICATION_CONF, CLIENT_AUTHENTICATION, HTTP_APP_KEY, GITHUB_OAUTH, FEISHU_OAUTH, OAUTH_CONFIG
  138. # authentication
  139. AUTHENTICATION_CONF = get_base_config("authentication", {})
  140. # client
  141. CLIENT_AUTHENTICATION = AUTHENTICATION_CONF.get("client", {}).get("switch", False)
  142. HTTP_APP_KEY = AUTHENTICATION_CONF.get("client", {}).get("http_app_key")
  143. GITHUB_OAUTH = get_base_config("oauth", {}).get("github")
  144. FEISHU_OAUTH = get_base_config("oauth", {}).get("feishu")
  145. OAUTH_CONFIG = get_base_config("oauth", {})
  146. global DOC_ENGINE, docStoreConn, retrievaler, kg_retrievaler
  147. DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch")
  148. # DOC_ENGINE = os.environ.get('DOC_ENGINE', "opensearch")
  149. lower_case_doc_engine = DOC_ENGINE.lower()
  150. if lower_case_doc_engine == "elasticsearch":
  151. docStoreConn = rag.utils.es_conn.ESConnection()
  152. elif lower_case_doc_engine == "infinity":
  153. docStoreConn = rag.utils.infinity_conn.InfinityConnection()
  154. elif lower_case_doc_engine == "opensearch":
  155. docStoreConn = rag.utils.opensearch_conn.OSConnection()
  156. else:
  157. raise Exception(f"Not supported doc engine: {DOC_ENGINE}")
  158. retrievaler = search.Dealer(docStoreConn)
  159. from graphrag import search as kg_search
  160. kg_retrievaler = kg_search.KGSearch(docStoreConn)
  161. if int(os.environ.get("SANDBOX_ENABLED", "0")):
  162. global SANDBOX_HOST
  163. SANDBOX_HOST = os.environ.get("SANDBOX_HOST", "sandbox-executor-manager")
  164. global SMTP_CONF, MAIL_SERVER, MAIL_PORT, MAIL_USE_SSL, MAIL_USE_TLS
  165. global MAIL_USERNAME, MAIL_PASSWORD, MAIL_DEFAULT_SENDER, MAIL_FRONTEND_URL
  166. SMTP_CONF = get_base_config("smtp", {})
  167. MAIL_SERVER = SMTP_CONF.get("mail_server", "")
  168. MAIL_PORT = SMTP_CONF.get("mail_port", 000)
  169. MAIL_USE_SSL = SMTP_CONF.get("mail_use_ssl", True)
  170. MAIL_USE_TLS = SMTP_CONF.get("mail_use_tls", False)
  171. MAIL_USERNAME = SMTP_CONF.get("mail_username", "")
  172. MAIL_PASSWORD = SMTP_CONF.get("mail_password", "")
  173. mail_default_sender = SMTP_CONF.get("mail_default_sender", [])
  174. if mail_default_sender and len(mail_default_sender) >= 2:
  175. MAIL_DEFAULT_SENDER = (mail_default_sender[0], mail_default_sender[1])
  176. MAIL_FRONTEND_URL = SMTP_CONF.get("mail_frontend_url", "")
  177. class CustomEnum(Enum):
  178. @classmethod
  179. def valid(cls, value):
  180. try:
  181. cls(value)
  182. return True
  183. except BaseException:
  184. return False
  185. @classmethod
  186. def values(cls):
  187. return [member.value for member in cls.__members__.values()]
  188. @classmethod
  189. def names(cls):
  190. return [member.name for member in cls.__members__.values()]
  191. class RetCode(IntEnum, CustomEnum):
  192. SUCCESS = 0
  193. NOT_EFFECTIVE = 10
  194. EXCEPTION_ERROR = 100
  195. ARGUMENT_ERROR = 101
  196. DATA_ERROR = 102
  197. OPERATING_ERROR = 103
  198. CONNECTION_ERROR = 105
  199. RUNNING = 106
  200. PERMISSION_ERROR = 108
  201. AUTHENTICATION_ERROR = 109
  202. UNAUTHORIZED = 401
  203. SERVER_ERROR = 500
  204. FORBIDDEN = 403
  205. NOT_FOUND = 404
  206. def _parse_model_entry(entry):
  207. if isinstance(entry, str):
  208. return {"name": entry, "factory": None, "api_key": None, "base_url": None}
  209. if isinstance(entry, dict):
  210. name = entry.get("name") or entry.get("model") or ""
  211. return {
  212. "name": name,
  213. "factory": entry.get("factory"),
  214. "api_key": entry.get("api_key"),
  215. "base_url": entry.get("base_url"),
  216. }
  217. return {"name": "", "factory": None, "api_key": None, "base_url": None}
  218. def _resolve_per_model_config(entry_dict, backup_factory, backup_api_key, backup_base_url):
  219. name = (entry_dict.get("name") or "").strip()
  220. m_factory = entry_dict.get("factory") or backup_factory or ""
  221. m_api_key = entry_dict.get("api_key") or backup_api_key or ""
  222. m_base_url = entry_dict.get("base_url") or backup_base_url or ""
  223. if name and "@" not in name and m_factory:
  224. name = f"{name}@{m_factory}"
  225. return {
  226. "model": name,
  227. "factory": m_factory,
  228. "api_key": m_api_key,
  229. "base_url": m_base_url,
  230. }