您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

init_data.py 8.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import logging
  17. import base64
  18. import json
  19. import os
  20. import time
  21. import uuid
  22. from copy import deepcopy
  23. from api.db import LLMType, UserTenantRole
  24. from api.db.db_models import init_database_tables as init_web_db, LLMFactories, LLM, TenantLLM
  25. from api.db.services import UserService
  26. from api.db.services.canvas_service import CanvasTemplateService
  27. from api.db.services.document_service import DocumentService
  28. from api.db.services.knowledgebase_service import KnowledgebaseService
  29. from api.db.services.llm_service import LLMFactoriesService, LLMService, TenantLLMService, LLMBundle
  30. from api.db.services.user_service import TenantService, UserTenantService
  31. from api import settings
  32. from api.utils.file_utils import get_project_base_directory
  33. def encode_to_base64(input_string):
  34. base64_encoded = base64.b64encode(input_string.encode('utf-8'))
  35. return base64_encoded.decode('utf-8')
  36. def init_superuser():
  37. user_info = {
  38. "id": uuid.uuid1().hex,
  39. "password": encode_to_base64("admin"),
  40. "nickname": "admin",
  41. "is_superuser": True,
  42. "email": "admin@ragflow.io",
  43. "creator": "system",
  44. "status": "1",
  45. }
  46. tenant = {
  47. "id": user_info["id"],
  48. "name": user_info["nickname"] + "‘s Kingdom",
  49. "llm_id": settings.CHAT_MDL,
  50. "embd_id": settings.EMBEDDING_MDL,
  51. "asr_id": settings.ASR_MDL,
  52. "parser_ids": settings.PARSERS,
  53. "img2txt_id": settings.IMAGE2TEXT_MDL
  54. }
  55. usr_tenant = {
  56. "tenant_id": user_info["id"],
  57. "user_id": user_info["id"],
  58. "invited_by": user_info["id"],
  59. "role": UserTenantRole.OWNER
  60. }
  61. user_id = user_info
  62. tenant_llm = []
  63. seen = set()
  64. factory_configs = []
  65. for factory_config in [
  66. settings.CHAT_CFG["factory"],
  67. settings.EMBEDDING_CFG["factory"],
  68. settings.ASR_CFG["factory"],
  69. settings.IMAGE2TEXT_CFG["factory"],
  70. settings.RERANK_CFG["factory"],
  71. ]:
  72. factory_name = factory_config["factory"]
  73. if factory_name not in seen:
  74. seen.add(factory_name)
  75. factory_configs.append(factory_config)
  76. for factory_config in factory_configs:
  77. for llm in LLMService.query(fid=factory_config["factory"]):
  78. tenant_llm.append(
  79. {
  80. "tenant_id": user_id,
  81. "llm_factory": factory_config["factory"],
  82. "llm_name": llm.llm_name,
  83. "model_type": llm.model_type,
  84. "api_key": factory_config["api_key"],
  85. "api_base": factory_config["base_url"],
  86. "max_tokens": llm.max_tokens if llm.max_tokens else 8192,
  87. }
  88. )
  89. unique = {}
  90. for item in tenant_llm:
  91. key = (item["tenant_id"], item["llm_factory"], item["llm_name"])
  92. if key not in unique:
  93. unique[key] = item
  94. tenant_llm = list(unique.values())
  95. if not UserService.save(**user_info):
  96. logging.error("can't init admin.")
  97. return
  98. TenantService.insert(**tenant)
  99. UserTenantService.insert(**usr_tenant)
  100. TenantLLMService.insert_many(tenant_llm)
  101. logging.info(
  102. "Super user initialized. email: admin@ragflow.io, password: admin. Changing the password after login is strongly recommended.")
  103. chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
  104. msg = chat_mdl.chat(system="", history=[
  105. {"role": "user", "content": "Hello!"}], gen_conf={})
  106. if msg.find("ERROR: ") == 0:
  107. logging.error(
  108. "'{}' doesn't work. {}".format(
  109. tenant["llm_id"],
  110. msg))
  111. embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
  112. v, c = embd_mdl.encode(["Hello!"])
  113. if c == 0:
  114. logging.error(
  115. "'{}' doesn't work!".format(
  116. tenant["embd_id"]))
  117. def init_llm_factory():
  118. try:
  119. LLMService.filter_delete([(LLM.fid == "MiniMax" or LLM.fid == "Minimax")])
  120. LLMService.filter_delete([(LLM.fid == "cohere")])
  121. LLMFactoriesService.filter_delete([LLMFactories.name == "cohere"])
  122. except Exception:
  123. pass
  124. factory_llm_infos = settings.FACTORY_LLM_INFOS
  125. for factory_llm_info in factory_llm_infos:
  126. info = deepcopy(factory_llm_info)
  127. llm_infos = info.pop("llm")
  128. try:
  129. LLMFactoriesService.save(**info)
  130. except Exception:
  131. pass
  132. LLMService.filter_delete([LLM.fid == factory_llm_info["name"]])
  133. for llm_info in llm_infos:
  134. llm_info["fid"] = factory_llm_info["name"]
  135. try:
  136. LLMService.save(**llm_info)
  137. except Exception:
  138. pass
  139. LLMFactoriesService.filter_delete([(LLMFactories.name == "Local") | (LLMFactories.name == "novita.ai")])
  140. LLMService.filter_delete([LLM.fid == "Local"])
  141. LLMService.filter_delete([LLM.llm_name == "qwen-vl-max"])
  142. LLMService.filter_delete([LLM.fid == "Moonshot", LLM.llm_name == "flag-embedding"])
  143. TenantLLMService.filter_delete([TenantLLM.llm_factory == "Moonshot", TenantLLM.llm_name == "flag-embedding"])
  144. LLMFactoriesService.filter_delete([LLMFactoriesService.model.name == "QAnything"])
  145. LLMService.filter_delete([LLMService.model.fid == "QAnything"])
  146. TenantLLMService.filter_update([TenantLLMService.model.llm_factory == "QAnything"], {"llm_factory": "Youdao"})
  147. TenantLLMService.filter_update([TenantLLMService.model.llm_factory == "cohere"], {"llm_factory": "Cohere"})
  148. TenantService.filter_update([1 == 1], {
  149. "parser_ids": "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag"})
  150. ## insert openai two embedding models to the current openai user.
  151. # print("Start to insert 2 OpenAI embedding models...")
  152. tenant_ids = set([row["tenant_id"] for row in TenantLLMService.get_openai_models()])
  153. for tid in tenant_ids:
  154. for row in TenantLLMService.query(llm_factory="OpenAI", tenant_id=tid):
  155. row = row.to_dict()
  156. row["model_type"] = LLMType.EMBEDDING.value
  157. row["llm_name"] = "text-embedding-3-small"
  158. row["used_tokens"] = 0
  159. try:
  160. TenantLLMService.save(**row)
  161. row = deepcopy(row)
  162. row["llm_name"] = "text-embedding-3-large"
  163. TenantLLMService.save(**row)
  164. except Exception:
  165. pass
  166. break
  167. for kb_id in KnowledgebaseService.get_all_ids():
  168. KnowledgebaseService.update_document_number_in_init(kb_id=kb_id, doc_num=DocumentService.get_kb_doc_count(kb_id))
  169. def add_graph_templates():
  170. dir = os.path.join(get_project_base_directory(), "agent", "templates")
  171. CanvasTemplateService.filter_delete([1 == 1])
  172. if not os.path.exists(dir):
  173. logging.warning("Missing agent templates!")
  174. return
  175. for fnm in os.listdir(dir):
  176. try:
  177. cnvs = json.load(open(os.path.join(dir, fnm), "r",encoding="utf-8"))
  178. try:
  179. CanvasTemplateService.save(**cnvs)
  180. except Exception:
  181. CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
  182. except Exception:
  183. logging.exception("Add agent templates error: ")
  184. def init_web_data():
  185. start_time = time.time()
  186. init_llm_factory()
  187. # if not UserService.get_all().count():
  188. # init_superuser()
  189. add_graph_templates()
  190. logging.info("init web data success:{}".format(time.time() - start_time))
  191. if __name__ == '__main__':
  192. init_web_db()
  193. init_web_data()