You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

init_data.py 7.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import time
  17. import uuid
  18. from api.db import LLMType
  19. from api.db.db_models import init_database_tables as init_web_db
  20. from api.db.services import UserService
  21. from api.db.services.llm_service import LLMFactoriesService, LLMService
  22. def init_superuser():
  23. user_info = {
  24. "id": uuid.uuid1().hex,
  25. "password": "admin",
  26. "nickname": "admin",
  27. "is_superuser": True,
  28. "email": "kai.hu@infiniflow.org",
  29. "creator": "system",
  30. "status": "1",
  31. }
  32. UserService.save(**user_info)
  33. def init_llm_factory():
  34. factory_infos = [{
  35. "name": "OpenAI",
  36. "logo": "",
  37. "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
  38. "status": "1",
  39. },{
  40. "name": "通义千问",
  41. "logo": "",
  42. "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
  43. "status": "1",
  44. },{
  45. "name": "Infiniflow",
  46. "logo": "",
  47. "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
  48. "status": "1",
  49. },{
  50. "name": "智普AI",
  51. "logo": "",
  52. "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
  53. "status": "1",
  54. },{
  55. "name": "文心一言",
  56. "logo": "",
  57. "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
  58. "status": "1",
  59. },
  60. ]
  61. llm_infos = [
  62. # ---------------------- OpenAI ------------------------
  63. {
  64. "fid": factory_infos[0]["name"],
  65. "llm_name": "gpt-3.5-turbo",
  66. "tags": "LLM,CHAT,4K",
  67. "max_tokens": 4096,
  68. "model_type": LLMType.CHAT.value
  69. },{
  70. "fid": factory_infos[0]["name"],
  71. "llm_name": "gpt-3.5-turbo-16k-0613",
  72. "tags": "LLM,CHAT,16k",
  73. "max_tokens": 16385,
  74. "model_type": LLMType.CHAT.value
  75. },{
  76. "fid": factory_infos[0]["name"],
  77. "llm_name": "text-embedding-ada-002",
  78. "tags": "TEXT EMBEDDING,8K",
  79. "max_tokens": 8191,
  80. "model_type": LLMType.EMBEDDING.value
  81. },{
  82. "fid": factory_infos[0]["name"],
  83. "llm_name": "whisper-1",
  84. "tags": "SPEECH2TEXT",
  85. "max_tokens": 25*1024*1024,
  86. "model_type": LLMType.SPEECH2TEXT.value
  87. },{
  88. "fid": factory_infos[0]["name"],
  89. "llm_name": "gpt-4",
  90. "tags": "LLM,CHAT,8K",
  91. "max_tokens": 8191,
  92. "model_type": LLMType.CHAT.value
  93. },{
  94. "fid": factory_infos[0]["name"],
  95. "llm_name": "gpt-4-32k",
  96. "tags": "LLM,CHAT,32K",
  97. "max_tokens": 32768,
  98. "model_type": LLMType.CHAT.value
  99. },{
  100. "fid": factory_infos[0]["name"],
  101. "llm_name": "gpt-4-vision-preview",
  102. "tags": "LLM,CHAT,IMAGE2TEXT",
  103. "max_tokens": 765,
  104. "model_type": LLMType.IMAGE2TEXT.value
  105. },
  106. # ----------------------- Qwen -----------------------
  107. {
  108. "fid": factory_infos[1]["name"],
  109. "llm_name": "qwen-turbo",
  110. "tags": "LLM,CHAT,8K",
  111. "max_tokens": 8191,
  112. "model_type": LLMType.CHAT.value
  113. },{
  114. "fid": factory_infos[1]["name"],
  115. "llm_name": "qwen-plus",
  116. "tags": "LLM,CHAT,32K",
  117. "max_tokens": 32768,
  118. "model_type": LLMType.CHAT.value
  119. },{
  120. "fid": factory_infos[1]["name"],
  121. "llm_name": "text-embedding-v2",
  122. "tags": "TEXT EMBEDDING,2K",
  123. "max_tokens": 2048,
  124. "model_type": LLMType.EMBEDDING.value
  125. },{
  126. "fid": factory_infos[1]["name"],
  127. "llm_name": "paraformer-realtime-8k-v1",
  128. "tags": "SPEECH2TEXT",
  129. "max_tokens": 25*1024*1024,
  130. "model_type": LLMType.SPEECH2TEXT.value
  131. },{
  132. "fid": factory_infos[1]["name"],
  133. "llm_name": "qwen_vl_chat_v1",
  134. "tags": "LLM,CHAT,IMAGE2TEXT",
  135. "max_tokens": 765,
  136. "model_type": LLMType.IMAGE2TEXT.value
  137. },
  138. # ----------------------- Infiniflow -----------------------
  139. {
  140. "fid": factory_infos[2]["name"],
  141. "llm_name": "gpt-3.5-turbo",
  142. "tags": "LLM,CHAT,4K",
  143. "max_tokens": 4096,
  144. "model_type": LLMType.CHAT.value
  145. },{
  146. "fid": factory_infos[2]["name"],
  147. "llm_name": "text-embedding-ada-002",
  148. "tags": "TEXT EMBEDDING,8K",
  149. "max_tokens": 8191,
  150. "model_type": LLMType.EMBEDDING.value
  151. },{
  152. "fid": factory_infos[2]["name"],
  153. "llm_name": "whisper-1",
  154. "tags": "SPEECH2TEXT",
  155. "max_tokens": 25*1024*1024,
  156. "model_type": LLMType.SPEECH2TEXT.value
  157. },{
  158. "fid": factory_infos[2]["name"],
  159. "llm_name": "gpt-4-vision-preview",
  160. "tags": "LLM,CHAT,IMAGE2TEXT",
  161. "max_tokens": 765,
  162. "model_type": LLMType.IMAGE2TEXT.value
  163. },
  164. # ---------------------- ZhipuAI ----------------------
  165. {
  166. "fid": factory_infos[3]["name"],
  167. "llm_name": "glm-3-turbo",
  168. "tags": "LLM,CHAT,",
  169. "max_tokens": 128 * 1000,
  170. "model_type": LLMType.CHAT.value
  171. }, {
  172. "fid": factory_infos[3]["name"],
  173. "llm_name": "glm-4",
  174. "tags": "LLM,CHAT,",
  175. "max_tokens": 128 * 1000,
  176. "model_type": LLMType.CHAT.value
  177. }, {
  178. "fid": factory_infos[3]["name"],
  179. "llm_name": "glm-4v",
  180. "tags": "LLM,CHAT,IMAGE2TEXT",
  181. "max_tokens": 2000,
  182. "model_type": LLMType.IMAGE2TEXT.value
  183. },
  184. {
  185. "fid": factory_infos[3]["name"],
  186. "llm_name": "embedding-2",
  187. "tags": "TEXT EMBEDDING",
  188. "max_tokens": 512,
  189. "model_type": LLMType.SPEECH2TEXT.value
  190. },
  191. ]
  192. for info in factory_infos:
  193. LLMFactoriesService.save(**info)
  194. for info in llm_infos:
  195. LLMService.save(**info)
  196. def init_web_data():
  197. start_time = time.time()
  198. if not UserService.get_all().count():
  199. init_superuser()
  200. if not LLMService.get_all().count():init_llm_factory()
  201. print("init web data success:{}".format(time.time() - start_time))
  202. if __name__ == '__main__':
  203. init_web_db()
  204. init_web_data()