您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

db_models.py 23KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import inspect
  17. import os
  18. import sys
  19. import typing
  20. import operator
  21. from functools import wraps
  22. from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
  23. from flask_login import UserMixin
  24. from peewee import (
  25. BigAutoField, BigIntegerField, BooleanField, CharField,
  26. CompositeKey, Insert, IntegerField, TextField, FloatField, DateTimeField,
  27. Field, Model, Metadata
  28. )
  29. from playhouse.pool import PooledMySQLDatabase
  30. from api.db import SerializedType, ParserType
  31. from api.settings import DATABASE, stat_logger, SECRET_KEY
  32. from api.utils.log_utils import getLogger
  33. from api import utils
  # Module-level logger, obtained from the project's log utilities.
  LOGGER = getLogger()
  def singleton(cls, *args, **kw):
      """Turn ``cls`` into a zero-argument factory that builds it once per process id.

      The constructor arguments are the ``*args`` / ``**kw`` captured here.
      Instances are cached under a key built from ``str(cls)`` and the
      current PID, so a call made under a different PID creates its own
      instance the first time it runs.
      """
      created = {}

      def _singleton():
          cache_key = "".join((str(cls), str(os.getpid())))
          if cache_key in created:
              return created[cache_key]
          created[cache_key] = cls(*args, **kw)
          return created[cache_key]

      return _singleton
  # Field classes that is_continuous_field() treats as "continuous".
  CONTINUOUS_FIELD_TYPE = {
      IntegerField,
      FloatField,
      DateTimeField,
  }
  # Name prefixes used to build the "<prefix>_time" / "<prefix>_date" field names.
  AUTO_DATE_TIMESTAMP_FIELD_PREFIX = {
      "create",
      "start",
      "end",
      "update",
      "read_access",
      "write_access",
  }
  class LongTextField(TextField):
      """Text field whose declared column type is ``LONGTEXT``."""

      field_type = 'LONGTEXT'
  class JSONField(LongTextField):
      """LONGTEXT-backed field that stores its value as a JSON string.

      ``default_value`` is the value substituted for ``None`` on write and for
      an empty/missing column value on read. Subclasses may override it.
      """

      default_value = {}

      def __init__(self, object_hook=None, object_pairs_hook=None, **kwargs):
          """Remember the JSON decoding hooks and forward ``kwargs`` to the parent field."""
          self._object_hook = object_hook
          self._object_pairs_hook = object_pairs_hook
          super().__init__(**kwargs)

      def db_value(self, value):
          """Serialize ``value`` with ``utils.json_dumps``; ``None`` is stored as the default."""
          if value is None:
              value = self.default_value
          return utils.json_dumps(value)

      def python_value(self, value):
          """Decode the stored string; a falsy column value yields a copy of the default.

          A fresh copy is returned rather than ``default_value`` itself:
          ``default_value`` is a class attribute, so handing out the same
          dict/list to every caller would let one caller's in-place mutation
          leak into every later read.
          """
          if not value:
              from copy import deepcopy
              return deepcopy(self.default_value)
          return utils.json_loads(value, object_hook=self._object_hook, object_pairs_hook=self._object_pairs_hook)
  class ListField(JSONField):
      """JSON field whose fallback value is an empty list instead of an empty dict."""

      default_value = []
  class SerializedField(LongTextField):
      """LONGTEXT-backed field that serializes values as pickle/base64 or typed JSON.

      The format is chosen once, at construction, through ``serialized_type``.
      """

      def __init__(self, serialized_type=SerializedType.PICKLE, object_hook=None, object_pairs_hook=None, **kwargs):
          """Store the serialization mode and JSON hooks, then initialise the parent field."""
          self._serialized_type = serialized_type
          self._object_hook = object_hook
          self._object_pairs_hook = object_pairs_hook
          super().__init__(**kwargs)

      def db_value(self, value):
          """Encode ``value`` for storage.

          Raises:
              ValueError: if the configured serialized type is neither PICKLE nor JSON.
          """
          mode = self._serialized_type
          if mode == SerializedType.PICKLE:
              return utils.serialize_b64(value, to_str=True)
          if mode == SerializedType.JSON:
              # In JSON mode a None value is passed through as None.
              return None if value is None else utils.json_dumps(value, with_type=True)
          raise ValueError(f"the serialized type {self._serialized_type} is not supported")

      def python_value(self, value):
          """Decode a stored value.

          Raises:
              ValueError: if the configured serialized type is neither PICKLE nor JSON.
          """
          mode = self._serialized_type
          if mode == SerializedType.PICKLE:
              return utils.deserialize_b64(value)
          if mode == SerializedType.JSON:
              # A None column value in JSON mode is returned as an empty dict.
              if value is None:
                  return {}
              return utils.json_loads(value, object_hook=self._object_hook, object_pairs_hook=self._object_pairs_hook)
          raise ValueError(f"the serialized type {self._serialized_type} is not supported")
  def is_continuous_field(cls: typing.Type) -> bool:
      """Tell whether ``cls`` is, or inherits from, a type in ``CONTINUOUS_FIELD_TYPE``.

      The direct bases are checked first; any base other than ``Field`` and
      ``object`` is then searched recursively. ``False`` is returned only
      after every base has been examined without a match.
      """
      if cls in CONTINUOUS_FIELD_TYPE:
          return True
      for parent in cls.__bases__:
          if parent in CONTINUOUS_FIELD_TYPE:
              return True
          if parent == Field or parent == object:
              # Do not recurse into Field or object.
              continue
          if is_continuous_field(parent):
              return True
      return False
  def auto_date_timestamp_field():
      """Return the set of ``"<prefix>_time"`` names for every auto-timestamp prefix."""
      names = set()
      for prefix in AUTO_DATE_TIMESTAMP_FIELD_PREFIX:
          names.add(f"{prefix}_time")
      return names
  def auto_date_timestamp_db_field():
      """Return the set of ``"f_<prefix>_time"`` names for every auto-timestamp prefix."""
      names = set()
      for prefix in AUTO_DATE_TIMESTAMP_FIELD_PREFIX:
          names.add(f"f_{prefix}_time")
      return names
  def remove_field_name_prefix(field_name):
      """Strip a leading ``f_`` from ``field_name``; other names are returned unchanged."""
      if field_name.startswith('f_'):
          return field_name[2:]
      return field_name
  class BaseModel(Model):
      """Common base for the ORM models: audit columns, dict export and a keyword query helper.

      ``create_time`` / ``update_time`` hold timestamps produced by
      ``utils.current_timestamp()``; the matching ``*_date`` columns are
      derived from them in ``_normalize_data``.
      """

      create_time = BigIntegerField(null=True)
      create_date = DateTimeField(null=True)
      update_time = BigIntegerField(null=True)
      update_date = DateTimeField(null=True)

      def to_json(self):
          """Obsolete alias of :meth:`to_dict`."""
          # This function is obsolete
          return self.to_dict()

      def to_dict(self):
          """Return the instance's underlying ``__data__`` mapping (not a copy)."""
          return self.__dict__['__data__']

      def to_human_model_dict(self, only_primary_with: list = None):
          """Export field values with the ``f_`` name prefix removed.

          Args:
              only_primary_with: when given and non-empty, export only the
                  primary-key field(s) plus these names; each name ``k`` is
                  read from the ``f_<k>`` entry of the data mapping.

          Returns:
              A new dict of exported values.
          """
          model_dict = self.__dict__['__data__']
          if not only_primary_with:
              return {remove_field_name_prefix(k): v for k, v in model_dict.items()}
          human_model_dict = {}
          # Use get_primary_keys_name() so single-field primary keys work too:
          # only CompositeKey exposes ``field_names``.
          for k in self.get_primary_keys_name():
              human_model_dict[remove_field_name_prefix(k)] = model_dict[k]
          for k in only_primary_with:
              human_model_dict[k] = model_dict[f'f_{k}']
          return human_model_dict

      @property
      def meta(self) -> Metadata:
          """Expose the model's ``_meta`` object."""
          return self._meta

      @classmethod
      def get_primary_keys_name(cls):
          """Return the primary-key field name(s), for composite and single-field keys alike."""
          primary_key = cls._meta.primary_key
          if isinstance(primary_key, CompositeKey):
              return primary_key.field_names
          return [primary_key.name]

      @classmethod
      def getter_by(cls, attr):
          """Resolve the attribute named ``attr`` on the model class."""
          return operator.attrgetter(attr)(cls)

      @classmethod
      def query(cls, reverse=None, order_by=None, **kwargs):
          """Select rows matching the keyword filters.

          For each ``name=value`` pair whose name is an attribute of the
          model and whose value is not ``None``:

          * a scalar value becomes an equality filter;
          * a ``list``/``set`` on a non-continuous field becomes an ``IN`` filter;
          * a two-element ``list``/``set`` on a continuous field becomes a
            range filter (either bound may be ``None``); string bounds on
            auto-timestamp fields are converted with
            ``utils.date_string_to_timestamp`` first. Collections of any
            other length on a continuous field add no filter.

          Args:
              reverse: ``True`` for descending, ``False`` for ascending,
                  ``None`` for no explicit ordering.
              order_by: attribute to order by; falls back to ``create_time``
                  when empty or not an attribute of the model.

          Returns:
              A list of model instances, or ``[]`` when no filter was built.
          """
          filters = []
          for f_n, f_v in kwargs.items():
              attr_name = '%s' % f_n
              if not hasattr(cls, attr_name) or f_v is None:
                  continue
              column = cls.getter_by(attr_name)
              if type(f_v) not in {list, set}:
                  filters.append(column == f_v)
                  continue
              f_v = list(f_v)
              if not is_continuous_field(type(getattr(cls, attr_name))):
                  filters.append(column << f_v)
                  continue
              if len(f_v) != 2:
                  # A range needs exactly two bounds; anything else is ignored.
                  continue
              for i, v in enumerate(f_v):
                  if isinstance(v, str) and f_n in auto_date_timestamp_field():
                      # time type: %Y-%m-%d %H:%M:%S
                      f_v[i] = utils.date_string_to_timestamp(v)
              lower, upper = f_v
              if lower is not None and upper is not None:
                  filters.append(column.between(lower, upper))
              elif lower is not None:
                  filters.append(column >= lower)
              elif upper is not None:
                  filters.append(column <= upper)
          if not filters:
              return []
          query_records = cls.select().where(*filters)
          if reverse is not None:
              if not order_by or not hasattr(cls, f"{order_by}"):
                  order_by = "create_time"
              if reverse is True:
                  query_records = query_records.order_by(cls.getter_by(f"{order_by}").desc())
              elif reverse is False:
                  query_records = query_records.order_by(cls.getter_by(f"{order_by}").asc())
          return list(query_records)

      @classmethod
      def insert(cls, __data=None, **insert):
          """Build an insert query, stamping ``create_time`` with the current timestamp.

          A non-empty ``__data`` dict is updated in place; keyword data gets a
          ``create_time`` entry. Empty inputs are forwarded untouched.
          """
          if isinstance(__data, dict) and __data:
              __data[cls._meta.combined["create_time"]] = utils.current_timestamp()
          if insert:
              insert["create_time"] = utils.current_timestamp()
          return super().insert(__data, **insert)

      # update and insert will call this method
      @classmethod
      def _normalize_data(cls, data, kwargs):
          """Normalize row data, stamp ``update_time`` and derive ``*_date`` from ``*_time``.

          Returns ``{}`` when the parent normalization yields nothing. For
          every auto-timestamp prefix whose ``<prefix>_time`` and
          ``<prefix>_date`` both exist on the model, a non-``None`` time value
          in the data is converted with ``utils.timestamp_to_date`` and stored
          under the date field.
          """
          normalized = super()._normalize_data(data, kwargs)
          if not normalized:
              return {}
          combined = cls._meta.combined
          normalized[combined["update_time"]] = utils.current_timestamp()
          for f_n in AUTO_DATE_TIMESTAMP_FIELD_PREFIX:
              time_name = f"{f_n}_time"
              date_name = f"{f_n}_date"
              if not {time_name, date_name}.issubset(combined.keys()):
                  continue
              time_field = combined[time_name]
              if time_field in normalized and normalized[time_field] is not None:
                  normalized[combined[date_name]] = utils.timestamp_to_date(normalized[time_field])
          return normalized
  class JsonSerializedField(SerializedField):
      """``SerializedField`` fixed to the JSON serialization mode."""

      def __init__(self, object_hook=utils.from_dict_hook, object_pairs_hook=None, **kwargs):
          """Forward the hooks and ``kwargs`` to ``SerializedField`` with ``SerializedType.JSON``."""
          super().__init__(
              serialized_type=SerializedType.JSON,
              object_hook=object_hook,
              object_pairs_hook=object_pairs_hook,
              **kwargs,
          )
  @singleton
  class BaseDataBase:
      """Holder of the pooled MySQL connection built from the ``DATABASE`` settings."""

      def __init__(self):
          """Create the pooled database from a copy of ``DATABASE``.

          The ``"name"`` entry is used as the database name; all remaining
          entries are passed as keyword arguments to ``PooledMySQLDatabase``.
          """
          pool_options = DATABASE.copy()
          schema_name = pool_options.pop("name")
          self.database_connection = PooledMySQLDatabase(schema_name, **pool_options)
          stat_logger.info('init mysql database on cluster mode successfully')
  class DatabaseLock:
      """Named MySQL lock (``GET_LOCK`` / ``RELEASE_LOCK``) usable as a context manager or decorator."""

      def __init__(self, lock_name, timeout=10, db=None):
          """Remember the lock name, the timeout (coerced to ``int``) and the database.

          A falsy ``db`` falls back to the module-level ``DB``.
          """
          self.lock_name = lock_name
          self.timeout = int(timeout)
          if not db:
              db = DB
          self.db = db

      def lock(self):
          """Acquire the lock.

          Returns:
              ``True`` when ``GET_LOCK`` reports 1.

          Raises:
              Exception: when ``GET_LOCK`` reports 0 (timeout) or any other value.
          """
          # SQL parameters only support %s format placeholders
          cursor = self.db.execute_sql("SELECT GET_LOCK(%s, %s)", (self.lock_name, self.timeout))
          status = cursor.fetchone()[0]
          if status == 1:
              return True
          if status == 0:
              raise Exception(f'acquire mysql lock {self.lock_name} timeout')
          raise Exception(f'failed to acquire lock {self.lock_name}')

      def unlock(self):
          """Release the lock.

          Returns:
              ``True`` when ``RELEASE_LOCK`` reports 1.

          Raises:
              Exception: when ``RELEASE_LOCK`` reports 0 or any other value.
          """
          cursor = self.db.execute_sql("SELECT RELEASE_LOCK(%s)", (self.lock_name,))
          status = cursor.fetchone()[0]
          if status == 1:
              return True
          if status == 0:
              raise Exception(f'mysql lock {self.lock_name} was not established by this thread')
          raise Exception(f'mysql lock {self.lock_name} does not exist')

      def __enter__(self):
          """Take the lock when the database is a ``PooledMySQLDatabase``; always return ``self``."""
          needs_lock = isinstance(self.db, PooledMySQLDatabase)
          if needs_lock:
              self.lock()
          return self

      def __exit__(self, exc_type, exc_val, exc_tb):
          """Release the lock when the database is a ``PooledMySQLDatabase``."""
          needs_unlock = isinstance(self.db, PooledMySQLDatabase)
          if needs_unlock:
              self.unlock()

      def __call__(self, func):
          """Decorate ``func`` so each call runs inside ``with self``."""
          @wraps(func)
          def magic(*args, **kwargs):
              with self:
                  result = func(*args, **kwargs)
              return result
          return magic
  # Shared database handle: the connection held by the BaseDataBase singleton.
  DB = BaseDataBase().database_connection
  # Expose the lock class on the handle so it can be reached as DB.lock(...).
  DB.lock = DatabaseLock
  def close_connection():
      """Close ``DB`` if it is truthy; any exception is logged instead of propagated."""
      try:
          if not DB:
              return
          DB.close()
      except Exception as err:
          LOGGER.exception(err)
  class DataBaseModel(BaseModel):
      """``BaseModel`` bound to the module-level ``DB`` database."""

      class Meta:
          database = DB
  @DB.connection_context()
  def init_database_tables():
      """Create a table for every ``DataBaseModel`` subclass defined in this module.

      Each class found in the module that is a strict subclass of
      ``DataBaseModel`` gets ``create_table()`` called on it. A failure for
      one model is logged and does not stop the remaining models.

      Raises:
          Exception: after all models were attempted, if any creation failed;
              the message lists the failing class names.
      """
      members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
      create_failed_list = []
      for _, obj in members:
          if obj == DataBaseModel or not issubclass(obj, DataBaseModel):
              continue
          LOGGER.info(f"start create table {obj.__name__}")
          try:
              obj.create_table()
              LOGGER.info(f"create table success: {obj.__name__}")
          except Exception as e:
              LOGGER.exception(e)
              create_failed_list.append(obj.__name__)
      if create_failed_list:
          # Report the failure summary at error level before raising.
          LOGGER.error(f"create tables failed: {create_failed_list}")
          raise Exception(f"create tables failed: {create_failed_list}")
  def fill_db_model_object(model_object, human_model_dict):
      """Copy entries of ``human_model_dict`` onto ``model_object`` and return it.

      Only keys that name an attribute of the object's class are applied;
      all other keys are skipped.
      """
      model_cls = model_object.__class__
      for key, value in human_model_dict.items():
          attr_name = '%s' % key
          if not hasattr(model_cls, attr_name):
              continue
          setattr(model_object, attr_name, value)
      return model_object
  class User(DataBaseModel, UserMixin):
      """User account record, stored in the ``user`` table."""

      id = CharField(max_length=32, primary_key=True)
      access_token = CharField(max_length=255, null=True)
      nickname = CharField(max_length=100, null=False, help_text="nicky name")
      password = CharField(max_length=255, null=True, help_text="password")
      email = CharField(max_length=255, null=False, index=True, help_text="email")
      avatar = TextField(null=True, help_text="avatar base64 string")
      language = CharField(
          max_length=32,
          null=True,
          default="Chinese",
          help_text="English|Chinese",
      )
      color_schema = CharField(
          max_length=32,
          null=True,
          default="Dark",
          help_text="Bright|Dark",
      )
      last_login_time = DateTimeField(null=True)
      # Single-character string flags, not booleans.
      is_authenticated = CharField(max_length=1, null=False, default="1")
      is_active = CharField(max_length=1, null=False, default="1")
      is_anonymous = CharField(max_length=1, null=False, default="0")
      login_channel = CharField(null=True, help_text="from which user login")
      status = CharField(
          max_length=1,
          null=True,
          default="1",
          help_text="is it validate(0: wasted,1: validate)",
      )
      is_superuser = BooleanField(null=True, default=False, help_text="is root")

      def __str__(self):
          """Represent the user by e-mail address."""
          return self.email

      def get_id(self):
          """Return the access token, stringified and signed with ``SECRET_KEY``."""
          serializer = Serializer(secret_key=SECRET_KEY)
          return serializer.dumps(str(self.access_token))

      class Meta:
          db_table = "user"
  class Tenant(DataBaseModel):
      """Tenant record with its default model ids, stored in the ``tenant`` table."""

      id = CharField(max_length=32, primary_key=True)
      name = CharField(max_length=100, null=True, help_text="Tenant name")
      public_key = CharField(max_length=255, null=True)
      # Default model ids configured for the tenant.
      llm_id = CharField(max_length=128, null=False, help_text="default llm ID")
      embd_id = CharField(
          max_length=128,
          null=False,
          help_text="default embedding model ID",
      )
      asr_id = CharField(max_length=128, null=False, help_text="default ASR model ID")
      img2txt_id = CharField(
          max_length=128,
          null=False,
          help_text="default image to text model ID",
      )
      parser_ids = CharField(max_length=128, null=False, help_text="document processors")
      credit = IntegerField(default=512)
      status = CharField(
          max_length=1,
          null=True,
          default="1",
          help_text="is it validate(0: wasted,1: validate)",
      )

      class Meta:
          db_table = "tenant"
  class UserTenant(DataBaseModel):
      """Link between a user id and a tenant id, stored in the ``user_tenant`` table."""

      id = CharField(max_length=32, primary_key=True)
      user_id = CharField(max_length=32, null=False)
      tenant_id = CharField(max_length=32, null=False)
      role = CharField(max_length=32, null=False, help_text="UserTenantRole")
      invited_by = CharField(max_length=32, null=False)
      status = CharField(
          max_length=1,
          null=True,
          default="1",
          help_text="is it validate(0: wasted,1: validate)",
      )

      class Meta:
          db_table = "user_tenant"
  class InvitationCode(DataBaseModel):
      """Invitation code record, stored in the ``invitation_code`` table."""

      id = CharField(max_length=32, primary_key=True)
      code = CharField(max_length=32, null=False)
      visit_time = DateTimeField(null=True)
      user_id = CharField(max_length=32, null=True)
      tenant_id = CharField(max_length=32, null=True)
      status = CharField(
          max_length=1,
          null=True,
          default="1",
          help_text="is it validate(0: wasted,1: validate)",
      )

      class Meta:
          db_table = "invitation_code"
  class LLMFactories(DataBaseModel):
      """LLM factory record keyed by its name, stored in the ``llm_factories`` table."""

      name = CharField(
          max_length=128,
          null=False,
          primary_key=True,
          help_text="LLM factory name",
      )
      logo = TextField(null=True, help_text="llm logo base64")
      tags = CharField(
          max_length=255,
          null=False,
          help_text="LLM, Text Embedding, Image2Text, ASR",
      )
      status = CharField(
          max_length=1,
          null=True,
          default="1",
          help_text="is it validate(0: wasted,1: validate)",
      )

      def __str__(self):
          """Represent the factory by its name."""
          return self.name

      class Meta:
          db_table = "llm_factories"
  class LLM(DataBaseModel):
      """Entry of the LLMs dictionary, stored in the ``llm`` table."""

      # LLMs dictionary
      llm_name = CharField(max_length=128, null=False, index=True, help_text="LLM name")
      model_type = CharField(
          max_length=128,
          null=False,
          help_text="LLM, Text Embedding, Image2Text, ASR",
      )
      fid = CharField(max_length=128, null=False, help_text="LLM factory id")
      max_tokens = IntegerField(default=0)
      tags = CharField(
          max_length=255,
          null=False,
          help_text="LLM, Text Embedding, Image2Text, Chat, 32k...",
      )
      status = CharField(
          max_length=1,
          null=True,
          default="1",
          help_text="is it validate(0: wasted,1: validate)",
      )

      def __str__(self):
          """Represent the entry by its model name."""
          return self.llm_name

      class Meta:
          db_table = "llm"
  class TenantLLM(DataBaseModel):
      """Per-tenant LLM settings, stored in the ``tenant_llm`` table.

      Rows are keyed by the composite ``(tenant_id, llm_factory, llm_name)``.
      """

      tenant_id = CharField(max_length=32, null=False)
      llm_factory = CharField(max_length=128, null=False, help_text="LLM factory name")
      model_type = CharField(
          max_length=128,
          null=True,
          help_text="LLM, Text Embedding, Image2Text, ASR",
      )
      llm_name = CharField(max_length=128, null=True, default="", help_text="LLM name")
      api_key = CharField(max_length=255, null=True, help_text="API KEY")
      api_base = CharField(max_length=255, null=True, help_text="API Base")
      used_tokens = IntegerField(default=0)

      def __str__(self):
          """Represent the row by its model name."""
          return self.llm_name

      class Meta:
          db_table = "tenant_llm"
          primary_key = CompositeKey('tenant_id', 'llm_factory', 'llm_name')
  class Knowledgebase(DataBaseModel):
      """Knowledge base record, stored in the ``knowledgebase`` table."""

      id = CharField(max_length=32, primary_key=True)
      avatar = TextField(null=True, help_text="avatar base64 string")
      tenant_id = CharField(max_length=32, null=False)
      name = CharField(max_length=128, null=False, help_text="KB name", index=True)
      description = TextField(null=True, help_text="KB description")
      permission = CharField(max_length=16, null=False, help_text="me|team")
      created_by = CharField(max_length=32, null=False)
      doc_num = IntegerField(default=0)
      token_num = IntegerField(default=0)
      chunk_num = IntegerField(default=0)
      similarity_threshold = FloatField(default=0.2)
      vector_similarity_weight = FloatField(default=0.3)
      parser_id = CharField(max_length=32, null=False, help_text="default parser ID", default=ParserType.GENERAL.value)
      # The default is a callable so every instance gets its own dict; a dict
      # literal here would be one object shared (and mutable) across all rows.
      parser_config = JSONField(null=False, default=lambda: {"from_page": 0, "to_page": 100000})
      status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1")

      def __str__(self):
          """Represent the knowledge base by its name."""
          return self.name

      class Meta:
          db_table = "knowledgebase"
  class Document(DataBaseModel):
      """Document record with its parsing settings and progress, stored in the ``document`` table."""

      id = CharField(max_length=32, primary_key=True)
      thumbnail = TextField(null=True, help_text="thumbnail base64 string")
      kb_id = CharField(max_length=256, null=False, index=True)
      parser_id = CharField(max_length=32, null=False, help_text="default parser ID")
      # The default is a callable so every instance gets its own dict; a dict
      # literal here would be one object shared (and mutable) across all rows.
      parser_config = JSONField(null=False, default=lambda: {"from_page": 0, "to_page": 100000})
      source_type = CharField(max_length=128, null=False, default="local", help_text="where dose this document from")
      type = CharField(max_length=32, null=False, help_text="file extension")
      created_by = CharField(max_length=32, null=False, help_text="who created it")
      name = CharField(max_length=255, null=True, help_text="file name", index=True)
      location = CharField(max_length=255, null=True, help_text="where dose it store")
      size = IntegerField(default=0)
      token_num = IntegerField(default=0)
      chunk_num = IntegerField(default=0)
      progress = FloatField(default=0)
      progress_msg = CharField(max_length=512, null=True, help_text="process message", default="")
      process_begin_at = DateTimeField(null=True)
      # NOTE: "duation" is the existing column name; renaming it would change the schema.
      process_duation = FloatField(default=0)
      run = CharField(max_length=1, null=True, help_text="start to run processing or cancel.(1: run it; 2: cancel)", default="0")
      status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1")

      class Meta:
          db_table = "document"
  class Task(DataBaseModel):
      """Processing task for a page range of one document.

      No ``Meta.db_table`` is declared here, unlike the sibling models.
      """

      id = CharField(max_length=32, primary_key=True)
      doc_id = CharField(max_length=32, null=False, index=True)
      # Page range handled by this task.
      from_page = IntegerField(default=0)
      to_page = IntegerField(default=-1)
      begin_at = DateTimeField(null=True)
      process_duation = FloatField(default=0)
      progress = FloatField(default=0)
      progress_msg = CharField(
          max_length=255,
          null=True,
          default="",
          help_text="process message",
      )
  class Dialog(DataBaseModel):
      """Dialog application settings (LLM, prompt, retrieval), stored in the ``dialog`` table.

      Every JSON column default is a callable so each instance receives its
      own dict/list; a literal would be one object shared across all rows.
      """

      id = CharField(max_length=32, primary_key=True)
      tenant_id = CharField(max_length=32, null=False)
      name = CharField(max_length=255, null=True, help_text="dialog application name")
      description = TextField(null=True, help_text="Dialog description")
      icon = CharField(max_length=16, null=False, help_text="dialog icon")
      language = CharField(max_length=32, null=True, default="Chinese", help_text="English|Chinese")
      llm_id = CharField(max_length=32, null=False, help_text="default llm ID")
      llm_setting_type = CharField(max_length=8, null=False, help_text="Creative|Precise|Evenly|Custom",
                                   default="Creative")
      llm_setting = JSONField(null=False, default=lambda: {"temperature": 0.1, "top_p": 0.3, "frequency_penalty": 0.7,
                                                           "presence_penalty": 0.4, "max_tokens": 215})
      prompt_type = CharField(max_length=16, null=False, default="simple", help_text="simple|advanced")
      prompt_config = JSONField(null=False, default=lambda: {"system": "", "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
                                                             "parameters": [], "empty_response": "Sorry! 知识库中未找到相关内容!"})
      similarity_threshold = FloatField(default=0.2)
      vector_similarity_weight = FloatField(default=0.3)
      top_n = IntegerField(default=6)
      do_refer = CharField(max_length=1, null=False, help_text="it needs to insert reference index into answer or not", default="1")
      kb_ids = JSONField(null=False, default=list)
      status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1")

      class Meta:
          db_table = "dialog"
  439. # class DialogKb(DataBaseModel):
  440. # dialog_id = CharField(max_length=32, null=False, index=True)
  441. # kb_id = CharField(max_length=32, null=False)
  442. #
  443. # class Meta:
  444. # db_table = "dialog_kb"
  445. # primary_key = CompositeKey('dialog_id', 'kb_id')
  class Conversation(DataBaseModel):
      """Conversation belonging to a dialog, stored in the ``conversation`` table."""

      id = CharField(max_length=32, primary_key=True)
      dialog_id = CharField(max_length=32, null=False, index=True)
      name = CharField(
          max_length=255,
          null=True,
          help_text="converastion name",
      )
      # JSON payload of the conversation.
      message = JSONField(null=True)

      class Meta:
          db_table = "conversation"
  453. """
  454. class Meta:
  455. db_table = 't_pipeline_component_meta'
  456. indexes = (
  457. (('f_model_id', 'f_model_version', 'f_role', 'f_party_id', 'f_component_name'), True),
  458. )
  459. """