Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

db_models.py 36KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import logging
  17. import inspect
  18. import os
  19. import sys
  20. import typing
  21. import operator
  22. from enum import Enum
  23. from functools import wraps
  24. from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
  25. from flask_login import UserMixin
  26. from playhouse.migrate import MySQLMigrator, PostgresqlMigrator, migrate
  27. from peewee import (
  28. BigIntegerField, BooleanField, CharField,
  29. CompositeKey, IntegerField, TextField, FloatField, DateTimeField,
  30. Field, Model, Metadata
  31. )
  32. from playhouse.pool import PooledMySQLDatabase, PooledPostgresqlDatabase
  33. from api.db import SerializedType, ParserType
  34. from api import settings
  35. from api import utils
  36. def singleton(cls, *args, **kw):
  37. instances = {}
  38. def _singleton():
  39. key = str(cls) + str(os.getpid())
  40. if key not in instances:
  41. instances[key] = cls(*args, **kw)
  42. return instances[key]
  43. return _singleton
# Field classes that is_continuous_field() treats as "continuous"; BaseModel.query
# turns a two-element list on such a field into a range filter.
CONTINUOUS_FIELD_TYPE = {IntegerField, FloatField, DateTimeField}
# Name prefixes from which "<prefix>_time" / "<prefix>_date" field names are derived.
AUTO_DATE_TIMESTAMP_FIELD_PREFIX = {
    "create",
    "start",
    "end",
    "update",
    "read_access",
    "write_access"}
  52. class TextFieldType(Enum):
  53. MYSQL = 'LONGTEXT'
  54. POSTGRES = 'TEXT'
class LongTextField(TextField):
    """TextField whose column type is picked from TextFieldType by settings.DATABASE_TYPE."""

    # Resolved once, when the class body is executed at import time.
    field_type = TextFieldType[settings.DATABASE_TYPE.upper()].value
  57. class JSONField(LongTextField):
  58. default_value = {}
  59. def __init__(self, object_hook=None, object_pairs_hook=None, **kwargs):
  60. self._object_hook = object_hook
  61. self._object_pairs_hook = object_pairs_hook
  62. super().__init__(**kwargs)
  63. def db_value(self, value):
  64. if value is None:
  65. value = self.default_value
  66. return utils.json_dumps(value)
  67. def python_value(self, value):
  68. if not value:
  69. return self.default_value
  70. return utils.json_loads(
  71. value, object_hook=self._object_hook, object_pairs_hook=self._object_pairs_hook)
  72. class ListField(JSONField):
  73. default_value = []
  74. class SerializedField(LongTextField):
  75. def __init__(self, serialized_type=SerializedType.PICKLE,
  76. object_hook=None, object_pairs_hook=None, **kwargs):
  77. self._serialized_type = serialized_type
  78. self._object_hook = object_hook
  79. self._object_pairs_hook = object_pairs_hook
  80. super().__init__(**kwargs)
  81. def db_value(self, value):
  82. if self._serialized_type == SerializedType.PICKLE:
  83. return utils.serialize_b64(value, to_str=True)
  84. elif self._serialized_type == SerializedType.JSON:
  85. if value is None:
  86. return None
  87. return utils.json_dumps(value, with_type=True)
  88. else:
  89. raise ValueError(
  90. f"the serialized type {self._serialized_type} is not supported")
  91. def python_value(self, value):
  92. if self._serialized_type == SerializedType.PICKLE:
  93. return utils.deserialize_b64(value)
  94. elif self._serialized_type == SerializedType.JSON:
  95. if value is None:
  96. return {}
  97. return utils.json_loads(
  98. value, object_hook=self._object_hook, object_pairs_hook=self._object_pairs_hook)
  99. else:
  100. raise ValueError(
  101. f"the serialized type {self._serialized_type} is not supported")
  102. def is_continuous_field(cls: typing.Type) -> bool:
  103. if cls in CONTINUOUS_FIELD_TYPE:
  104. return True
  105. for p in cls.__bases__:
  106. if p in CONTINUOUS_FIELD_TYPE:
  107. return True
  108. elif p is not Field and p is not object:
  109. if is_continuous_field(p):
  110. return True
  111. else:
  112. return False
  113. def auto_date_timestamp_field():
  114. return {f"{f}_time" for f in AUTO_DATE_TIMESTAMP_FIELD_PREFIX}
  115. def auto_date_timestamp_db_field():
  116. return {f"f_{f}_time" for f in AUTO_DATE_TIMESTAMP_FIELD_PREFIX}
  117. def remove_field_name_prefix(field_name):
  118. return field_name[2:] if field_name.startswith('f_') else field_name
  119. class BaseModel(Model):
  120. create_time = BigIntegerField(null=True, index=True)
  121. create_date = DateTimeField(null=True, index=True)
  122. update_time = BigIntegerField(null=True, index=True)
  123. update_date = DateTimeField(null=True, index=True)
  124. def to_json(self):
  125. # This function is obsolete
  126. return self.to_dict()
  127. def to_dict(self):
  128. return self.__dict__['__data__']
  129. def to_human_model_dict(self, only_primary_with: list = None):
  130. model_dict = self.__dict__['__data__']
  131. if not only_primary_with:
  132. return {remove_field_name_prefix(
  133. k): v for k, v in model_dict.items()}
  134. human_model_dict = {}
  135. for k in self._meta.primary_key.field_names:
  136. human_model_dict[remove_field_name_prefix(k)] = model_dict[k]
  137. for k in only_primary_with:
  138. human_model_dict[k] = model_dict[f'f_{k}']
  139. return human_model_dict
  140. @property
  141. def meta(self) -> Metadata:
  142. return self._meta
  143. @classmethod
  144. def get_primary_keys_name(cls):
  145. return cls._meta.primary_key.field_names if isinstance(cls._meta.primary_key, CompositeKey) else [
  146. cls._meta.primary_key.name]
  147. @classmethod
  148. def getter_by(cls, attr):
  149. return operator.attrgetter(attr)(cls)
  150. @classmethod
  151. def query(cls, reverse=None, order_by=None, **kwargs):
  152. filters = []
  153. for f_n, f_v in kwargs.items():
  154. attr_name = '%s' % f_n
  155. if not hasattr(cls, attr_name) or f_v is None:
  156. continue
  157. if type(f_v) in {list, set}:
  158. f_v = list(f_v)
  159. if is_continuous_field(type(getattr(cls, attr_name))):
  160. if len(f_v) == 2:
  161. for i, v in enumerate(f_v):
  162. if isinstance(
  163. v, str) and f_n in auto_date_timestamp_field():
  164. # time type: %Y-%m-%d %H:%M:%S
  165. f_v[i] = utils.date_string_to_timestamp(v)
  166. lt_value = f_v[0]
  167. gt_value = f_v[1]
  168. if lt_value is not None and gt_value is not None:
  169. filters.append(
  170. cls.getter_by(attr_name).between(
  171. lt_value, gt_value))
  172. elif lt_value is not None:
  173. filters.append(
  174. operator.attrgetter(attr_name)(cls) >= lt_value)
  175. elif gt_value is not None:
  176. filters.append(
  177. operator.attrgetter(attr_name)(cls) <= gt_value)
  178. else:
  179. filters.append(operator.attrgetter(attr_name)(cls) << f_v)
  180. else:
  181. filters.append(operator.attrgetter(attr_name)(cls) == f_v)
  182. if filters:
  183. query_records = cls.select().where(*filters)
  184. if reverse is not None:
  185. if not order_by or not hasattr(cls, f"{order_by}"):
  186. order_by = "create_time"
  187. if reverse is True:
  188. query_records = query_records.order_by(
  189. cls.getter_by(f"{order_by}").desc())
  190. elif reverse is False:
  191. query_records = query_records.order_by(
  192. cls.getter_by(f"{order_by}").asc())
  193. return [query_record for query_record in query_records]
  194. else:
  195. return []
  196. @classmethod
  197. def insert(cls, __data=None, **insert):
  198. if isinstance(__data, dict) and __data:
  199. __data[cls._meta.combined["create_time"]
  200. ] = utils.current_timestamp()
  201. if insert:
  202. insert["create_time"] = utils.current_timestamp()
  203. return super().insert(__data, **insert)
  204. # update and insert will call this method
  205. @classmethod
  206. def _normalize_data(cls, data, kwargs):
  207. normalized = super()._normalize_data(data, kwargs)
  208. if not normalized:
  209. return {}
  210. normalized[cls._meta.combined["update_time"]
  211. ] = utils.current_timestamp()
  212. for f_n in AUTO_DATE_TIMESTAMP_FIELD_PREFIX:
  213. if {f"{f_n}_time", f"{f_n}_date"}.issubset(cls._meta.combined.keys()) and \
  214. cls._meta.combined[f"{f_n}_time"] in normalized and \
  215. normalized[cls._meta.combined[f"{f_n}_time"]] is not None:
  216. normalized[cls._meta.combined[f"{f_n}_date"]] = utils.timestamp_to_date(
  217. normalized[cls._meta.combined[f"{f_n}_time"]])
  218. return normalized
  219. class JsonSerializedField(SerializedField):
  220. def __init__(self, object_hook=utils.from_dict_hook,
  221. object_pairs_hook=None, **kwargs):
  222. super(JsonSerializedField, self).__init__(serialized_type=SerializedType.JSON, object_hook=object_hook,
  223. object_pairs_hook=object_pairs_hook, **kwargs)
class PooledDatabase(Enum):
    """Pooled peewee database class for each supported backend name."""

    MYSQL = PooledMySQLDatabase
    POSTGRES = PooledPostgresqlDatabase
class DatabaseMigrator(Enum):
    """playhouse.migrate migrator class for each supported backend name."""

    MYSQL = MySQLMigrator
    POSTGRES = PostgresqlMigrator
  230. @singleton
  231. class BaseDataBase:
  232. def __init__(self):
  233. database_config = settings.DATABASE.copy()
  234. db_name = database_config.pop("name")
  235. self.database_connection = PooledDatabase[settings.DATABASE_TYPE.upper()].value(db_name, **database_config)
  236. logging.info('init database on cluster mode successfully')
  237. class PostgresDatabaseLock:
  238. def __init__(self, lock_name, timeout=10, db=None):
  239. self.lock_name = lock_name
  240. self.timeout = int(timeout)
  241. self.db = db if db else DB
  242. def lock(self):
  243. cursor = self.db.execute_sql("SELECT pg_try_advisory_lock(%s)", self.timeout)
  244. ret = cursor.fetchone()
  245. if ret[0] == 0:
  246. raise Exception(f'acquire postgres lock {self.lock_name} timeout')
  247. elif ret[0] == 1:
  248. return True
  249. else:
  250. raise Exception(f'failed to acquire lock {self.lock_name}')
  251. def unlock(self):
  252. cursor = self.db.execute_sql("SELECT pg_advisory_unlock(%s)", self.timeout)
  253. ret = cursor.fetchone()
  254. if ret[0] == 0:
  255. raise Exception(
  256. f'postgres lock {self.lock_name} was not established by this thread')
  257. elif ret[0] == 1:
  258. return True
  259. else:
  260. raise Exception(f'postgres lock {self.lock_name} does not exist')
  261. def __enter__(self):
  262. if isinstance(self.db, PostgresDatabaseLock):
  263. self.lock()
  264. return self
  265. def __exit__(self, exc_type, exc_val, exc_tb):
  266. if isinstance(self.db, PostgresDatabaseLock):
  267. self.unlock()
  268. def __call__(self, func):
  269. @wraps(func)
  270. def magic(*args, **kwargs):
  271. with self:
  272. return func(*args, **kwargs)
  273. return magic
  274. class MysqlDatabaseLock:
  275. def __init__(self, lock_name, timeout=10, db=None):
  276. self.lock_name = lock_name
  277. self.timeout = int(timeout)
  278. self.db = db if db else DB
  279. def lock(self):
  280. # SQL parameters only support %s format placeholders
  281. cursor = self.db.execute_sql(
  282. "SELECT GET_LOCK(%s, %s)", (self.lock_name, self.timeout))
  283. ret = cursor.fetchone()
  284. if ret[0] == 0:
  285. raise Exception(f'acquire mysql lock {self.lock_name} timeout')
  286. elif ret[0] == 1:
  287. return True
  288. else:
  289. raise Exception(f'failed to acquire lock {self.lock_name}')
  290. def unlock(self):
  291. cursor = self.db.execute_sql(
  292. "SELECT RELEASE_LOCK(%s)", (self.lock_name,))
  293. ret = cursor.fetchone()
  294. if ret[0] == 0:
  295. raise Exception(
  296. f'mysql lock {self.lock_name} was not established by this thread')
  297. elif ret[0] == 1:
  298. return True
  299. else:
  300. raise Exception(f'mysql lock {self.lock_name} does not exist')
  301. def __enter__(self):
  302. if isinstance(self.db, PooledMySQLDatabase):
  303. self.lock()
  304. return self
  305. def __exit__(self, exc_type, exc_val, exc_tb):
  306. if isinstance(self.db, PooledMySQLDatabase):
  307. self.unlock()
  308. def __call__(self, func):
  309. @wraps(func)
  310. def magic(*args, **kwargs):
  311. with self:
  312. return func(*args, **kwargs)
  313. return magic
class DatabaseLock(Enum):
    """Lock class for each supported backend name."""

    MYSQL = MysqlDatabaseLock
    POSTGRES = PostgresDatabaseLock
# Module-wide pooled connection, taken from the BaseDataBase singleton.
DB = BaseDataBase().database_connection
# Attach the lock class matching settings.DATABASE_TYPE so callers can use DB.lock(...).
DB.lock = DatabaseLock[settings.DATABASE_TYPE.upper()].value
  319. def close_connection():
  320. try:
  321. if DB:
  322. DB.close_stale(age=30)
  323. except Exception as e:
  324. logging.exception(e)
class DataBaseModel(BaseModel):
    """BaseModel bound to the module-wide DB; init_database_tables() creates a table for each subclass."""

    class Meta:
        database = DB
  328. @DB.connection_context()
  329. def init_database_tables(alter_fields=[]):
  330. members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
  331. table_objs = []
  332. create_failed_list = []
  333. for name, obj in members:
  334. if obj != DataBaseModel and issubclass(obj, DataBaseModel):
  335. table_objs.append(obj)
  336. logging.debug(f"start create table {obj.__name__}")
  337. try:
  338. obj.create_table()
  339. logging.debug(f"create table success: {obj.__name__}")
  340. except Exception as e:
  341. logging.exception(e)
  342. create_failed_list.append(obj.__name__)
  343. if create_failed_list:
  344. logging.error(f"create tables failed: {create_failed_list}")
  345. raise Exception(f"create tables failed: {create_failed_list}")
  346. migrate_db()
  347. def fill_db_model_object(model_object, human_model_dict):
  348. for k, v in human_model_dict.items():
  349. attr_name = '%s' % k
  350. if hasattr(model_object.__class__, attr_name):
  351. setattr(model_object, attr_name, v)
  352. return model_object
  353. class User(DataBaseModel, UserMixin):
  354. id = CharField(max_length=32, primary_key=True)
  355. access_token = CharField(max_length=255, null=True, index=True)
  356. nickname = CharField(max_length=100, null=False, help_text="nicky name", index=True)
  357. password = CharField(max_length=255, null=True, help_text="password", index=True)
  358. email = CharField(
  359. max_length=255,
  360. null=False,
  361. help_text="email",
  362. index=True)
  363. avatar = TextField(null=True, help_text="avatar base64 string")
  364. language = CharField(
  365. max_length=32,
  366. null=True,
  367. help_text="English|Chinese",
  368. default="Chinese" if "zh_CN" in os.getenv("LANG", "") else "English",
  369. index=True)
  370. color_schema = CharField(
  371. max_length=32,
  372. null=True,
  373. help_text="Bright|Dark",
  374. default="Bright",
  375. index=True)
  376. timezone = CharField(
  377. max_length=64,
  378. null=True,
  379. help_text="Timezone",
  380. default="UTC+8\tAsia/Shanghai",
  381. index=True)
  382. last_login_time = DateTimeField(null=True, index=True)
  383. is_authenticated = CharField(max_length=1, null=False, default="1", index=True)
  384. is_active = CharField(max_length=1, null=False, default="1", index=True)
  385. is_anonymous = CharField(max_length=1, null=False, default="0", index=True)
  386. login_channel = CharField(null=True, help_text="from which user login", index=True)
  387. status = CharField(
  388. max_length=1,
  389. null=True,
  390. help_text="is it validate(0: wasted, 1: validate)",
  391. default="1",
  392. index=True)
  393. is_superuser = BooleanField(null=True, help_text="is root", default=False, index=True)
  394. def __str__(self):
  395. return self.email
  396. def get_id(self):
  397. jwt = Serializer(secret_key=settings.SECRET_KEY)
  398. return jwt.dumps(str(self.access_token))
  399. class Meta:
  400. db_table = "user"
  401. class Tenant(DataBaseModel):
  402. id = CharField(max_length=32, primary_key=True)
  403. name = CharField(max_length=100, null=True, help_text="Tenant name", index=True)
  404. public_key = CharField(max_length=255, null=True, index=True)
  405. llm_id = CharField(max_length=128, null=False, help_text="default llm ID", index=True)
  406. embd_id = CharField(
  407. max_length=128,
  408. null=False,
  409. help_text="default embedding model ID",
  410. index=True)
  411. asr_id = CharField(
  412. max_length=128,
  413. null=False,
  414. help_text="default ASR model ID",
  415. index=True)
  416. img2txt_id = CharField(
  417. max_length=128,
  418. null=False,
  419. help_text="default image to text model ID",
  420. index=True)
  421. rerank_id = CharField(
  422. max_length=128,
  423. null=False,
  424. help_text="default rerank model ID",
  425. index=True)
  426. tts_id = CharField(
  427. max_length=256,
  428. null=True,
  429. help_text="default tts model ID",
  430. index=True)
  431. parser_ids = CharField(
  432. max_length=256,
  433. null=False,
  434. help_text="document processors",
  435. index=True)
  436. credit = IntegerField(default=512, index=True)
  437. status = CharField(
  438. max_length=1,
  439. null=True,
  440. help_text="is it validate(0: wasted, 1: validate)",
  441. default="1",
  442. index=True)
  443. class Meta:
  444. db_table = "tenant"
  445. class UserTenant(DataBaseModel):
  446. id = CharField(max_length=32, primary_key=True)
  447. user_id = CharField(max_length=32, null=False, index=True)
  448. tenant_id = CharField(max_length=32, null=False, index=True)
  449. role = CharField(max_length=32, null=False, help_text="UserTenantRole", index=True)
  450. invited_by = CharField(max_length=32, null=False, index=True)
  451. status = CharField(
  452. max_length=1,
  453. null=True,
  454. help_text="is it validate(0: wasted, 1: validate)",
  455. default="1",
  456. index=True)
  457. class Meta:
  458. db_table = "user_tenant"
  459. class InvitationCode(DataBaseModel):
  460. id = CharField(max_length=32, primary_key=True)
  461. code = CharField(max_length=32, null=False, index=True)
  462. visit_time = DateTimeField(null=True, index=True)
  463. user_id = CharField(max_length=32, null=True, index=True)
  464. tenant_id = CharField(max_length=32, null=True, index=True)
  465. status = CharField(
  466. max_length=1,
  467. null=True,
  468. help_text="is it validate(0: wasted, 1: validate)",
  469. default="1",
  470. index=True)
  471. class Meta:
  472. db_table = "invitation_code"
  473. class LLMFactories(DataBaseModel):
  474. name = CharField(
  475. max_length=128,
  476. null=False,
  477. help_text="LLM factory name",
  478. primary_key=True)
  479. logo = TextField(null=True, help_text="llm logo base64")
  480. tags = CharField(
  481. max_length=255,
  482. null=False,
  483. help_text="LLM, Text Embedding, Image2Text, ASR",
  484. index=True)
  485. status = CharField(
  486. max_length=1,
  487. null=True,
  488. help_text="is it validate(0: wasted, 1: validate)",
  489. default="1",
  490. index=True)
  491. def __str__(self):
  492. return self.name
  493. class Meta:
  494. db_table = "llm_factories"
  495. class LLM(DataBaseModel):
  496. # LLMs dictionary
  497. llm_name = CharField(
  498. max_length=128,
  499. null=False,
  500. help_text="LLM name",
  501. index=True)
  502. model_type = CharField(
  503. max_length=128,
  504. null=False,
  505. help_text="LLM, Text Embedding, Image2Text, ASR",
  506. index=True)
  507. fid = CharField(max_length=128, null=False, help_text="LLM factory id", index=True)
  508. max_tokens = IntegerField(default=0)
  509. tags = CharField(
  510. max_length=255,
  511. null=False,
  512. help_text="LLM, Text Embedding, Image2Text, Chat, 32k...",
  513. index=True)
  514. status = CharField(
  515. max_length=1,
  516. null=True,
  517. help_text="is it validate(0: wasted, 1: validate)",
  518. default="1",
  519. index=True)
  520. def __str__(self):
  521. return self.llm_name
  522. class Meta:
  523. primary_key = CompositeKey('fid', 'llm_name')
  524. db_table = "llm"
  525. class TenantLLM(DataBaseModel):
  526. tenant_id = CharField(max_length=32, null=False, index=True)
  527. llm_factory = CharField(
  528. max_length=128,
  529. null=False,
  530. help_text="LLM factory name",
  531. index=True)
  532. model_type = CharField(
  533. max_length=128,
  534. null=True,
  535. help_text="LLM, Text Embedding, Image2Text, ASR",
  536. index=True)
  537. llm_name = CharField(
  538. max_length=128,
  539. null=True,
  540. help_text="LLM name",
  541. default="",
  542. index=True)
  543. api_key = CharField(max_length=1024, null=True, help_text="API KEY", index=True)
  544. api_base = CharField(max_length=255, null=True, help_text="API Base")
  545. max_tokens = IntegerField(default=8192, index=True)
  546. used_tokens = IntegerField(default=0, index=True)
  547. def __str__(self):
  548. return self.llm_name
  549. class Meta:
  550. db_table = "tenant_llm"
  551. primary_key = CompositeKey('tenant_id', 'llm_factory', 'llm_name')
  552. class Knowledgebase(DataBaseModel):
  553. id = CharField(max_length=32, primary_key=True)
  554. avatar = TextField(null=True, help_text="avatar base64 string")
  555. tenant_id = CharField(max_length=32, null=False, index=True)
  556. name = CharField(
  557. max_length=128,
  558. null=False,
  559. help_text="KB name",
  560. index=True)
  561. language = CharField(
  562. max_length=32,
  563. null=True,
  564. default="Chinese" if "zh_CN" in os.getenv("LANG", "") else "English",
  565. help_text="English|Chinese",
  566. index=True)
  567. description = TextField(null=True, help_text="KB description")
  568. embd_id = CharField(
  569. max_length=128,
  570. null=False,
  571. help_text="default embedding model ID",
  572. index=True)
  573. permission = CharField(
  574. max_length=16,
  575. null=False,
  576. help_text="me|team",
  577. default="me",
  578. index=True)
  579. created_by = CharField(max_length=32, null=False, index=True)
  580. doc_num = IntegerField(default=0, index=True)
  581. token_num = IntegerField(default=0, index=True)
  582. chunk_num = IntegerField(default=0, index=True)
  583. similarity_threshold = FloatField(default=0.2, index=True)
  584. vector_similarity_weight = FloatField(default=0.3, index=True)
  585. parser_id = CharField(
  586. max_length=32,
  587. null=False,
  588. help_text="default parser ID",
  589. default=ParserType.NAIVE.value,
  590. index=True)
  591. parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
  592. pagerank = IntegerField(default=0, index=False)
  593. status = CharField(
  594. max_length=1,
  595. null=True,
  596. help_text="is it validate(0: wasted, 1: validate)",
  597. default="1",
  598. index=True)
  599. def __str__(self):
  600. return self.name
  601. class Meta:
  602. db_table = "knowledgebase"
  603. class Document(DataBaseModel):
  604. id = CharField(max_length=32, primary_key=True)
  605. thumbnail = TextField(null=True, help_text="thumbnail base64 string")
  606. kb_id = CharField(max_length=256, null=False, index=True)
  607. parser_id = CharField(
  608. max_length=32,
  609. null=False,
  610. help_text="default parser ID",
  611. index=True)
  612. parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
  613. source_type = CharField(
  614. max_length=128,
  615. null=False,
  616. default="local",
  617. help_text="where dose this document come from",
  618. index=True)
  619. type = CharField(max_length=32, null=False, help_text="file extension",
  620. index=True)
  621. created_by = CharField(
  622. max_length=32,
  623. null=False,
  624. help_text="who created it",
  625. index=True)
  626. name = CharField(
  627. max_length=255,
  628. null=True,
  629. help_text="file name",
  630. index=True)
  631. location = CharField(
  632. max_length=255,
  633. null=True,
  634. help_text="where dose it store",
  635. index=True)
  636. size = IntegerField(default=0, index=True)
  637. token_num = IntegerField(default=0, index=True)
  638. chunk_num = IntegerField(default=0, index=True)
  639. progress = FloatField(default=0, index=True)
  640. progress_msg = TextField(
  641. null=True,
  642. help_text="process message",
  643. default="")
  644. process_begin_at = DateTimeField(null=True, index=True)
  645. process_duation = FloatField(default=0)
  646. run = CharField(
  647. max_length=1,
  648. null=True,
  649. help_text="start to run processing or cancel.(1: run it; 2: cancel)",
  650. default="0",
  651. index=True)
  652. status = CharField(
  653. max_length=1,
  654. null=True,
  655. help_text="is it validate(0: wasted, 1: validate)",
  656. default="1",
  657. index=True)
  658. class Meta:
  659. db_table = "document"
  660. class File(DataBaseModel):
  661. id = CharField(
  662. max_length=32,
  663. primary_key=True)
  664. parent_id = CharField(
  665. max_length=32,
  666. null=False,
  667. help_text="parent folder id",
  668. index=True)
  669. tenant_id = CharField(
  670. max_length=32,
  671. null=False,
  672. help_text="tenant id",
  673. index=True)
  674. created_by = CharField(
  675. max_length=32,
  676. null=False,
  677. help_text="who created it",
  678. index=True)
  679. name = CharField(
  680. max_length=255,
  681. null=False,
  682. help_text="file name or folder name",
  683. index=True)
  684. location = CharField(
  685. max_length=255,
  686. null=True,
  687. help_text="where dose it store",
  688. index=True)
  689. size = IntegerField(default=0, index=True)
  690. type = CharField(max_length=32, null=False, help_text="file extension", index=True)
  691. source_type = CharField(
  692. max_length=128,
  693. null=False,
  694. default="",
  695. help_text="where dose this document come from", index=True)
  696. class Meta:
  697. db_table = "file"
  698. class File2Document(DataBaseModel):
  699. id = CharField(
  700. max_length=32,
  701. primary_key=True)
  702. file_id = CharField(
  703. max_length=32,
  704. null=True,
  705. help_text="file id",
  706. index=True)
  707. document_id = CharField(
  708. max_length=32,
  709. null=True,
  710. help_text="document id",
  711. index=True)
  712. class Meta:
  713. db_table = "file2document"
  714. class Task(DataBaseModel):
  715. id = CharField(max_length=32, primary_key=True)
  716. doc_id = CharField(max_length=32, null=False, index=True)
  717. from_page = IntegerField(default=0)
  718. to_page = IntegerField(default=100000000)
  719. begin_at = DateTimeField(null=True, index=True)
  720. process_duation = FloatField(default=0)
  721. progress = FloatField(default=0, index=True)
  722. progress_msg = TextField(
  723. null=True,
  724. help_text="process message",
  725. default="")
  726. retry_count = IntegerField(default=0)
  727. digest = TextField(null=True, help_text="task digest", default="")
  728. chunk_ids = LongTextField(null=True, help_text="chunk ids", default="")
  729. class Dialog(DataBaseModel):
  730. id = CharField(max_length=32, primary_key=True)
  731. tenant_id = CharField(max_length=32, null=False, index=True)
  732. name = CharField(
  733. max_length=255,
  734. null=True,
  735. help_text="dialog application name",
  736. index=True)
  737. description = TextField(null=True, help_text="Dialog description")
  738. icon = TextField(null=True, help_text="icon base64 string")
  739. language = CharField(
  740. max_length=32,
  741. null=True,
  742. default="Chinese" if "zh_CN" in os.getenv("LANG", "") else "English",
  743. help_text="English|Chinese",
  744. index=True)
  745. llm_id = CharField(max_length=128, null=False, help_text="default llm ID")
  746. llm_setting = JSONField(null=False, default={"temperature": 0.1, "top_p": 0.3, "frequency_penalty": 0.7,
  747. "presence_penalty": 0.4, "max_tokens": 512})
  748. prompt_type = CharField(
  749. max_length=16,
  750. null=False,
  751. default="simple",
  752. help_text="simple|advanced",
  753. index=True)
  754. prompt_config = JSONField(null=False,
  755. default={"system": "", "prologue": "Hi! I'm your assistant, what can I do for you?",
  756. "parameters": [],
  757. "empty_response": "Sorry! No relevant content was found in the knowledge base!"})
  758. similarity_threshold = FloatField(default=0.2)
  759. vector_similarity_weight = FloatField(default=0.3)
  760. top_n = IntegerField(default=6)
  761. top_k = IntegerField(default=1024)
  762. do_refer = CharField(
  763. max_length=1,
  764. null=False,
  765. default="1",
  766. help_text="it needs to insert reference index into answer or not")
  767. rerank_id = CharField(
  768. max_length=128,
  769. null=False,
  770. help_text="default rerank model ID")
  771. kb_ids = JSONField(null=False, default=[])
  772. status = CharField(
  773. max_length=1,
  774. null=True,
  775. help_text="is it validate(0: wasted, 1: validate)",
  776. default="1",
  777. index=True)
  778. class Meta:
  779. db_table = "dialog"
  780. class Conversation(DataBaseModel):
  781. id = CharField(max_length=32, primary_key=True)
  782. dialog_id = CharField(max_length=32, null=False, index=True)
  783. name = CharField(max_length=255, null=True, help_text="converastion name", index=True)
  784. message = JSONField(null=True)
  785. reference = JSONField(null=True, default=[])
  786. user_id = CharField(max_length=255, null=True, help_text="user_id", index=True)
  787. class Meta:
  788. db_table = "conversation"
  789. class APIToken(DataBaseModel):
  790. tenant_id = CharField(max_length=32, null=False, index=True)
  791. token = CharField(max_length=255, null=False, index=True)
  792. dialog_id = CharField(max_length=32, null=False, index=True)
  793. source = CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True)
  794. beta = CharField(max_length=255, null=True, index=True)
  795. class Meta:
  796. db_table = "api_token"
  797. primary_key = CompositeKey('tenant_id', 'token')
  798. class API4Conversation(DataBaseModel):
  799. id = CharField(max_length=32, primary_key=True)
  800. dialog_id = CharField(max_length=32, null=False, index=True)
  801. user_id = CharField(max_length=255, null=False, help_text="user_id", index=True)
  802. message = JSONField(null=True)
  803. reference = JSONField(null=True, default=[])
  804. tokens = IntegerField(default=0)
  805. source = CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True)
  806. dsl = JSONField(null=True, default={})
  807. duration = FloatField(default=0, index=True)
  808. round = IntegerField(default=0, index=True)
  809. thumb_up = IntegerField(default=0, index=True)
  810. class Meta:
  811. db_table = "api_4_conversation"
  812. class UserCanvas(DataBaseModel):
  813. id = CharField(max_length=32, primary_key=True)
  814. avatar = TextField(null=True, help_text="avatar base64 string")
  815. user_id = CharField(max_length=255, null=False, help_text="user_id", index=True)
  816. title = CharField(max_length=255, null=True, help_text="Canvas title")
  817. description = TextField(null=True, help_text="Canvas description")
  818. canvas_type = CharField(max_length=32, null=True, help_text="Canvas type", index=True)
  819. dsl = JSONField(null=True, default={})
  820. class Meta:
  821. db_table = "user_canvas"
  822. class CanvasTemplate(DataBaseModel):
  823. id = CharField(max_length=32, primary_key=True)
  824. avatar = TextField(null=True, help_text="avatar base64 string")
  825. title = CharField(max_length=255, null=True, help_text="Canvas title")
  826. description = TextField(null=True, help_text="Canvas description")
  827. canvas_type = CharField(max_length=32, null=True, help_text="Canvas type", index=True)
  828. dsl = JSONField(null=True, default={})
  829. class Meta:
  830. db_table = "canvas_template"
  831. def migrate_db():
  832. with DB.transaction():
  833. migrator = DatabaseMigrator[settings.DATABASE_TYPE.upper()].value(DB)
  834. try:
  835. migrate(
  836. migrator.add_column('file', 'source_type', CharField(max_length=128, null=False, default="",
  837. help_text="where dose this document come from",
  838. index=True))
  839. )
  840. except Exception:
  841. pass
  842. try:
  843. migrate(
  844. migrator.add_column('tenant', 'rerank_id',
  845. CharField(max_length=128, null=False, default="BAAI/bge-reranker-v2-m3",
  846. help_text="default rerank model ID"))
  847. )
  848. except Exception:
  849. pass
  850. try:
  851. migrate(
  852. migrator.add_column('dialog', 'rerank_id', CharField(max_length=128, null=False, default="",
  853. help_text="default rerank model ID"))
  854. )
  855. except Exception:
  856. pass
  857. try:
  858. migrate(
  859. migrator.add_column('dialog', 'top_k', IntegerField(default=1024))
  860. )
  861. except Exception:
  862. pass
  863. try:
  864. migrate(
  865. migrator.alter_column_type('tenant_llm', 'api_key',
  866. CharField(max_length=1024, null=True, help_text="API KEY", index=True))
  867. )
  868. except Exception:
  869. pass
  870. try:
  871. migrate(
  872. migrator.add_column('api_token', 'source',
  873. CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True))
  874. )
  875. except Exception:
  876. pass
  877. try:
  878. migrate(
  879. migrator.add_column("tenant", "tts_id",
  880. CharField(max_length=256, null=True, help_text="default tts model ID", index=True))
  881. )
  882. except Exception:
  883. pass
  884. try:
  885. migrate(
  886. migrator.add_column('api_4_conversation', 'source',
  887. CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True))
  888. )
  889. except Exception:
  890. pass
  891. try:
  892. DB.execute_sql('ALTER TABLE llm DROP PRIMARY KEY;')
  893. DB.execute_sql('ALTER TABLE llm ADD PRIMARY KEY (llm_name,fid);')
  894. except Exception:
  895. pass
  896. try:
  897. migrate(
  898. migrator.add_column('task', 'retry_count', IntegerField(default=0))
  899. )
  900. except Exception:
  901. pass
  902. try:
  903. migrate(
  904. migrator.alter_column_type('api_token', 'dialog_id',
  905. CharField(max_length=32, null=True, index=True))
  906. )
  907. except Exception:
  908. pass
  909. try:
  910. migrate(
  911. migrator.add_column("tenant_llm", "max_tokens", IntegerField(default=8192, index=True))
  912. )
  913. except Exception:
  914. pass
  915. try:
  916. migrate(
  917. migrator.add_column("api_4_conversation", "dsl", JSONField(null=True, default={}))
  918. )
  919. except Exception:
  920. pass
  921. try:
  922. migrate(
  923. migrator.add_column("knowledgebase", "pagerank", IntegerField(default=0, index=False))
  924. )
  925. except Exception:
  926. pass
  927. try:
  928. migrate(
  929. migrator.add_column("api_token", "beta", CharField(max_length=255, null=True, index=True))
  930. )
  931. except Exception:
  932. pass
  933. try:
  934. migrate(
  935. migrator.add_column("task", "digest", TextField(null=True, help_text="task digest", default=""))
  936. )
  937. except Exception:
  938. pass
  939. try:
  940. migrate(
  941. migrator.add_column("task", "chunk_ids", LongTextField(null=True, help_text="chunk ids", default=""))
  942. )
  943. except Exception:
  944. pass
  945. try:
  946. migrate(
  947. migrator.add_column("conversation", "user_id",
  948. CharField(max_length=255, null=True, help_text="user_id", index=True))
  949. )
  950. except Exception:
  951. pass