Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

db_models.py 32KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
import copy
import inspect
import operator
import os
import sys
import typing
from functools import wraps

from flask_login import UserMixin
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
from peewee import (
    BigIntegerField, BooleanField, CharField,
    CompositeKey, IntegerField, TextField, FloatField, DateTimeField,
    Field, Model, Metadata
)
from playhouse.migrate import MySQLMigrator, migrate
from playhouse.pool import PooledMySQLDatabase

from api import utils
from api.db import SerializedType, ParserType
from api.settings import DATABASE, stat_logger, SECRET_KEY
from api.utils.log_utils import getLogger
# Module-level logger shared by the helpers in this file; obtained from the
# project's log utilities (api.utils.log_utils.getLogger).
LOGGER = getLogger()
  36. def singleton(cls, *args, **kw):
  37. instances = {}
  38. def _singleton():
  39. key = str(cls) + str(os.getpid())
  40. if key not in instances:
  41. instances[key] = cls(*args, **kw)
  42. return instances[key]
  43. return _singleton
# Field classes regarded as "continuous": is_continuous_field() checks a
# field class (and its bases) against this set, and BaseModel.query() turns a
# two-element list filter on such a field into a range condition.
CONTINUOUS_FIELD_TYPE = {IntegerField, FloatField, DateTimeField}
# Name prefixes of timestamp columns. BaseModel._normalize_data() fills
# "<prefix>_date" from "<prefix>_time" whenever a model declares both.
AUTO_DATE_TIMESTAMP_FIELD_PREFIX = {
    "create",
    "start",
    "end",
    "update",
    "read_access",
    "write_access"}
class LongTextField(TextField):
    """Text field whose declared column type is ``LONGTEXT``."""
    field_type = 'LONGTEXT'
  54. class JSONField(LongTextField):
  55. default_value = {}
  56. def __init__(self, object_hook=None, object_pairs_hook=None, **kwargs):
  57. self._object_hook = object_hook
  58. self._object_pairs_hook = object_pairs_hook
  59. super().__init__(**kwargs)
  60. def db_value(self, value):
  61. if value is None:
  62. value = self.default_value
  63. return utils.json_dumps(value)
  64. def python_value(self, value):
  65. if not value:
  66. return self.default_value
  67. return utils.json_loads(
  68. value, object_hook=self._object_hook, object_pairs_hook=self._object_pairs_hook)
class ListField(JSONField):
    """JSONField variant whose empty/None fallback value is a list."""
    default_value = []
  71. class SerializedField(LongTextField):
  72. def __init__(self, serialized_type=SerializedType.PICKLE,
  73. object_hook=None, object_pairs_hook=None, **kwargs):
  74. self._serialized_type = serialized_type
  75. self._object_hook = object_hook
  76. self._object_pairs_hook = object_pairs_hook
  77. super().__init__(**kwargs)
  78. def db_value(self, value):
  79. if self._serialized_type == SerializedType.PICKLE:
  80. return utils.serialize_b64(value, to_str=True)
  81. elif self._serialized_type == SerializedType.JSON:
  82. if value is None:
  83. return None
  84. return utils.json_dumps(value, with_type=True)
  85. else:
  86. raise ValueError(
  87. f"the serialized type {self._serialized_type} is not supported")
  88. def python_value(self, value):
  89. if self._serialized_type == SerializedType.PICKLE:
  90. return utils.deserialize_b64(value)
  91. elif self._serialized_type == SerializedType.JSON:
  92. if value is None:
  93. return {}
  94. return utils.json_loads(
  95. value, object_hook=self._object_hook, object_pairs_hook=self._object_pairs_hook)
  96. else:
  97. raise ValueError(
  98. f"the serialized type {self._serialized_type} is not supported")
  99. def is_continuous_field(cls: typing.Type) -> bool:
  100. if cls in CONTINUOUS_FIELD_TYPE:
  101. return True
  102. for p in cls.__bases__:
  103. if p in CONTINUOUS_FIELD_TYPE:
  104. return True
  105. elif p != Field and p != object:
  106. if is_continuous_field(p):
  107. return True
  108. else:
  109. return False
  110. def auto_date_timestamp_field():
  111. return {f"{f}_time" for f in AUTO_DATE_TIMESTAMP_FIELD_PREFIX}
  112. def auto_date_timestamp_db_field():
  113. return {f"f_{f}_time" for f in AUTO_DATE_TIMESTAMP_FIELD_PREFIX}
  114. def remove_field_name_prefix(field_name):
  115. return field_name[2:] if field_name.startswith('f_') else field_name
  116. class BaseModel(Model):
  117. create_time = BigIntegerField(null=True, index=True)
  118. create_date = DateTimeField(null=True, index=True)
  119. update_time = BigIntegerField(null=True, index=True)
  120. update_date = DateTimeField(null=True, index=True)
  121. def to_json(self):
  122. # This function is obsolete
  123. return self.to_dict()
  124. def to_dict(self):
  125. return self.__dict__['__data__']
  126. def to_human_model_dict(self, only_primary_with: list = None):
  127. model_dict = self.__dict__['__data__']
  128. if not only_primary_with:
  129. return {remove_field_name_prefix(
  130. k): v for k, v in model_dict.items()}
  131. human_model_dict = {}
  132. for k in self._meta.primary_key.field_names:
  133. human_model_dict[remove_field_name_prefix(k)] = model_dict[k]
  134. for k in only_primary_with:
  135. human_model_dict[k] = model_dict[f'f_{k}']
  136. return human_model_dict
  137. @property
  138. def meta(self) -> Metadata:
  139. return self._meta
  140. @classmethod
  141. def get_primary_keys_name(cls):
  142. return cls._meta.primary_key.field_names if isinstance(cls._meta.primary_key, CompositeKey) else [
  143. cls._meta.primary_key.name]
  144. @classmethod
  145. def getter_by(cls, attr):
  146. return operator.attrgetter(attr)(cls)
  147. @classmethod
  148. def query(cls, reverse=None, order_by=None, **kwargs):
  149. filters = []
  150. for f_n, f_v in kwargs.items():
  151. attr_name = '%s' % f_n
  152. if not hasattr(cls, attr_name) or f_v is None:
  153. continue
  154. if type(f_v) in {list, set}:
  155. f_v = list(f_v)
  156. if is_continuous_field(type(getattr(cls, attr_name))):
  157. if len(f_v) == 2:
  158. for i, v in enumerate(f_v):
  159. if isinstance(
  160. v, str) and f_n in auto_date_timestamp_field():
  161. # time type: %Y-%m-%d %H:%M:%S
  162. f_v[i] = utils.date_string_to_timestamp(v)
  163. lt_value = f_v[0]
  164. gt_value = f_v[1]
  165. if lt_value is not None and gt_value is not None:
  166. filters.append(
  167. cls.getter_by(attr_name).between(
  168. lt_value, gt_value))
  169. elif lt_value is not None:
  170. filters.append(
  171. operator.attrgetter(attr_name)(cls) >= lt_value)
  172. elif gt_value is not None:
  173. filters.append(
  174. operator.attrgetter(attr_name)(cls) <= gt_value)
  175. else:
  176. filters.append(operator.attrgetter(attr_name)(cls) << f_v)
  177. else:
  178. filters.append(operator.attrgetter(attr_name)(cls) == f_v)
  179. if filters:
  180. query_records = cls.select().where(*filters)
  181. if reverse is not None:
  182. if not order_by or not hasattr(cls, f"{order_by}"):
  183. order_by = "create_time"
  184. if reverse is True:
  185. query_records = query_records.order_by(
  186. cls.getter_by(f"{order_by}").desc())
  187. elif reverse is False:
  188. query_records = query_records.order_by(
  189. cls.getter_by(f"{order_by}").asc())
  190. return [query_record for query_record in query_records]
  191. else:
  192. return []
  193. @classmethod
  194. def insert(cls, __data=None, **insert):
  195. if isinstance(__data, dict) and __data:
  196. __data[cls._meta.combined["create_time"]
  197. ] = utils.current_timestamp()
  198. if insert:
  199. insert["create_time"] = utils.current_timestamp()
  200. return super().insert(__data, **insert)
  201. # update and insert will call this method
  202. @classmethod
  203. def _normalize_data(cls, data, kwargs):
  204. normalized = super()._normalize_data(data, kwargs)
  205. if not normalized:
  206. return {}
  207. normalized[cls._meta.combined["update_time"]
  208. ] = utils.current_timestamp()
  209. for f_n in AUTO_DATE_TIMESTAMP_FIELD_PREFIX:
  210. if {f"{f_n}_time", f"{f_n}_date"}.issubset(cls._meta.combined.keys()) and \
  211. cls._meta.combined[f"{f_n}_time"] in normalized and \
  212. normalized[cls._meta.combined[f"{f_n}_time"]] is not None:
  213. normalized[cls._meta.combined[f"{f_n}_date"]] = utils.timestamp_to_date(
  214. normalized[cls._meta.combined[f"{f_n}_time"]])
  215. return normalized
  216. class JsonSerializedField(SerializedField):
  217. def __init__(self, object_hook=utils.from_dict_hook,
  218. object_pairs_hook=None, **kwargs):
  219. super(JsonSerializedField, self).__init__(serialized_type=SerializedType.JSON, object_hook=object_hook,
  220. object_pairs_hook=object_pairs_hook, **kwargs)
  221. @singleton
  222. class BaseDataBase:
  223. def __init__(self):
  224. database_config = DATABASE.copy()
  225. db_name = database_config.pop("name")
  226. self.database_connection = PooledMySQLDatabase(
  227. db_name, **database_config)
  228. stat_logger.info('init mysql database on cluster mode successfully')
  229. class DatabaseLock:
  230. def __init__(self, lock_name, timeout=10, db=None):
  231. self.lock_name = lock_name
  232. self.timeout = int(timeout)
  233. self.db = db if db else DB
  234. def lock(self):
  235. # SQL parameters only support %s format placeholders
  236. cursor = self.db.execute_sql(
  237. "SELECT GET_LOCK(%s, %s)", (self.lock_name, self.timeout))
  238. ret = cursor.fetchone()
  239. if ret[0] == 0:
  240. raise Exception(f'acquire mysql lock {self.lock_name} timeout')
  241. elif ret[0] == 1:
  242. return True
  243. else:
  244. raise Exception(f'failed to acquire lock {self.lock_name}')
  245. def unlock(self):
  246. cursor = self.db.execute_sql(
  247. "SELECT RELEASE_LOCK(%s)", (self.lock_name,))
  248. ret = cursor.fetchone()
  249. if ret[0] == 0:
  250. raise Exception(
  251. f'mysql lock {self.lock_name} was not established by this thread')
  252. elif ret[0] == 1:
  253. return True
  254. else:
  255. raise Exception(f'mysql lock {self.lock_name} does not exist')
  256. def __enter__(self):
  257. if isinstance(self.db, PooledMySQLDatabase):
  258. self.lock()
  259. return self
  260. def __exit__(self, exc_type, exc_val, exc_tb):
  261. if isinstance(self.db, PooledMySQLDatabase):
  262. self.unlock()
  263. def __call__(self, func):
  264. @wraps(func)
  265. def magic(*args, **kwargs):
  266. with self:
  267. return func(*args, **kwargs)
  268. return magic
# Shared database handle used by every model below; BaseDataBase is wrapped
# by @singleton, so calling it returns the cached holder instance.
DB = BaseDataBase().database_connection
# Expose the lock class on the handle so callers can write DB.lock(name, ...).
DB.lock = DatabaseLock
  271. def close_connection():
  272. try:
  273. if DB:
  274. DB.close_stale(age=30)
  275. except Exception as e:
  276. LOGGER.exception(e)
class DataBaseModel(BaseModel):
    """BaseModel bound to the module-level ``DB`` connection.

    init_database_tables() creates a table for every subclass of this class
    found in the module.
    """
    class Meta:
        database = DB
  280. @DB.connection_context()
  281. def init_database_tables(alter_fields=[]):
  282. members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
  283. table_objs = []
  284. create_failed_list = []
  285. for name, obj in members:
  286. if obj != DataBaseModel and issubclass(obj, DataBaseModel):
  287. table_objs.append(obj)
  288. LOGGER.info(f"start create table {obj.__name__}")
  289. try:
  290. obj.create_table()
  291. LOGGER.info(f"create table success: {obj.__name__}")
  292. except Exception as e:
  293. LOGGER.exception(e)
  294. create_failed_list.append(obj.__name__)
  295. if create_failed_list:
  296. LOGGER.info(f"create tables failed: {create_failed_list}")
  297. raise Exception(f"create tables failed: {create_failed_list}")
  298. migrate_db()
  299. def fill_db_model_object(model_object, human_model_dict):
  300. for k, v in human_model_dict.items():
  301. attr_name = '%s' % k
  302. if hasattr(model_object.__class__, attr_name):
  303. setattr(model_object, attr_name, v)
  304. return model_object
  305. class User(DataBaseModel, UserMixin):
  306. id = CharField(max_length=32, primary_key=True)
  307. access_token = CharField(max_length=255, null=True, index=True)
  308. nickname = CharField(max_length=100, null=False, help_text="nicky name", index=True)
  309. password = CharField(max_length=255, null=True, help_text="password", index=True)
  310. email = CharField(
  311. max_length=255,
  312. null=False,
  313. help_text="email",
  314. index=True)
  315. avatar = TextField(null=True, help_text="avatar base64 string")
  316. language = CharField(
  317. max_length=32,
  318. null=True,
  319. help_text="English|Chinese",
  320. default="Chinese" if "zh_CN" in os.getenv("LANG", "") else "English",
  321. index=True)
  322. color_schema = CharField(
  323. max_length=32,
  324. null=True,
  325. help_text="Bright|Dark",
  326. default="Bright",
  327. index=True)
  328. timezone = CharField(
  329. max_length=64,
  330. null=True,
  331. help_text="Timezone",
  332. default="UTC+8\tAsia/Shanghai",
  333. index=True)
  334. last_login_time = DateTimeField(null=True, index=True)
  335. is_authenticated = CharField(max_length=1, null=False, default="1", index=True)
  336. is_active = CharField(max_length=1, null=False, default="1", index=True)
  337. is_anonymous = CharField(max_length=1, null=False, default="0", index=True)
  338. login_channel = CharField(null=True, help_text="from which user login", index=True)
  339. status = CharField(
  340. max_length=1,
  341. null=True,
  342. help_text="is it validate(0: wasted,1: validate)",
  343. default="1",
  344. index=True)
  345. is_superuser = BooleanField(null=True, help_text="is root", default=False, index=True)
  346. def __str__(self):
  347. return self.email
  348. def get_id(self):
  349. jwt = Serializer(secret_key=SECRET_KEY)
  350. return jwt.dumps(str(self.access_token))
  351. class Meta:
  352. db_table = "user"
  353. class Tenant(DataBaseModel):
  354. id = CharField(max_length=32, primary_key=True)
  355. name = CharField(max_length=100, null=True, help_text="Tenant name", index=True)
  356. public_key = CharField(max_length=255, null=True, index=True)
  357. llm_id = CharField(max_length=128, null=False, help_text="default llm ID", index=True)
  358. embd_id = CharField(
  359. max_length=128,
  360. null=False,
  361. help_text="default embedding model ID",
  362. index=True)
  363. asr_id = CharField(
  364. max_length=128,
  365. null=False,
  366. help_text="default ASR model ID",
  367. index=True)
  368. img2txt_id = CharField(
  369. max_length=128,
  370. null=False,
  371. help_text="default image to text model ID",
  372. index=True)
  373. rerank_id = CharField(
  374. max_length=128,
  375. null=False,
  376. help_text="default rerank model ID",
  377. index=True)
  378. tts_id = CharField(
  379. max_length=256,
  380. null=True,
  381. help_text="default tts model ID",
  382. index=True)
  383. parser_ids = CharField(
  384. max_length=256,
  385. null=False,
  386. help_text="document processors",
  387. index=True)
  388. credit = IntegerField(default=512, index=True)
  389. status = CharField(
  390. max_length=1,
  391. null=True,
  392. help_text="is it validate(0: wasted,1: validate)",
  393. default="1",
  394. index=True)
  395. class Meta:
  396. db_table = "tenant"
  397. class UserTenant(DataBaseModel):
  398. id = CharField(max_length=32, primary_key=True)
  399. user_id = CharField(max_length=32, null=False, index=True)
  400. tenant_id = CharField(max_length=32, null=False, index=True)
  401. role = CharField(max_length=32, null=False, help_text="UserTenantRole", index=True)
  402. invited_by = CharField(max_length=32, null=False, index=True)
  403. status = CharField(
  404. max_length=1,
  405. null=True,
  406. help_text="is it validate(0: wasted,1: validate)",
  407. default="1",
  408. index=True)
  409. class Meta:
  410. db_table = "user_tenant"
  411. class InvitationCode(DataBaseModel):
  412. id = CharField(max_length=32, primary_key=True)
  413. code = CharField(max_length=32, null=False, index=True)
  414. visit_time = DateTimeField(null=True, index=True)
  415. user_id = CharField(max_length=32, null=True, index=True)
  416. tenant_id = CharField(max_length=32, null=True, index=True)
  417. status = CharField(
  418. max_length=1,
  419. null=True,
  420. help_text="is it validate(0: wasted,1: validate)",
  421. default="1",
  422. index=True)
  423. class Meta:
  424. db_table = "invitation_code"
  425. class LLMFactories(DataBaseModel):
  426. name = CharField(
  427. max_length=128,
  428. null=False,
  429. help_text="LLM factory name",
  430. primary_key=True)
  431. logo = TextField(null=True, help_text="llm logo base64")
  432. tags = CharField(
  433. max_length=255,
  434. null=False,
  435. help_text="LLM, Text Embedding, Image2Text, ASR",
  436. index=True)
  437. status = CharField(
  438. max_length=1,
  439. null=True,
  440. help_text="is it validate(0: wasted,1: validate)",
  441. default="1",
  442. index=True)
  443. def __str__(self):
  444. return self.name
  445. class Meta:
  446. db_table = "llm_factories"
  447. class LLM(DataBaseModel):
  448. # LLMs dictionary
  449. llm_name = CharField(
  450. max_length=128,
  451. null=False,
  452. help_text="LLM name",
  453. index=True)
  454. model_type = CharField(
  455. max_length=128,
  456. null=False,
  457. help_text="LLM, Text Embedding, Image2Text, ASR",
  458. index=True)
  459. fid = CharField(max_length=128, null=False, help_text="LLM factory id", index=True)
  460. max_tokens = IntegerField(default=0)
  461. tags = CharField(
  462. max_length=255,
  463. null=False,
  464. help_text="LLM, Text Embedding, Image2Text, Chat, 32k...",
  465. index=True)
  466. status = CharField(
  467. max_length=1,
  468. null=True,
  469. help_text="is it validate(0: wasted,1: validate)",
  470. default="1",
  471. index=True)
  472. def __str__(self):
  473. return self.llm_name
  474. class Meta:
  475. primary_key = CompositeKey('fid', 'llm_name')
  476. db_table = "llm"
  477. class TenantLLM(DataBaseModel):
  478. tenant_id = CharField(max_length=32, null=False, index=True)
  479. llm_factory = CharField(
  480. max_length=128,
  481. null=False,
  482. help_text="LLM factory name",
  483. index=True)
  484. model_type = CharField(
  485. max_length=128,
  486. null=True,
  487. help_text="LLM, Text Embedding, Image2Text, ASR",
  488. index=True)
  489. llm_name = CharField(
  490. max_length=128,
  491. null=True,
  492. help_text="LLM name",
  493. default="",
  494. index=True)
  495. api_key = CharField(max_length=1024, null=True, help_text="API KEY", index=True)
  496. api_base = CharField(max_length=255, null=True, help_text="API Base")
  497. used_tokens = IntegerField(default=0, index=True)
  498. def __str__(self):
  499. return self.llm_name
  500. class Meta:
  501. db_table = "tenant_llm"
  502. primary_key = CompositeKey('tenant_id', 'llm_factory', 'llm_name')
  503. class Knowledgebase(DataBaseModel):
  504. id = CharField(max_length=32, primary_key=True)
  505. avatar = TextField(null=True, help_text="avatar base64 string")
  506. tenant_id = CharField(max_length=32, null=False, index=True)
  507. name = CharField(
  508. max_length=128,
  509. null=False,
  510. help_text="KB name",
  511. index=True)
  512. language = CharField(
  513. max_length=32,
  514. null=True,
  515. default="Chinese" if "zh_CN" in os.getenv("LANG", "") else "English",
  516. help_text="English|Chinese",
  517. index=True)
  518. description = TextField(null=True, help_text="KB description")
  519. embd_id = CharField(
  520. max_length=128,
  521. null=False,
  522. help_text="default embedding model ID",
  523. index=True)
  524. permission = CharField(
  525. max_length=16,
  526. null=False,
  527. help_text="me|team",
  528. default="me",
  529. index=True)
  530. created_by = CharField(max_length=32, null=False, index=True)
  531. doc_num = IntegerField(default=0, index=True)
  532. token_num = IntegerField(default=0, index=True)
  533. chunk_num = IntegerField(default=0, index=True)
  534. similarity_threshold = FloatField(default=0.2, index=True)
  535. vector_similarity_weight = FloatField(default=0.3, index=True)
  536. parser_id = CharField(
  537. max_length=32,
  538. null=False,
  539. help_text="default parser ID",
  540. default=ParserType.NAIVE.value,
  541. index=True)
  542. parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
  543. status = CharField(
  544. max_length=1,
  545. null=True,
  546. help_text="is it validate(0: wasted,1: validate)",
  547. default="1",
  548. index=True)
  549. def __str__(self):
  550. return self.name
  551. class Meta:
  552. db_table = "knowledgebase"
  553. class Document(DataBaseModel):
  554. id = CharField(max_length=32, primary_key=True)
  555. thumbnail = TextField(null=True, help_text="thumbnail base64 string")
  556. kb_id = CharField(max_length=256, null=False, index=True)
  557. parser_id = CharField(
  558. max_length=32,
  559. null=False,
  560. help_text="default parser ID",
  561. index=True)
  562. parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
  563. source_type = CharField(
  564. max_length=128,
  565. null=False,
  566. default="local",
  567. help_text="where dose this document come from",
  568. index=True)
  569. type = CharField(max_length=32, null=False, help_text="file extension",
  570. index=True)
  571. created_by = CharField(
  572. max_length=32,
  573. null=False,
  574. help_text="who created it",
  575. index=True)
  576. name = CharField(
  577. max_length=255,
  578. null=True,
  579. help_text="file name",
  580. index=True)
  581. location = CharField(
  582. max_length=255,
  583. null=True,
  584. help_text="where dose it store",
  585. index=True)
  586. size = IntegerField(default=0, index=True)
  587. token_num = IntegerField(default=0, index=True)
  588. chunk_num = IntegerField(default=0, index=True)
  589. progress = FloatField(default=0, index=True)
  590. progress_msg = TextField(
  591. null=True,
  592. help_text="process message",
  593. default="")
  594. process_begin_at = DateTimeField(null=True, index=True)
  595. process_duation = FloatField(default=0)
  596. run = CharField(
  597. max_length=1,
  598. null=True,
  599. help_text="start to run processing or cancel.(1: run it; 2: cancel)",
  600. default="0",
  601. index=True)
  602. status = CharField(
  603. max_length=1,
  604. null=True,
  605. help_text="is it validate(0: wasted,1: validate)",
  606. default="1",
  607. index=True)
  608. class Meta:
  609. db_table = "document"
  610. class File(DataBaseModel):
  611. id = CharField(
  612. max_length=32,
  613. primary_key=True)
  614. parent_id = CharField(
  615. max_length=32,
  616. null=False,
  617. help_text="parent folder id",
  618. index=True)
  619. tenant_id = CharField(
  620. max_length=32,
  621. null=False,
  622. help_text="tenant id",
  623. index=True)
  624. created_by = CharField(
  625. max_length=32,
  626. null=False,
  627. help_text="who created it",
  628. index=True)
  629. name = CharField(
  630. max_length=255,
  631. null=False,
  632. help_text="file name or folder name",
  633. index=True)
  634. location = CharField(
  635. max_length=255,
  636. null=True,
  637. help_text="where dose it store",
  638. index=True)
  639. size = IntegerField(default=0, index=True)
  640. type = CharField(max_length=32, null=False, help_text="file extension", index=True)
  641. source_type = CharField(
  642. max_length=128,
  643. null=False,
  644. default="",
  645. help_text="where dose this document come from", index=True)
  646. class Meta:
  647. db_table = "file"
  648. class File2Document(DataBaseModel):
  649. id = CharField(
  650. max_length=32,
  651. primary_key=True)
  652. file_id = CharField(
  653. max_length=32,
  654. null=True,
  655. help_text="file id",
  656. index=True)
  657. document_id = CharField(
  658. max_length=32,
  659. null=True,
  660. help_text="document id",
  661. index=True)
  662. class Meta:
  663. db_table = "file2document"
  664. class Task(DataBaseModel):
  665. id = CharField(max_length=32, primary_key=True)
  666. doc_id = CharField(max_length=32, null=False, index=True)
  667. from_page = IntegerField(default=0)
  668. to_page = IntegerField(default=-1)
  669. begin_at = DateTimeField(null=True, index=True)
  670. process_duation = FloatField(default=0)
  671. progress = FloatField(default=0, index=True)
  672. progress_msg = TextField(
  673. null=True,
  674. help_text="process message",
  675. default="")
  676. retry_count = IntegerField(default=0)
  677. class Dialog(DataBaseModel):
  678. id = CharField(max_length=32, primary_key=True)
  679. tenant_id = CharField(max_length=32, null=False, index=True)
  680. name = CharField(
  681. max_length=255,
  682. null=True,
  683. help_text="dialog application name",
  684. index=True)
  685. description = TextField(null=True, help_text="Dialog description")
  686. icon = TextField(null=True, help_text="icon base64 string")
  687. language = CharField(
  688. max_length=32,
  689. null=True,
  690. default="Chinese" if "zh_CN" in os.getenv("LANG", "") else "English",
  691. help_text="English|Chinese",
  692. index=True)
  693. llm_id = CharField(max_length=128, null=False, help_text="default llm ID")
  694. llm_setting = JSONField(null=False, default={"temperature": 0.1, "top_p": 0.3, "frequency_penalty": 0.7,
  695. "presence_penalty": 0.4, "max_tokens": 512})
  696. prompt_type = CharField(
  697. max_length=16,
  698. null=False,
  699. default="simple",
  700. help_text="simple|advanced",
  701. index=True)
  702. prompt_config = JSONField(null=False, default={"system": "", "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
  703. "parameters": [], "empty_response": "Sorry! 知识库中未找到相关内容!"})
  704. similarity_threshold = FloatField(default=0.2)
  705. vector_similarity_weight = FloatField(default=0.3)
  706. top_n = IntegerField(default=6)
  707. top_k = IntegerField(default=1024)
  708. do_refer = CharField(
  709. max_length=1,
  710. null=False,
  711. help_text="it needs to insert reference index into answer or not")
  712. rerank_id = CharField(
  713. max_length=128,
  714. null=False,
  715. help_text="default rerank model ID")
  716. kb_ids = JSONField(null=False, default=[])
  717. status = CharField(
  718. max_length=1,
  719. null=True,
  720. help_text="is it validate(0: wasted,1: validate)",
  721. default="1",
  722. index=True)
  723. class Meta:
  724. db_table = "dialog"
  725. class Conversation(DataBaseModel):
  726. id = CharField(max_length=32, primary_key=True)
  727. dialog_id = CharField(max_length=32, null=False, index=True)
  728. name = CharField(max_length=255, null=True, help_text="converastion name", index=True)
  729. message = JSONField(null=True)
  730. reference = JSONField(null=True, default=[])
  731. class Meta:
  732. db_table = "conversation"
  733. class APIToken(DataBaseModel):
  734. tenant_id = CharField(max_length=32, null=False, index=True)
  735. token = CharField(max_length=255, null=False, index=True)
  736. dialog_id = CharField(max_length=32, null=False, index=True)
  737. source = CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True)
  738. class Meta:
  739. db_table = "api_token"
  740. primary_key = CompositeKey('tenant_id', 'token')
  741. class API4Conversation(DataBaseModel):
  742. id = CharField(max_length=32, primary_key=True)
  743. dialog_id = CharField(max_length=32, null=False, index=True)
  744. user_id = CharField(max_length=255, null=False, help_text="user_id", index=True)
  745. message = JSONField(null=True)
  746. reference = JSONField(null=True, default=[])
  747. tokens = IntegerField(default=0)
  748. source = CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True)
  749. duration = FloatField(default=0, index=True)
  750. round = IntegerField(default=0, index=True)
  751. thumb_up = IntegerField(default=0, index=True)
  752. class Meta:
  753. db_table = "api_4_conversation"
  754. class UserCanvas(DataBaseModel):
  755. id = CharField(max_length=32, primary_key=True)
  756. avatar = TextField(null=True, help_text="avatar base64 string")
  757. user_id = CharField(max_length=255, null=False, help_text="user_id", index=True)
  758. title = CharField(max_length=255, null=True, help_text="Canvas title")
  759. description = TextField(null=True, help_text="Canvas description")
  760. canvas_type = CharField(max_length=32, null=True, help_text="Canvas type", index=True)
  761. dsl = JSONField(null=True, default={})
  762. class Meta:
  763. db_table = "user_canvas"
  764. class CanvasTemplate(DataBaseModel):
  765. id = CharField(max_length=32, primary_key=True)
  766. avatar = TextField(null=True, help_text="avatar base64 string")
  767. title = CharField(max_length=255, null=True, help_text="Canvas title")
  768. description = TextField(null=True, help_text="Canvas description")
  769. canvas_type = CharField(max_length=32, null=True, help_text="Canvas type", index=True)
  770. dsl = JSONField(null=True, default={})
  771. class Meta:
  772. db_table = "canvas_template"
  773. def migrate_db():
  774. with DB.transaction():
  775. migrator = MySQLMigrator(DB)
  776. try:
  777. migrate(
  778. migrator.add_column('file', 'source_type', CharField(max_length=128, null=False, default="",
  779. help_text="where dose this document come from",
  780. index=True))
  781. )
  782. except Exception as e:
  783. pass
  784. try:
  785. migrate(
  786. migrator.add_column('tenant', 'rerank_id',
  787. CharField(max_length=128, null=False, default="BAAI/bge-reranker-v2-m3",
  788. help_text="default rerank model ID"))
  789. )
  790. except Exception as e:
  791. pass
  792. try:
  793. migrate(
  794. migrator.add_column('dialog', 'rerank_id', CharField(max_length=128, null=False, default="",
  795. help_text="default rerank model ID"))
  796. )
  797. except Exception as e:
  798. pass
  799. try:
  800. migrate(
  801. migrator.add_column('dialog', 'top_k', IntegerField(default=1024))
  802. )
  803. except Exception as e:
  804. pass
  805. try:
  806. migrate(
  807. migrator.alter_column_type('tenant_llm', 'api_key',
  808. CharField(max_length=1024, null=True, help_text="API KEY", index=True))
  809. )
  810. except Exception as e:
  811. pass
  812. try:
  813. migrate(
  814. migrator.add_column('api_token', 'source',
  815. CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True))
  816. )
  817. except Exception as e:
  818. pass
  819. try:
  820. migrate(
  821. migrator.add_column("tenant","tts_id",
  822. CharField(max_length=256,null=True,help_text="default tts model ID",index=True))
  823. )
  824. except Exception as e:
  825. pass
  826. try:
  827. migrate(
  828. migrator.add_column('api_4_conversation', 'source',
  829. CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True))
  830. )
  831. except Exception as e:
  832. pass
  833. try:
  834. DB.execute_sql('ALTER TABLE llm DROP PRIMARY KEY;')
  835. DB.execute_sql('ALTER TABLE llm ADD PRIMARY KEY (llm_name,fid);')
  836. except Exception as e:
  837. pass
  838. try:
  839. migrate(
  840. migrator.add_column('task', 'retry_count', IntegerField(default=0))
  841. )
  842. except Exception as e:
  843. pass