Você não pode selecionar mais de 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

utils.ts 9.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. import {
  2. uniq,
  3. xorBy,
  4. } from 'lodash-es'
  5. import type { MultipleRetrievalConfig } from './types'
  6. import type {
  7. DataSet,
  8. SelectedDatasetsMode,
  9. } from '@/models/datasets'
  10. import {
  11. DEFAULT_WEIGHTED_SCORE,
  12. RerankingModeEnum,
  13. WeightedScoreEnum,
  14. } from '@/models/datasets'
  15. import { RETRIEVE_METHOD } from '@/types/app'
  16. import { DATASET_DEFAULT } from '@/config'
  17. export const checkNodeValid = () => {
  18. return true
  19. }
  20. export const getSelectedDatasetsMode = (datasets: DataSet[] = []) => {
  21. if (datasets === null)
  22. datasets = []
  23. let allHighQuality = true
  24. let allHighQualityVectorSearch = true
  25. let allHighQualityFullTextSearch = true
  26. let allEconomic = true
  27. let mixtureHighQualityAndEconomic = true
  28. let allExternal = true
  29. let allInternal = true
  30. let mixtureInternalAndExternal = true
  31. let inconsistentEmbeddingModel = false
  32. if (!datasets.length) {
  33. allHighQuality = false
  34. allHighQualityVectorSearch = false
  35. allHighQualityFullTextSearch = false
  36. allEconomic = false
  37. mixtureHighQualityAndEconomic = false
  38. allExternal = false
  39. allInternal = false
  40. mixtureInternalAndExternal = false
  41. }
  42. datasets.forEach((dataset) => {
  43. if (dataset.indexing_technique === 'economy') {
  44. allHighQuality = false
  45. allHighQualityVectorSearch = false
  46. allHighQualityFullTextSearch = false
  47. }
  48. if (dataset.indexing_technique === 'high_quality') {
  49. allEconomic = false
  50. if (dataset.retrieval_model_dict.search_method !== RETRIEVE_METHOD.semantic)
  51. allHighQualityVectorSearch = false
  52. if (dataset.retrieval_model_dict.search_method !== RETRIEVE_METHOD.fullText)
  53. allHighQualityFullTextSearch = false
  54. }
  55. if (dataset.provider !== 'external') {
  56. allExternal = false
  57. }
  58. else {
  59. allInternal = false
  60. allHighQuality = false
  61. allHighQualityVectorSearch = false
  62. allHighQualityFullTextSearch = false
  63. mixtureHighQualityAndEconomic = false
  64. }
  65. })
  66. if (allExternal || allInternal)
  67. mixtureInternalAndExternal = false
  68. if (allHighQuality || allEconomic)
  69. mixtureHighQualityAndEconomic = false
  70. if (allHighQuality)
  71. inconsistentEmbeddingModel = uniq(datasets.map(item => item.embedding_model)).length > 1
  72. return {
  73. allHighQuality,
  74. allHighQualityVectorSearch,
  75. allHighQualityFullTextSearch,
  76. allEconomic,
  77. mixtureHighQualityAndEconomic,
  78. allInternal,
  79. allExternal,
  80. mixtureInternalAndExternal,
  81. inconsistentEmbeddingModel,
  82. } as SelectedDatasetsMode
  83. }
  84. export const getMultipleRetrievalConfig = (
  85. multipleRetrievalConfig: MultipleRetrievalConfig,
  86. selectedDatasets: DataSet[],
  87. originalDatasets: DataSet[],
  88. fallbackRerankModel?: { provider?: string; model?: string }, // fallback rerank model
  89. ) => {
  90. // Check if the selected datasets are different from the original datasets
  91. const isDatasetsChanged = xorBy(selectedDatasets, originalDatasets, 'id').length > 0
  92. // Check if the rerank model is valid
  93. const isFallbackRerankModelValid = !!(fallbackRerankModel?.provider && fallbackRerankModel?.model)
  94. const {
  95. allHighQuality,
  96. allHighQualityVectorSearch,
  97. allHighQualityFullTextSearch,
  98. allEconomic,
  99. mixtureHighQualityAndEconomic,
  100. allInternal,
  101. allExternal,
  102. mixtureInternalAndExternal,
  103. inconsistentEmbeddingModel,
  104. } = getSelectedDatasetsMode(selectedDatasets)
  105. const {
  106. top_k = DATASET_DEFAULT.top_k,
  107. score_threshold,
  108. reranking_mode,
  109. reranking_model,
  110. weights,
  111. reranking_enable,
  112. } = multipleRetrievalConfig || { top_k: DATASET_DEFAULT.top_k }
  113. const result = {
  114. top_k,
  115. score_threshold,
  116. reranking_mode,
  117. reranking_model,
  118. weights,
  119. reranking_enable,
  120. }
  121. const setDefaultWeights = () => {
  122. result.weights = {
  123. weight_type: WeightedScoreEnum.Customized,
  124. vector_setting: {
  125. vector_weight: allHighQualityVectorSearch
  126. ? DEFAULT_WEIGHTED_SCORE.allHighQualityVectorSearch.semantic
  127. // eslint-disable-next-line sonarjs/no-nested-conditional
  128. : allHighQualityFullTextSearch
  129. ? DEFAULT_WEIGHTED_SCORE.allHighQualityFullTextSearch.semantic
  130. : DEFAULT_WEIGHTED_SCORE.other.semantic,
  131. embedding_provider_name: selectedDatasets[0].embedding_model_provider,
  132. embedding_model_name: selectedDatasets[0].embedding_model,
  133. },
  134. keyword_setting: {
  135. keyword_weight: allHighQualityVectorSearch
  136. ? DEFAULT_WEIGHTED_SCORE.allHighQualityVectorSearch.keyword
  137. // eslint-disable-next-line sonarjs/no-nested-conditional
  138. : allHighQualityFullTextSearch
  139. ? DEFAULT_WEIGHTED_SCORE.allHighQualityFullTextSearch.keyword
  140. : DEFAULT_WEIGHTED_SCORE.other.keyword,
  141. },
  142. }
  143. }
  144. /**
  145. * In this case, user can manually toggle reranking
  146. * So should keep the reranking_enable value
  147. * But the default reranking_model should be set
  148. */
  149. if ((allEconomic && allInternal) || allExternal) {
  150. result.reranking_mode = RerankingModeEnum.RerankingModel
  151. // Need to check if the reranking model should be set to default when first time initialized
  152. if ((!result.reranking_model?.provider || !result.reranking_model?.model) && isFallbackRerankModelValid) {
  153. result.reranking_model = {
  154. provider: fallbackRerankModel.provider || '',
  155. model: fallbackRerankModel.model || '',
  156. }
  157. }
  158. result.reranking_enable = reranking_enable
  159. }
  160. /**
  161. * In this case, reranking_enable must be true
  162. * And if rerank model is not set, should set the default rerank model
  163. */
  164. if (mixtureHighQualityAndEconomic || inconsistentEmbeddingModel || mixtureInternalAndExternal) {
  165. result.reranking_mode = RerankingModeEnum.RerankingModel
  166. // Need to check if the reranking model should be set to default when first time initialized
  167. if ((!result.reranking_model?.provider || !result.reranking_model?.model) && isFallbackRerankModelValid) {
  168. result.reranking_model = {
  169. provider: fallbackRerankModel.provider || '',
  170. model: fallbackRerankModel.model || '',
  171. }
  172. }
  173. result.reranking_enable = true
  174. }
  175. /**
  176. * In this case, user can choose to use weighted score or rerank model
  177. * But if the reranking_mode is not initialized, should set the default rerank model and reranking_enable to true
  178. * and set reranking_mode to reranking_model
  179. */
  180. if (allHighQuality && !inconsistentEmbeddingModel && allInternal) {
  181. // If not initialized, check if the default rerank model is valid
  182. if (!reranking_mode) {
  183. if (isFallbackRerankModelValid) {
  184. result.reranking_mode = RerankingModeEnum.RerankingModel
  185. result.reranking_enable = true
  186. result.reranking_model = {
  187. provider: fallbackRerankModel.provider || '',
  188. model: fallbackRerankModel.model || '',
  189. }
  190. }
  191. else {
  192. result.reranking_mode = RerankingModeEnum.WeightedScore
  193. result.reranking_enable = false
  194. setDefaultWeights()
  195. }
  196. }
  197. // After initialization, if datasets has no change, make sure the config has correct value
  198. if (reranking_mode === RerankingModeEnum.WeightedScore) {
  199. result.reranking_enable = false
  200. if (!weights)
  201. setDefaultWeights()
  202. }
  203. if (reranking_mode === RerankingModeEnum.RerankingModel) {
  204. if ((!result.reranking_model?.provider || !result.reranking_model?.model) && isFallbackRerankModelValid) {
  205. result.reranking_model = {
  206. provider: fallbackRerankModel.provider || '',
  207. model: fallbackRerankModel.model || '',
  208. }
  209. }
  210. result.reranking_enable = true
  211. }
  212. // Need to check if reranking_mode should be set to reranking_model when datasets changed
  213. if (reranking_mode === RerankingModeEnum.WeightedScore && weights && isDatasetsChanged) {
  214. if ((result.reranking_model?.provider && result.reranking_model?.model) || isFallbackRerankModelValid) {
  215. result.reranking_mode = RerankingModeEnum.RerankingModel
  216. result.reranking_enable = true
  217. // eslint-disable-next-line sonarjs/nested-control-flow
  218. if ((!result.reranking_model?.provider || !result.reranking_model?.model) && isFallbackRerankModelValid) {
  219. result.reranking_model = {
  220. provider: fallbackRerankModel.provider || '',
  221. model: fallbackRerankModel.model || '',
  222. }
  223. }
  224. }
  225. else {
  226. setDefaultWeights()
  227. }
  228. }
  229. // Need to switch to weighted score when reranking model is not valid and datasets changed
  230. if (
  231. reranking_mode === RerankingModeEnum.RerankingModel
  232. && (!result.reranking_model?.provider || !result.reranking_model?.model)
  233. && !isFallbackRerankModelValid
  234. && isDatasetsChanged
  235. ) {
  236. result.reranking_mode = RerankingModeEnum.WeightedScore
  237. result.reranking_enable = false
  238. setDefaultWeights()
  239. }
  240. }
  241. return result
  242. }
  243. export const checkoutRerankModelConfiguredInRetrievalSettings = (
  244. datasets: DataSet[],
  245. multipleRetrievalConfig?: MultipleRetrievalConfig,
  246. ) => {
  247. if (!multipleRetrievalConfig)
  248. return true
  249. const {
  250. allEconomic,
  251. allExternal,
  252. allInternal,
  253. } = getSelectedDatasetsMode(datasets)
  254. const {
  255. reranking_enable,
  256. reranking_mode,
  257. reranking_model,
  258. } = multipleRetrievalConfig
  259. if (reranking_mode === RerankingModeEnum.RerankingModel && (!reranking_model?.provider || !reranking_model?.model))
  260. return ((allEconomic && allInternal) || allExternal) && !reranking_enable
  261. return true
  262. }