add lighten control (#2567)

### What problem does this PR solve?

#2295

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
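
Lighten mode is switched on via the new `LIGHTEN` environment variable read in `api/settings.py` (diff below); any non-empty value enables it. A hypothetical smoke test of the behavior, assuming the stock module layout:

```python
# Hypothetical check; LIGHTEN must be set before api.settings is imported,
# because the gate is evaluated at module import time.
import os

os.environ["LIGHTEN"] = "1"    # any non-empty string enables lighten mode

from api import settings
# In lighten mode the heavyweight defaults are skipped and every default
# model name collapses to the empty string (see the else-branch below).
assert settings.CHAT_MDL == "" and settings.RERANK_MDL == ""
```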
Kevin Hu committed 1 year ago · commit 7bb28ca2bd (tag: v0.12.0)

5 changed files with 86 additions and 78 deletions:
1. api/apps/llm_app.py (+3 −1)
2. api/settings.py (+67 −67)
3. deepdoc/parser/pdf_parser.py (+5 −3)
4. rag/llm/embedding_model.py (+5 −3)
5. rag/llm/rerank_model.py (+6 −4)

#### api/apps/llm_app.py (+3 −1)

```diff
@@ -18,6 +18,7 @@ import json
 from flask import request
 from flask_login import login_required, current_user
 from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService
+from api.settings import LIGHTEN
 from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
 from api.db import StatusEnum, LLMType
 from api.db.db_models import TenantLLM
@@ -319,13 +320,14 @@ def my_llms():
 @login_required
 def list_app():
     self_deploied = ["Youdao","FastEmbed", "BAAI", "Ollama", "Xinference", "LocalAI", "LM-Studio"]
+    weighted = ["Youdao","FastEmbed", "BAAI"] if LIGHTEN else []
     model_type = request.args.get("model_type")
     try:
         objs = TenantLLMService.query(tenant_id=current_user.id)
         facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key])
         llms = LLMService.get_all()
         llms = [m.to_dict()
-                for m in llms if m.status == StatusEnum.VALID.value]
+                for m in llms if m.status == StatusEnum.VALID.value and m.fid not in weighted]
         for m in llms:
             m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in self_deploied
```


#### api/settings.py (+67 −67)

```diff
@@ -42,6 +42,7 @@ RAG_FLOW_SERVICE_NAME = "ragflow"
 SERVER_MODULE = "rag_flow_server.py"
 TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp")
 RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
+LIGHTEN = os.environ.get('LIGHTEN')
 
 SUBPROCESS_STD_LOG_NAME = "std.log"
 
@@ -57,77 +58,76 @@ REQUEST_MAX_WAIT_SEC = 300
 
 USE_REGISTRY = get_base_config("use_registry")
 
-default_llm = {
-    "Tongyi-Qianwen": {
-        "chat_model": "qwen-plus",
-        "embedding_model": "text-embedding-v2",
-        "image2text_model": "qwen-vl-max",
-        "asr_model": "paraformer-realtime-8k-v1",
-    },
-    "OpenAI": {
-        "chat_model": "gpt-3.5-turbo",
-        "embedding_model": "text-embedding-ada-002",
-        "image2text_model": "gpt-4-vision-preview",
-        "asr_model": "whisper-1",
-    },
-    "Azure-OpenAI": {
-        "chat_model": "azure-gpt-35-turbo",
-        "embedding_model": "azure-text-embedding-ada-002",
-        "image2text_model": "azure-gpt-4-vision-preview",
-        "asr_model": "azure-whisper-1",
-    },
-    "ZHIPU-AI": {
-        "chat_model": "glm-3-turbo",
-        "embedding_model": "embedding-2",
-        "image2text_model": "glm-4v",
-        "asr_model": "",
-    },
-    "Ollama": {
-        "chat_model": "qwen-14B-chat",
-        "embedding_model": "flag-embedding",
-        "image2text_model": "",
-        "asr_model": "",
-    },
-    "Moonshot": {
-        "chat_model": "moonshot-v1-8k",
-        "embedding_model": "",
-        "image2text_model": "",
-        "asr_model": "",
-    },
-    "DeepSeek": {
-        "chat_model": "deepseek-chat",
-        "embedding_model": "",
-        "image2text_model": "",
-        "asr_model": "",
-    },
-    "VolcEngine": {
-        "chat_model": "",
-        "embedding_model": "",
-        "image2text_model": "",
-        "asr_model": "",
-    },
-    "BAAI": {
-        "chat_model": "",
-        "embedding_model": "BAAI/bge-large-zh-v1.5",
-        "image2text_model": "",
-        "asr_model": "",
-        "rerank_model": "BAAI/bge-reranker-v2-m3",
-    }
-}
 LLM = get_base_config("user_default_llm", {})
 LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
 LLM_BASE_URL = LLM.get("base_url")
 
-if LLM_FACTORY not in default_llm:
-    print(
-        "\33[91m【ERROR】\33[0m:",
-        f"LLM factory {LLM_FACTORY} has not supported yet, switch to 'Tongyi-Qianwen/QWen' automatically, and please check the API_KEY in service_conf.yaml.")
-    LLM_FACTORY = "Tongyi-Qianwen"
-CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
-EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"]
-RERANK_MDL = default_llm["BAAI"]["rerank_model"]
-ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
-IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
+if not LIGHTEN:
+    default_llm = {
+        "Tongyi-Qianwen": {
+            "chat_model": "qwen-plus",
+            "embedding_model": "text-embedding-v2",
+            "image2text_model": "qwen-vl-max",
+            "asr_model": "paraformer-realtime-8k-v1",
+        },
+        "OpenAI": {
+            "chat_model": "gpt-3.5-turbo",
+            "embedding_model": "text-embedding-ada-002",
+            "image2text_model": "gpt-4-vision-preview",
+            "asr_model": "whisper-1",
+        },
+        "Azure-OpenAI": {
+            "chat_model": "azure-gpt-35-turbo",
+            "embedding_model": "azure-text-embedding-ada-002",
+            "image2text_model": "azure-gpt-4-vision-preview",
+            "asr_model": "azure-whisper-1",
+        },
+        "ZHIPU-AI": {
+            "chat_model": "glm-3-turbo",
+            "embedding_model": "embedding-2",
+            "image2text_model": "glm-4v",
+            "asr_model": "",
+        },
+        "Ollama": {
+            "chat_model": "qwen-14B-chat",
+            "embedding_model": "flag-embedding",
+            "image2text_model": "",
+            "asr_model": "",
+        },
+        "Moonshot": {
+            "chat_model": "moonshot-v1-8k",
+            "embedding_model": "",
+            "image2text_model": "",
+            "asr_model": "",
+        },
+        "DeepSeek": {
+            "chat_model": "deepseek-chat",
+            "embedding_model": "",
+            "image2text_model": "",
+            "asr_model": "",
+        },
+        "VolcEngine": {
+            "chat_model": "",
+            "embedding_model": "",
+            "image2text_model": "",
+            "asr_model": "",
+        },
+        "BAAI": {
+            "chat_model": "",
+            "embedding_model": "BAAI/bge-large-zh-v1.5",
+            "image2text_model": "",
+            "asr_model": "",
+            "rerank_model": "BAAI/bge-reranker-v2-m3",
+        }
+    }
+
+    CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
+    EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"]
+    RERANK_MDL = default_llm["BAAI"]["rerank_model"] if not LIGHTEN else ""
+    ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
+    IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
+else:
+    CHAT_MDL = EMBEDDING_MDL = RERANK_MDL = ASR_MDL = IMAGE2TEXT_MDL = ""
 
 API_KEY = LLM.get("api_key", "")
 PARSERS = LLM.get(
```
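
One caveat worth noting: `os.environ.get('LIGHTEN')` returns the raw string (or `None`), so any non-empty value, including `LIGHTEN=0`, switches lighten mode on. A short illustration:

```python
import os

os.environ["LIGHTEN"] = "0"
LIGHTEN = os.environ.get("LIGHTEN")   # "0" -- a non-empty string
print(bool(LIGHTEN))                  # True: lighten mode still engages
# A stricter parse (a suggestion, not what this PR does) would be:
# LIGHTEN = int(os.environ.get("LIGHTEN", "0")) != 0
```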

#### deepdoc/parser/pdf_parser.py (+5 −3)

```diff
@@ -16,7 +16,6 @@ import random
 
 import xgboost as xgb
 from io import BytesIO
-import torch
 import re
 import pdfplumber
 import logging
@@ -25,6 +24,7 @@ import numpy as np
 from timeit import default_timer as timer
 from pypdf import PdfReader as pdf2_read
 
+from api.settings import LIGHTEN
 from api.utils.file_utils import get_project_base_directory
 from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
 from rag.nlp import rag_tokenizer
@@ -44,8 +44,10 @@ class RAGFlowPdfParser:
         self.tbl_det = TableStructureRecognizer()
 
         self.updown_cnt_mdl = xgb.Booster()
-        if torch.cuda.is_available():
-            self.updown_cnt_mdl.set_param({"device": "cuda"})
+        if not LIGHTEN:
+            import torch
+            if torch.cuda.is_available():
+                self.updown_cnt_mdl.set_param({"device": "cuda"})
         try:
             model_dir = os.path.join(
                 get_project_base_directory(),
```
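
Moving `import torch` inside the `if not LIGHTEN:` branch is a standard lazy import: the heavy dependency is loaded only when the code path actually needs it, so a lighten build can omit torch from its image entirely. A generic sketch of the idiom, with hypothetical names:

```python
def pick_device(lighten: bool) -> str:
    """Choose a device string without importing torch up front (sketch only)."""
    if lighten:
        return "cpu"   # lighten mode: torch is never imported
    import torch       # deferred import, paid for only on this path
    return "cuda" if torch.cuda.is_available() else "cpu"
```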

#### rag/llm/embedding_model.py (+5 −3)

```diff
@@ -25,10 +25,10 @@ from abc import ABC
 from ollama import Client
 import dashscope
 from openai import OpenAI
-from FlagEmbedding import FlagModel
-import torch
 import numpy as np
 import asyncio
+
+from api.settings import LIGHTEN
 from api.utils.file_utils import get_home_cache_dir
 from rag.utils import num_tokens_from_string, truncate
 import google.generativeai as genai
@@ -60,8 +60,10 @@ class DefaultEmbedding(Base):
                  ^_-
 
         """
-        if not DefaultEmbedding._model:
+        if not LIGHTEN and not DefaultEmbedding._model:
             with DefaultEmbedding._model_lock:
+                from FlagEmbedding import FlagModel
+                import torch
                 if not DefaultEmbedding._model:
                     try:
                         DefaultEmbedding._model = FlagModel(os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z]+/", "", model_name)),
```
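
The guard around `DefaultEmbedding._model` is double-checked locking: test the singleton outside the lock, then re-test while holding it, so the expensive `FlagModel` load runs at most once even when several threads embed for the first time concurrently. A minimal sketch of the pattern with hypothetical names; the same shape reappears in `rag/llm/rerank_model.py` below, with the lazy imports hoisted just above the `with` block:

```python
import threading

class LazySingleton:
    """Sketch of the lazy, lighten-aware singleton used for the local models."""
    _model = None
    _model_lock = threading.Lock()

    @classmethod
    def get(cls, lighten: bool):
        if not lighten and cls._model is None:   # cheap unsynchronized check
            with cls._model_lock:
                if cls._model is None:           # re-check under the lock
                    cls._model = object()        # stand-in for FlagModel(...)
        return cls._model
```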

#### rag/llm/rerank_model.py (+6 −4)

```diff
@@ -14,14 +14,14 @@
 # limitations under the License.
 #
 import re
-import threading
+import threading
 import requests
-import torch
-from FlagEmbedding import FlagReranker
 from huggingface_hub import snapshot_download
 import os
 from abc import ABC
 import numpy as np
+
+from api.settings import LIGHTEN
 from api.utils.file_utils import get_home_cache_dir
 from rag.utils import num_tokens_from_string, truncate
 import json
@@ -53,7 +53,9 @@ class DefaultRerank(Base):
                  ^_-
 
         """
-        if not DefaultRerank._model:
+        if not LIGHTEN and not DefaultRerank._model:
+            import torch
+            from FlagEmbedding import FlagReranker
             with DefaultRerank._model_lock:
                 if not DefaultRerank._model:
                     try:
```
