### What problem does this PR solve?

Add support for Cohere models: chat (the Command family), text embedding (embed-english/multilingual v2.0 and v3.0), and reranking (rerank-english/multilingual v2.0 and v3.0).

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
| "tags": "LLM,TEXT EMBEDDING,IMAGE2TEXT", | "tags": "LLM,TEXT EMBEDDING,IMAGE2TEXT", | ||||
| "status": "1", | "status": "1", | ||||
| "llm": [] | "llm": [] | ||||
| }, | |||||
| { | |||||
| "name": "cohere", | |||||
| "logo": "", | |||||
| "tags": "LLM,TEXT EMBEDDING, TEXT RE-RANK", | |||||
| "status": "1", | |||||
| "llm": [ | |||||
| { | |||||
| "llm_name": "command-r-plus", | |||||
| "tags": "LLM,CHAT,128k", | |||||
| "max_tokens": 131072, | |||||
| "model_type": "chat" | |||||
| }, | |||||
| { | |||||
| "llm_name": "command-r", | |||||
| "tags": "LLM,CHAT,128k", | |||||
| "max_tokens": 131072, | |||||
| "model_type": "chat" | |||||
| }, | |||||
| { | |||||
| "llm_name": "command", | |||||
| "tags": "LLM,CHAT,4k", | |||||
| "max_tokens": 4096, | |||||
| "model_type": "chat" | |||||
| }, | |||||
| { | |||||
| "llm_name": "command-nightly", | |||||
| "tags": "LLM,CHAT,128k", | |||||
| "max_tokens": 131072, | |||||
| "model_type": "chat" | |||||
| }, | |||||
| { | |||||
| "llm_name": "command-light", | |||||
| "tags": "LLM,CHAT,4k", | |||||
| "max_tokens": 4096, | |||||
| "model_type": "chat" | |||||
| }, | |||||
| { | |||||
| "llm_name": "command-light-nightly", | |||||
| "tags": "LLM,CHAT,4k", | |||||
| "max_tokens": 4096, | |||||
| "model_type": "chat" | |||||
| }, | |||||
| { | |||||
| "llm_name": "embed-english-v3.0", | |||||
| "tags": "TEXT EMBEDDING", | |||||
| "max_tokens": 512, | |||||
| "model_type": "embedding" | |||||
| }, | |||||
| { | |||||
| "llm_name": "embed-english-light-v3.0", | |||||
| "tags": "TEXT EMBEDDING", | |||||
| "max_tokens": 512, | |||||
| "model_type": "embedding" | |||||
| }, | |||||
| { | |||||
| "llm_name": "embed-multilingual-v3.0", | |||||
| "tags": "TEXT EMBEDDING", | |||||
| "max_tokens": 512, | |||||
| "model_type": "embedding" | |||||
| }, | |||||
| { | |||||
| "llm_name": "embed-multilingual-light-v3.0", | |||||
| "tags": "TEXT EMBEDDING", | |||||
| "max_tokens": 512, | |||||
| "model_type": "embedding" | |||||
| }, | |||||
| { | |||||
| "llm_name": "embed-english-v2.0", | |||||
| "tags": "TEXT EMBEDDING", | |||||
| "max_tokens": 512, | |||||
| "model_type": "embedding" | |||||
| }, | |||||
| { | |||||
| "llm_name": "embed-english-light-v2.0", | |||||
| "tags": "TEXT EMBEDDING", | |||||
| "max_tokens": 512, | |||||
| "model_type": "embedding" | |||||
| }, | |||||
| { | |||||
| "llm_name": "embed-multilingual-v2.0", | |||||
| "tags": "TEXT EMBEDDING", | |||||
| "max_tokens": 256, | |||||
| "model_type": "embedding" | |||||
| }, | |||||
| { | |||||
| "llm_name": "rerank-english-v3.0", | |||||
| "tags": "RE-RANK,4k", | |||||
| "max_tokens": 4096, | |||||
| "model_type": "rerank" | |||||
| }, | |||||
| { | |||||
| "llm_name": "rerank-multilingual-v3.0", | |||||
| "tags": "RE-RANK,4k", | |||||
| "max_tokens": 4096, | |||||
| "model_type": "rerank" | |||||
| }, | |||||
| { | |||||
| "llm_name": "rerank-english-v2.0", | |||||
| "tags": "RE-RANK,512", | |||||
| "max_tokens": 8196, | |||||
| "model_type": "rerank" | |||||
| }, | |||||
| { | |||||
| "llm_name": "rerank-multilingual-v2.0", | |||||
| "tags": "RE-RANK,512", | |||||
| "max_tokens": 512, | |||||
| "model_type": "rerank" | |||||
| } | |||||
| ] | |||||
| } | } | ||||
| ] | ] | ||||
| } | } |
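As a quick sanity check, the new entry can be loaded and validated with a few lines of Python. This is a hypothetical snippet, not part of the PR; it assumes the file path `conf/llm_factories.json` and a top-level `factory_llm_infos` key, as used by RAGFlow:

```python
import json

with open("conf/llm_factories.json") as f:
    factories = json.load(f)["factory_llm_infos"]

cohere = next(f for f in factories if f["name"] == "cohere")
for llm in cohere["llm"]:
    # Every model must declare a type the dispatch tables understand.
    assert llm["model_type"] in {"chat", "embedding", "rerank"}
    print(f'{llm["llm_name"]:32} {llm["model_type"]:10} {llm["max_tokens"]}')
```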
| "Gemini": GeminiEmbed, | "Gemini": GeminiEmbed, | ||||
| "NVIDIA": NvidiaEmbed, | "NVIDIA": NvidiaEmbed, | ||||
| "LM-Studio": LmStudioEmbed, | "LM-Studio": LmStudioEmbed, | ||||
| "OpenAI-API-Compatible": OpenAI_APIEmbed | |||||
| "OpenAI-API-Compatible": OpenAI_APIEmbed, | |||||
| "cohere": CoHereEmbed | |||||
| } | } | ||||
| "StepFun": StepFunChat, | "StepFun": StepFunChat, | ||||
| "NVIDIA": NvidiaChat, | "NVIDIA": NvidiaChat, | ||||
| "LM-Studio": LmStudioChat, | "LM-Studio": LmStudioChat, | ||||
| "OpenAI-API-Compatible": OpenAI_APIChat | |||||
| "OpenAI-API-Compatible": OpenAI_APIChat, | |||||
| "cohere": CoHereChat | |||||
| } | } | ||||
| "Xinference": XInferenceRerank, | "Xinference": XInferenceRerank, | ||||
| "NVIDIA": NvidiaRerank, | "NVIDIA": NvidiaRerank, | ||||
| "LM-Studio": LmStudioRerank, | "LM-Studio": LmStudioRerank, | ||||
| "OpenAI-API-Compatible": OpenAI_APIRerank | |||||
| "OpenAI-API-Compatible": OpenAI_APIRerank, | |||||
| "cohere": CoHereRerank | |||||
| } | } | ||||
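These tables map a factory name from the JSON config to a wrapper class, so callers can instantiate models generically. A minimal sketch of the dispatch, assuming RAGFlow's `rag.llm` module and the table names `EmbedModel`/`ChatModel`/`RerankModel` (the hunk above only shows the tails of the dicts, so these names are assumptions):

```python
# Hypothetical dispatch example; module path and table names are assumed.
from rag.llm import ChatModel, EmbedModel, RerankModel

chat_mdl = ChatModel["cohere"]("YOUR_COHERE_API_KEY", "command-r")
embd_mdl = EmbedModel["cohere"]("YOUR_COHERE_API_KEY", "embed-english-v3.0")
rerank_mdl = RerankModel["cohere"]("YOUR_COHERE_API_KEY", "rerank-english-v3.0")
```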
The chat wrapper adapts OpenAI-style message histories to Cohere's v1 chat API:

```python
        base_url = os.path.join(base_url, "v1")
        model_name = model_name.split("___")[0]
        super().__init__(key, model_name, base_url)


class CoHereChat(Base):
    def __init__(self, key, model_name, base_url=""):
        from cohere import Client

        self.client = Client(api_key=key)
        self.model_name = model_name

    def chat(self, system, history, gen_conf):
        if system:
            history.insert(0, {"role": "system", "content": system})
        # Cohere uses "p" instead of "top_p" and accepts only one of
        # frequency_penalty / presence_penalty at a time.
        if "top_p" in gen_conf:
            gen_conf["p"] = gen_conf.pop("top_p")
        if "frequency_penalty" in gen_conf and "presence_penalty" in gen_conf:
            gen_conf.pop("presence_penalty")
        # Translate OpenAI-style roles and fields into Cohere's chat_history
        # schema, which expects uppercase roles and a "message" field.
        for item in history:
            if "role" in item and item["role"] == "user":
                item["role"] = "USER"
            if "role" in item and item["role"] == "assistant":
                item["role"] = "CHATBOT"
            if "role" in item and item["role"] == "system":
                item["role"] = "SYSTEM"
            if "content" in item:
                item["message"] = item.pop("content")
        # The latest user message is sent separately from the history.
        mes = history.pop()["message"]
        ans = ""
        try:
            response = self.client.chat(
                model=self.model_name, chat_history=history, message=mes, **gen_conf
            )
            ans = response.text
            if response.finish_reason == "MAX_TOKENS":
                ans += (
                    "...\nFor the content length reason, it stopped, continue?"
                    if is_english([ans])
                    else "······\n由于长度的原因,回答被截断了,要继续吗?"
                )
            return (
                ans,
                response.meta.tokens.input_tokens + response.meta.tokens.output_tokens,
            )
        except Exception as e:
            return ans + "\n**ERROR**: " + str(e), 0

    def chat_streamly(self, system, history, gen_conf):
        if system:
            history.insert(0, {"role": "system", "content": system})
        if "top_p" in gen_conf:
            gen_conf["p"] = gen_conf.pop("top_p")
        if "frequency_penalty" in gen_conf and "presence_penalty" in gen_conf:
            gen_conf.pop("presence_penalty")
        for item in history:
            if "role" in item and item["role"] == "user":
                item["role"] = "USER"
            if "role" in item and item["role"] == "assistant":
                item["role"] = "CHATBOT"
            if "role" in item and item["role"] == "system":
                item["role"] = "SYSTEM"
            if "content" in item:
                item["message"] = item.pop("content")
        mes = history.pop()["message"]
        ans = ""
        total_tokens = 0
        try:
            response = self.client.chat_stream(
                model=self.model_name, chat_history=history, message=mes, **gen_conf
            )
            for resp in response:
                if resp.event_type == "text-generation":
                    ans += resp.text
                    total_tokens += num_tokens_from_string(resp.text)
                elif resp.event_type == "stream-end":
                    if resp.finish_reason == "MAX_TOKENS":
                        ans += (
                            "...\nFor the content length reason, it stopped, continue?"
                            if is_english([ans])
                            else "······\n由于长度的原因,回答被截断了,要继续吗?"
                        )
                yield ans
        except Exception as e:
            yield ans + "\n**ERROR**: " + str(e)

        yield total_tokens
```
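A minimal usage sketch for the class above (the API key, history, and generation settings here are hypothetical):

```python
chat = CoHereChat(key="YOUR_COHERE_API_KEY", model_name="command-r")
answer, tokens = chat.chat(
    system="You are a concise assistant.",
    history=[{"role": "user", "content": "Summarize RAG in one sentence."}],
    gen_conf={"temperature": 0.3, "top_p": 0.9},
)
print(answer, tokens)
```

`chat_streamly` takes the same arguments but yields the accumulated answer after each `text-generation` event, then the total token count as the final item.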
The embedding wrapper encodes documents with `input_type="search_document"` and queries with `"search_query"`, since Cohere's v3 embedding models are asymmetric:

```python
        if base_url.split("/")[-1] != "v1":
            base_url = os.path.join(base_url, "v1")
        self.client = OpenAI(api_key=key, base_url=base_url)
        self.model_name = model_name.split("___")[0]


class CoHereEmbed(Base):
    def __init__(self, key, model_name, base_url=None):
        from cohere import Client

        self.client = Client(api_key=key)
        self.model_name = model_name

    def encode(self, texts: list, batch_size=32):
        embeddings, total_tokens = [], 0
        # Embed document chunks in batches; the Cohere API caps the number
        # of texts per call.
        for i in range(0, len(texts), batch_size):
            res = self.client.embed(
                texts=texts[i : i + batch_size],
                model=self.model_name,
                input_type="search_document",
                embedding_types=["float"],
            )
            embeddings.extend(res.embeddings.float)
            total_tokens += int(res.meta.billed_units.input_tokens)
        return np.array(embeddings), total_tokens

    def encode_queries(self, text):
        res = self.client.embed(
            texts=[text],
            model=self.model_name,
            input_type="search_query",
            embedding_types=["float"],
        )
        # Return a single query vector, not a (1, dim) batch.
        return np.array(res.embeddings.float[0]), int(
            res.meta.billed_units.input_tokens
        )
```
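For example (hypothetical key; the printed shapes assume `embed-english-v3.0`, whose vectors are 1024-dimensional):

```python
embed = CoHereEmbed(key="YOUR_COHERE_API_KEY", model_name="embed-english-v3.0")
vectors, tokens = embed.encode(["first chunk", "second chunk"])
print(vectors.shape)  # (2, 1024)

query_vec, query_tokens = embed.encode_queries("what does the first chunk say?")
print(query_vec.shape)  # (1024,)
```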
| "top_n": len(texts), | "top_n": len(texts), | ||||
| } | } | ||||
| res = requests.post(self.base_url, headers=self.headers, json=data).json() | res = requests.post(self.base_url, headers=self.headers, json=data).json() | ||||
| return (np.array([d["logit"] for d in res["rankings"]]), token_count) | |||||
| rank = np.array([d["logit"] for d in res["rankings"]]) | |||||
| indexs = [d["index"] for d in res["rankings"]] | |||||
| return rank[indexs], token_count | |||||
| class LmStudioRerank(Base): | class LmStudioRerank(Base): | ||||
| def similarity(self, query: str, texts: list): | def similarity(self, query: str, texts: list): | ||||
| raise NotImplementedError("The api has not been implement") | raise NotImplementedError("The api has not been implement") | ||||
`CoHereRerank` follows the same pattern, calling the Cohere rerank endpoint and returning scores aligned with the input order of `texts`:

```python
class CoHereRerank(Base):
    def __init__(self, key, model_name, base_url=None):
        from cohere import Client

        self.client = Client(api_key=key)
        self.model_name = model_name

    def similarity(self, query: str, texts: list):
        token_count = num_tokens_from_string(query) + sum(
            num_tokens_from_string(t) for t in texts
        )
        res = self.client.rerank(
            model=self.model_name,
            query=query,
            documents=texts,
            top_n=len(texts),
            return_documents=False,
        )
        # Results come back sorted by relevance; scatter the scores back to
        # the original document order via each result's index.
        rank = np.zeros(len(texts), dtype=float)
        for d in res.results:
            rank[d.index] = d.relevance_score
        return rank, token_count
```
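A short usage sketch (hypothetical key; the scores are illustrative and stay aligned with the input order):

```python
rerank = CoHereRerank(key="YOUR_COHERE_API_KEY", model_name="rerank-english-v3.0")
scores, tokens = rerank.similarity(
    "when was the transistor invented?",
    ["Bananas are rich in potassium.", "The transistor was invented in 1947."],
)
print(scores)  # e.g. [0.01 0.98]: the second document is the relevant one
```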
The Cohere SDK is pinned in the project's requirements:

```
cachetools==5.3.3
chardet==5.2.0
cn2an==0.5.22
cohere==5.6.2
dashscope==1.14.1
datrie==0.8.2
demjson3==3.0.6
```
The two other requirements files receive the identical addition:

```
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
cohere==5.6.2
coloredlogs==15.0.1
cryptography==42.0.5
dashscope==1.14.1
```
The Cohere logo is added as an SVG asset:

```xml
<svg xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" xml:space="preserve" style="enable-background:new 0 0 75 75" viewBox="0 0 75 75" width="75" height="75"><path d="M24.3 44.7c2 0 6-.1 11.6-2.4 6.5-2.7 19.3-7.5 28.6-12.5 6.5-3.5 9.3-8.1 9.3-14.3C73.8 7 66.9 0 58.3 0h-36C10 0 0 10 0 22.3s9.4 22.4 24.3 22.4z" style="fill-rule:evenodd;clip-rule:evenodd;fill:#39594d"/><path d="M30.4 60c0-6 3.6-11.5 9.2-13.8l11.3-4.7C62.4 36.8 75 45.2 75 57.6 75 67.2 67.2 75 57.6 75H45.3c-8.2 0-14.9-6.7-14.9-15z" style="fill-rule:evenodd;clip-rule:evenodd;fill:#d18ee2"/><path d="M12.9 47.6C5.8 47.6 0 53.4 0 60.5v1.7C0 69.2 5.8 75 12.9 75c7.1 0 12.9-5.8 12.9-12.9v-1.7c-.1-7-5.8-12.8-12.9-12.8z" style="fill:#ff7759"/></svg>
```
Finally, the front-end icon map gets a `cohere` key:

```typescript
  StepFun: 'stepfun',
  NVIDIA: 'nvidia',
  'LM-Studio': 'lm-studio',
  'OpenAI-API-Compatible': 'openai-api',
  'cohere': 'cohere',
};

export const BedrockRegionList = [
```