### What problem does this PR solve? https://github.com/infiniflow/ragflow/issues/6138 This PR adds vision LLM support for GPUStack and changes the URL path from `/v1-openai` to `/v1`. ### Type of change - [x] New Feature (non-breaking change which adds functionality) — tag: v0.18.0
| @@ -107,7 +107,8 @@ from .cv_model import ( | |||
| YiCV, | |||
| HunyuanCV, | |||
| AnthropicCV, | |||
| - SILICONFLOWCV | |||
| + SILICONFLOWCV, | |||
| + GPUStackCV, | |||
| ) | |||
| from .rerank_model import ( | |||
| @@ -145,7 +146,7 @@ from .tts_model import ( | |||
| SparkTTS, | |||
| XinferenceTTS, | |||
| GPUStackTTS, | |||
| - SILICONFLOWTTS | |||
| + SILICONFLOWTTS, | |||
| ) | |||
| EmbeddingModel = { | |||
| @@ -202,6 +203,7 @@ CvModel = { | |||
| "Tencent Hunyuan": HunyuanCV, | |||
| "Anthropic": AnthropicCV, | |||
| "SILICONFLOW": SILICONFLOWCV, | |||
| "GPUStack": GPUStackCV, | |||
| } | |||
| ChatModel = { | |||
| @@ -1586,6 +1586,6 @@ class GPUStackChat(Base): | |||
| def __init__(self, key=None, model_name="", base_url=""): | |||
| if not base_url: | |||
| raise ValueError("Local llm url cannot be None") | |||
| - if base_url.split("/")[-1] != "v1-openai": | |||
| - base_url = os.path.join(base_url, "v1-openai") | |||
| + if base_url.split("/")[-1] != "v1": | |||
| + base_url = os.path.join(base_url, "v1") | |||
| super().__init__(key, model_name, base_url) | |||
| @@ -1028,4 +1028,14 @@ class AnthropicCV(Base): | |||
| except Exception as e: | |||
| yield ans + "\n**ERROR**: " + str(e) | |||
| yield total_tokens | |||
| yield total_tokens | |||
| class GPUStackCV(GptV4): | |||
| def __init__(self, key, model_name, lang="Chinese", base_url=""): | |||
| if not base_url: | |||
| raise ValueError("Local llm url cannot be None") | |||
| if base_url.split("/")[-1] != "v1": | |||
| base_url = os.path.join(base_url, "v1") | |||
| self.client = OpenAI(api_key=key, base_url=base_url) | |||
| self.model_name = model_name | |||
| self.lang = lang | |||
| @@ -832,9 +832,8 @@ class GPUStackEmbed(OpenAIEmbed): | |||
| def __init__(self, key, model_name, base_url): | |||
| if not base_url: | |||
| raise ValueError("url cannot be None") | |||
| - if base_url.split("/")[-1] != "v1-openai": | |||
| - base_url = os.path.join(base_url, "v1-openai") | |||
| + if base_url.split("/")[-1] != "v1": | |||
| + base_url = os.path.join(base_url, "v1") | |||
| print(key,base_url) | |||
| self.client = OpenAI(api_key=key, base_url=base_url) | |||
| self.model_name = model_name | |||
| @@ -198,8 +198,8 @@ class GPUStackSeq2txt(Base): | |||
| def __init__(self, key, model_name, base_url): | |||
| if not base_url: | |||
| raise ValueError("url cannot be None") | |||
| - if base_url.split("/")[-1] != "v1-openai": | |||
| - base_url = os.path.join(base_url, "v1-openai") | |||
| + if base_url.split("/")[-1] != "v1": | |||
| + base_url = os.path.join(base_url, "v1") | |||
| self.base_url = base_url | |||
| self.model_name = model_name | |||
| self.key = key | |||
| @@ -378,7 +378,7 @@ class GPUStackTTS: | |||
| } | |||
| response = requests.post( | |||
| - f"{self.base_url}/v1-openai/audio/speech", | |||
| + f"{self.base_url}/v1/audio/speech", | |||
| headers=self.headers, | |||
| json=payload, | |||
| stream=stream | |||