Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

cv_model.py 29KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import base64
  17. import json
  18. import os
  19. from abc import ABC
  20. from copy import deepcopy
  21. from io import BytesIO
  22. from urllib.parse import urljoin
  23. import requests
  24. from openai import OpenAI
  25. from openai.lib.azure import AzureOpenAI
  26. from zhipuai import ZhipuAI
  27. from rag.nlp import is_english
  28. from rag.prompts import vision_llm_describe_prompt
  29. from rag.utils import num_tokens_from_string
  30. class Base(ABC):
  31. def __init__(self, **kwargs):
  32. # Configure retry parameters
  33. self.max_retries = kwargs.get("max_retries", int(os.environ.get("LLM_MAX_RETRIES", 5)))
  34. self.base_delay = kwargs.get("retry_interval", float(os.environ.get("LLM_BASE_DELAY", 2.0)))
  35. self.max_rounds = kwargs.get("max_rounds", 5)
  36. self.is_tools = False
  37. self.tools = []
  38. self.toolcall_sessions = {}
  39. def describe(self, image):
  40. raise NotImplementedError("Please implement encode method!")
  41. def describe_with_prompt(self, image, prompt=None):
  42. raise NotImplementedError("Please implement encode method!")
  43. def _form_history(self, system, history, images=[]):
  44. hist = []
  45. if system:
  46. hist.append({"role": "system", "content": system})
  47. for h in history:
  48. if images and h["role"] == "user":
  49. h["content"] = self._image_prompt(h["content"], images)
  50. images = []
  51. hist.append(h)
  52. return hist
  53. def _image_prompt(self, text, images):
  54. if not images:
  55. return text
  56. pmpt = [{"type": "text", "text": text}]
  57. for img in images:
  58. pmpt.append({
  59. "type": "image_url",
  60. "image_url": {
  61. "url": f"data:image/jpeg;base64,{img}" if img[:4] != "data" else img
  62. }
  63. })
  64. return pmpt
  65. def chat(self, system, history, gen_conf, images=[], **kwargs):
  66. try:
  67. response = self.client.chat.completions.create(
  68. model=self.model_name,
  69. messages=self._form_history(system, history, images)
  70. )
  71. return response.choices[0].message.content.strip(), response.usage.total_tokens
  72. except Exception as e:
  73. return "**ERROR**: " + str(e), 0
  74. def chat_streamly(self, system, history, gen_conf, images=[], **kwargs):
  75. ans = ""
  76. tk_count = 0
  77. try:
  78. response = self.client.chat.completions.create(
  79. model=self.model_name,
  80. messages=self._form_history(system, history, images),
  81. stream=True
  82. )
  83. for resp in response:
  84. if not resp.choices[0].delta.content:
  85. continue
  86. delta = resp.choices[0].delta.content
  87. ans = delta
  88. if resp.choices[0].finish_reason == "length":
  89. ans += "...\nFor the content length reason, it stopped, continue?" if is_english([ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
  90. if resp.choices[0].finish_reason == "stop":
  91. tk_count += resp.usage.total_tokens
  92. yield ans
  93. except Exception as e:
  94. yield ans + "\n**ERROR**: " + str(e)
  95. yield tk_count
  96. @staticmethod
  97. def image2base64(image):
  98. if isinstance(image, bytes):
  99. return base64.b64encode(image).decode("utf-8")
  100. if isinstance(image, BytesIO):
  101. return base64.b64encode(image.getvalue()).decode("utf-8")
  102. buffered = BytesIO()
  103. try:
  104. image.save(buffered, format="JPEG")
  105. except Exception:
  106. image.save(buffered, format="PNG")
  107. return base64.b64encode(buffered.getvalue()).decode("utf-8")
  108. def prompt(self, b64):
  109. return [
  110. {
  111. "role": "user",
  112. "content": self._image_prompt(
  113. "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等,如果有数据请提取出数据。"
  114. if self.lang.lower() == "chinese"
  115. else "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out.",
  116. b64
  117. )
  118. }
  119. ]
  120. def vision_llm_prompt(self, b64, prompt=None):
  121. return [
  122. {
  123. "role": "user",
  124. "content": self._image_prompt(prompt if prompt else vision_llm_describe_prompt(), b64)
  125. }
  126. ]
  127. class GptV4(Base):
  128. _FACTORY_NAME = "OpenAI"
  129. def __init__(self, key, model_name="gpt-4-vision-preview", lang="Chinese", base_url="https://api.openai.com/v1", **kwargs):
  130. if not base_url:
  131. base_url = "https://api.openai.com/v1"
  132. self.client = OpenAI(api_key=key, base_url=base_url)
  133. self.model_name = model_name
  134. self.lang = lang
  135. super().__init__(**kwargs)
  136. def describe(self, image):
  137. b64 = self.image2base64(image)
  138. res = self.client.chat.completions.create(
  139. model=self.model_name,
  140. messages=self.prompt(b64),
  141. )
  142. return res.choices[0].message.content.strip(), res.usage.total_tokens
  143. def describe_with_prompt(self, image, prompt=None):
  144. b64 = self.image2base64(image)
  145. res = self.client.chat.completions.create(
  146. model=self.model_name,
  147. messages=self.vision_llm_prompt(b64, prompt),
  148. )
  149. return res.choices[0].message.content.strip(), res.usage.total_tokens
  150. class AzureGptV4(GptV4):
  151. _FACTORY_NAME = "Azure-OpenAI"
  152. def __init__(self, key, model_name, lang="Chinese", **kwargs):
  153. api_key = json.loads(key).get("api_key", "")
  154. api_version = json.loads(key).get("api_version", "2024-02-01")
  155. self.client = AzureOpenAI(api_key=api_key, azure_endpoint=kwargs["base_url"], api_version=api_version)
  156. self.model_name = model_name
  157. self.lang = lang
  158. Base.__init__(self, **kwargs)
  159. class xAICV(GptV4):
  160. _FACTORY_NAME = "xAI"
  161. def __init__(self, key, model_name="grok-3", lang="Chinese", base_url=None, **kwargs):
  162. if not base_url:
  163. base_url = "https://api.x.ai/v1"
  164. super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs)
  165. class QWenCV(GptV4):
  166. _FACTORY_NAME = "Tongyi-Qianwen"
  167. def __init__(self, key, model_name="qwen-vl-chat-v1", lang="Chinese", base_url=None, **kwargs):
  168. if not base_url:
  169. base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
  170. super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs)
  171. class HunyuanCV(GptV4):
  172. _FACTORY_NAME = "Tencent Hunyuan"
  173. def __init__(self, key, model_name, lang="Chinese", base_url=None, **kwargs):
  174. if not base_url:
  175. base_url = "https://api.hunyuan.cloud.tencent.com/v1"
  176. super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs)
  177. class Zhipu4V(GptV4):
  178. _FACTORY_NAME = "ZHIPU-AI"
  179. def __init__(self, key, model_name="glm-4v", lang="Chinese", **kwargs):
  180. self.client = ZhipuAI(api_key=key)
  181. self.model_name = model_name
  182. self.lang = lang
  183. Base.__init__(self, **kwargs)
  184. class StepFunCV(GptV4):
  185. _FACTORY_NAME = "StepFun"
  186. def __init__(self, key, model_name="step-1v-8k", lang="Chinese", base_url="https://api.stepfun.com/v1", **kwargs):
  187. if not base_url:
  188. base_url = "https://api.stepfun.com/v1"
  189. self.client = OpenAI(api_key=key, base_url=base_url)
  190. self.model_name = model_name
  191. self.lang = lang
  192. Base.__init__(self, **kwargs)
  193. class LmStudioCV(GptV4):
  194. _FACTORY_NAME = "LM-Studio"
  195. def __init__(self, key, model_name, lang="Chinese", base_url="", **kwargs):
  196. if not base_url:
  197. raise ValueError("Local llm url cannot be None")
  198. base_url = urljoin(base_url, "v1")
  199. self.client = OpenAI(api_key="lm-studio", base_url=base_url)
  200. self.model_name = model_name
  201. self.lang = lang
  202. Base.__init__(self, **kwargs)
  203. class OpenAI_APICV(GptV4):
  204. _FACTORY_NAME = ["VLLM", "OpenAI-API-Compatible"]
  205. def __init__(self, key, model_name, lang="Chinese", base_url="", **kwargs):
  206. if not base_url:
  207. raise ValueError("url cannot be None")
  208. base_url = urljoin(base_url, "v1")
  209. self.client = OpenAI(api_key=key, base_url=base_url)
  210. self.model_name = model_name.split("___")[0]
  211. self.lang = lang
  212. Base.__init__(self, **kwargs)
  213. class TogetherAICV(GptV4):
  214. _FACTORY_NAME = "TogetherAI"
  215. def __init__(self, key, model_name, lang="Chinese", base_url="https://api.together.xyz/v1", **kwargs):
  216. if not base_url:
  217. base_url = "https://api.together.xyz/v1"
  218. super().__init__(key, model_name, lang, base_url, **kwargs)
  219. class YiCV(GptV4):
  220. _FACTORY_NAME = "01.AI"
  221. def __init__(
  222. self,
  223. key,
  224. model_name,
  225. lang="Chinese",
  226. base_url="https://api.lingyiwanwu.com/v1", **kwargs
  227. ):
  228. if not base_url:
  229. base_url = "https://api.lingyiwanwu.com/v1"
  230. super().__init__(key, model_name, lang, base_url, **kwargs)
  231. class SILICONFLOWCV(GptV4):
  232. _FACTORY_NAME = "SILICONFLOW"
  233. def __init__(
  234. self,
  235. key,
  236. model_name,
  237. lang="Chinese",
  238. base_url="https://api.siliconflow.cn/v1", **kwargs
  239. ):
  240. if not base_url:
  241. base_url = "https://api.siliconflow.cn/v1"
  242. super().__init__(key, model_name, lang, base_url, **kwargs)
  243. class OpenRouterCV(GptV4):
  244. _FACTORY_NAME = "OpenRouter"
  245. def __init__(
  246. self,
  247. key,
  248. model_name,
  249. lang="Chinese",
  250. base_url="https://openrouter.ai/api/v1", **kwargs
  251. ):
  252. if not base_url:
  253. base_url = "https://openrouter.ai/api/v1"
  254. self.client = OpenAI(api_key=key, base_url=base_url)
  255. self.model_name = model_name
  256. self.lang = lang
  257. Base.__init__(self, **kwargs)
  258. class LocalAICV(GptV4):
  259. _FACTORY_NAME = "LocalAI"
  260. def __init__(self, key, model_name, base_url, lang="Chinese", **kwargs):
  261. if not base_url:
  262. raise ValueError("Local cv model url cannot be None")
  263. base_url = urljoin(base_url, "v1")
  264. self.client = OpenAI(api_key="empty", base_url=base_url)
  265. self.model_name = model_name.split("___")[0]
  266. self.lang = lang
  267. Base.__init__(self, **kwargs)
  268. class XinferenceCV(GptV4):
  269. _FACTORY_NAME = "Xinference"
  270. def __init__(self, key, model_name="", lang="Chinese", base_url="", **kwargs):
  271. base_url = urljoin(base_url, "v1")
  272. self.client = OpenAI(api_key=key, base_url=base_url)
  273. self.model_name = model_name
  274. self.lang = lang
  275. Base.__init__(self, **kwargs)
  276. class GPUStackCV(GptV4):
  277. _FACTORY_NAME = "GPUStack"
  278. def __init__(self, key, model_name, lang="Chinese", base_url="", **kwargs):
  279. if not base_url:
  280. raise ValueError("Local llm url cannot be None")
  281. base_url = urljoin(base_url, "v1")
  282. self.client = OpenAI(api_key=key, base_url=base_url)
  283. self.model_name = model_name
  284. self.lang = lang
  285. Base.__init__(self, **kwargs)
  286. class LocalCV(Base):
  287. _FACTORY_NAME = "Moonshot"
  288. def __init__(self, key, model_name="glm-4v", lang="Chinese", **kwargs):
  289. pass
  290. def describe(self, image):
  291. return "", 0
  292. class OllamaCV(Base):
  293. _FACTORY_NAME = "Ollama"
  294. def __init__(self, key, model_name, lang="Chinese", **kwargs):
  295. from ollama import Client
  296. self.client = Client(host=kwargs["base_url"])
  297. self.model_name = model_name
  298. self.lang = lang
  299. self.keep_alive = kwargs.get("ollama_keep_alive", int(os.environ.get("OLLAMA_KEEP_ALIVE", -1)))
  300. Base.__init__(self, **kwargs)
  301. def _clean_conf(self, gen_conf):
  302. options = {}
  303. if "temperature" in gen_conf:
  304. options["temperature"] = gen_conf["temperature"]
  305. if "top_p" in gen_conf:
  306. options["top_k"] = gen_conf["top_p"]
  307. if "presence_penalty" in gen_conf:
  308. options["presence_penalty"] = gen_conf["presence_penalty"]
  309. if "frequency_penalty" in gen_conf:
  310. options["frequency_penalty"] = gen_conf["frequency_penalty"]
  311. return options
  312. def _form_history(self, system, history, images=[]):
  313. hist = deepcopy(history)
  314. if system and hist[0]["role"] == "user":
  315. hist.insert(0, {"role": "system", "content": system})
  316. if not images:
  317. return hist
  318. for his in hist:
  319. if his["role"] == "user":
  320. his["images"] = images
  321. break
  322. return hist
  323. def describe(self, image):
  324. prompt = self.prompt("")
  325. try:
  326. response = self.client.generate(
  327. model=self.model_name,
  328. prompt=prompt[0]["content"][0]["text"],
  329. images=[image],
  330. )
  331. ans = response["response"].strip()
  332. return ans, 128
  333. except Exception as e:
  334. return "**ERROR**: " + str(e), 0
  335. def describe_with_prompt(self, image, prompt=None):
  336. vision_prompt = self.vision_llm_prompt("", prompt) if prompt else self.vision_llm_prompt("")
  337. try:
  338. response = self.client.generate(
  339. model=self.model_name,
  340. prompt=vision_prompt[0]["content"][0]["text"],
  341. images=[image],
  342. )
  343. ans = response["response"].strip()
  344. return ans, 128
  345. except Exception as e:
  346. return "**ERROR**: " + str(e), 0
  347. def chat(self, system, history, gen_conf, images=[]):
  348. try:
  349. response = self.client.chat(
  350. model=self.model_name,
  351. messages=self._form_history(system, history, images),
  352. options=self._clean_conf(gen_conf),
  353. keep_alive=self.keep_alive
  354. )
  355. ans = response["message"]["content"].strip()
  356. return ans, response["eval_count"] + response.get("prompt_eval_count", 0)
  357. except Exception as e:
  358. return "**ERROR**: " + str(e), 0
  359. def chat_streamly(self, system, history, gen_conf, images=[]):
  360. ans = ""
  361. try:
  362. response = self.client.chat(
  363. model=self.model_name,
  364. messages=self._form_history(system, history, images),
  365. stream=True,
  366. options=self._clean_conf(gen_conf),
  367. keep_alive=self.keep_alive
  368. )
  369. for resp in response:
  370. if resp["done"]:
  371. yield resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0)
  372. ans = resp["message"]["content"]
  373. yield ans
  374. except Exception as e:
  375. yield ans + "\n**ERROR**: " + str(e)
  376. yield 0
  377. class GeminiCV(Base):
  378. _FACTORY_NAME = "Gemini"
  379. def __init__(self, key, model_name="gemini-1.0-pro-vision-latest", lang="Chinese", **kwargs):
  380. from google.generativeai import GenerativeModel, client
  381. client.configure(api_key=key)
  382. _client = client.get_default_generative_client()
  383. self.model_name = model_name
  384. self.model = GenerativeModel(model_name=self.model_name)
  385. self.model._client = _client
  386. self.lang = lang
  387. Base.__init__(self, **kwargs)
  388. def _form_history(self, system, history, images=[]):
  389. hist = []
  390. if system:
  391. hist.append({"role": "user", "parts": [system, history[0]["content"]]})
  392. for img in images:
  393. hist[0]["parts"].append(("data:image/jpeg;base64," + img) if img[:4]!="data" else img)
  394. for h in history[1:]:
  395. hist.append({"role": "user" if h["role"]=="user" else "model", "parts": [h["content"]]})
  396. return hist
  397. def describe(self, image):
  398. from PIL.Image import open
  399. prompt = (
  400. "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等,如果有数据请提取出数据。"
  401. if self.lang.lower() == "chinese"
  402. else "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out."
  403. )
  404. b64 = self.image2base64(image)
  405. img = open(BytesIO(base64.b64decode(b64)))
  406. input = [prompt, img]
  407. res = self.model.generate_content(input)
  408. img.close()
  409. return res.text, res.usage_metadata.total_token_count
  410. def describe_with_prompt(self, image, prompt=None):
  411. from PIL.Image import open
  412. b64 = self.image2base64(image)
  413. vision_prompt = prompt if prompt else vision_llm_describe_prompt()
  414. img = open(BytesIO(base64.b64decode(b64)))
  415. input = [vision_prompt, img]
  416. res = self.model.generate_content(
  417. input,
  418. )
  419. img.close()
  420. return res.text, res.usage_metadata.total_token_count
  421. def chat(self, system, history, gen_conf, images=[]):
  422. from transformers import GenerationConfig
  423. try:
  424. response = self.model.generate_content(
  425. self._form_history(system, history, images),
  426. generation_config=GenerationConfig(temperature=gen_conf.get("temperature", 0.3), top_p=gen_conf.get("top_p", 0.7)))
  427. ans = response.text
  428. return ans, response.usage_metadata.total_token_count
  429. except Exception as e:
  430. return "**ERROR**: " + str(e), 0
  431. def chat_streamly(self, system, history, gen_conf, images=[]):
  432. from transformers import GenerationConfig
  433. ans = ""
  434. try:
  435. response = self.model.generate_content(
  436. self._form_history(system, history, images),
  437. generation_config=GenerationConfig(temperature=gen_conf.get("temperature", 0.3), top_p=gen_conf.get("top_p", 0.7)),
  438. stream=True,
  439. )
  440. for resp in response:
  441. if not resp.text:
  442. continue
  443. ans = resp.text
  444. yield ans
  445. except Exception as e:
  446. yield ans + "\n**ERROR**: " + str(e)
  447. yield response._chunks[-1].usage_metadata.total_token_count
  448. class NvidiaCV(Base):
  449. _FACTORY_NAME = "NVIDIA"
  450. def __init__(
  451. self,
  452. key,
  453. model_name,
  454. lang="Chinese",
  455. base_url="https://ai.api.nvidia.com/v1/vlm", **kwargs
  456. ):
  457. if not base_url:
  458. base_url = ("https://ai.api.nvidia.com/v1/vlm",)
  459. self.lang = lang
  460. factory, llm_name = model_name.split("/")
  461. if factory != "liuhaotian":
  462. self.base_url = urljoin(base_url, f"{factory}/{llm_name}")
  463. else:
  464. self.base_url = urljoin(f"{base_url}/community", llm_name.replace("-v1.6", "16"))
  465. self.key = key
  466. Base.__init__(self, **kwargs)
  467. def _image_prompt(self, text, images):
  468. if not images:
  469. return text
  470. htmls = ""
  471. for img in images:
  472. htmls += ' <img src="{}"/>'.format(f"data:image/jpeg;base64,{img}" if img[:4] != "data" else img)
  473. return text + htmls
  474. def describe(self, image):
  475. b64 = self.image2base64(image)
  476. response = requests.post(
  477. url=self.base_url,
  478. headers={
  479. "accept": "application/json",
  480. "content-type": "application/json",
  481. "Authorization": f"Bearer {self.key}",
  482. },
  483. json={"messages": self.prompt(b64)},
  484. )
  485. response = response.json()
  486. return (
  487. response["choices"][0]["message"]["content"].strip(),
  488. response["usage"]["total_tokens"],
  489. )
  490. def _request(self, msg, gen_conf={}):
  491. response = requests.post(
  492. url=self.base_url,
  493. headers={
  494. "accept": "application/json",
  495. "content-type": "application/json",
  496. "Authorization": f"Bearer {self.key}",
  497. },
  498. json={
  499. "messages": msg, **gen_conf
  500. },
  501. )
  502. return response.json()
  503. def describe_with_prompt(self, image, prompt=None):
  504. b64 = self.image2base64(image)
  505. vision_prompt = self.vision_llm_prompt(b64, prompt) if prompt else self.vision_llm_prompt(b64)
  506. response = self._request(vision_prompt)
  507. return (
  508. response["choices"][0]["message"]["content"].strip(),
  509. response["usage"]["total_tokens"],
  510. )
  511. def chat(self, system, history, gen_conf, images=[], **kwargs):
  512. try:
  513. response = self._request(self._form_history(system, history, images), gen_conf)
  514. return (
  515. response["choices"][0]["message"]["content"].strip(),
  516. response["usage"]["total_tokens"],
  517. )
  518. except Exception as e:
  519. return "**ERROR**: " + str(e), 0
  520. def chat_streamly(self, system, history, gen_conf, images=[], **kwargs):
  521. try:
  522. response = self._request(self._form_history(system, history, images), gen_conf)
  523. cnt = response["choices"][0]["message"]["content"]
  524. for resp in cnt:
  525. yield resp
  526. except Exception as e:
  527. yield "\n**ERROR**: " + str(e)
  528. yield response["usage"]["total_tokens"]
  529. class AnthropicCV(Base):
  530. _FACTORY_NAME = "Anthropic"
  531. def __init__(self, key, model_name, base_url=None, **kwargs):
  532. import anthropic
  533. self.client = anthropic.Anthropic(api_key=key)
  534. self.model_name = model_name
  535. self.system = ""
  536. self.max_tokens = 8192
  537. if "haiku" in self.model_name or "opus" in self.model_name:
  538. self.max_tokens = 4096
  539. Base.__init__(self, **kwargs)
  540. def _image_prompt(self, text, images):
  541. if not images:
  542. return text
  543. pmpt = [{"type": "text", "text": text}]
  544. for img in images:
  545. pmpt.append({
  546. "type": "image",
  547. "source": {
  548. "type": "base64",
  549. "media_type": "image/jpeg" if img[:4] != "data" else img.split(":")[1].split(";")[0],
  550. "data": img if img[:4] != "data" else img.split(",")[1]
  551. },
  552. }
  553. )
  554. return pmpt
  555. def describe(self, image):
  556. b64 = self.image2base64(image)
  557. response = self.client.messages.create(model=self.model_name, max_tokens=self.max_tokens, messages=self.prompt(b64))
  558. return response["content"][0]["text"].strip(), response["usage"]["input_tokens"] + response["usage"]["output_tokens"]
  559. def describe_with_prompt(self, image, prompt=None):
  560. b64 = self.image2base64(image)
  561. prompt = self.prompt(b64, prompt if prompt else vision_llm_describe_prompt())
  562. response = self.client.messages.create(model=self.model_name, max_tokens=self.max_tokens, messages=prompt)
  563. return response["content"][0]["text"].strip(), response["usage"]["input_tokens"] + response["usage"]["output_tokens"]
  564. def _clean_conf(self, gen_conf):
  565. if "presence_penalty" in gen_conf:
  566. del gen_conf["presence_penalty"]
  567. if "frequency_penalty" in gen_conf:
  568. del gen_conf["frequency_penalty"]
  569. if "max_token" in gen_conf:
  570. gen_conf["max_tokens"] = self.max_tokens
  571. return gen_conf
  572. def chat(self, system, history, gen_conf, images=[]):
  573. gen_conf = self._clean_conf(gen_conf)
  574. ans = ""
  575. try:
  576. response = self.client.messages.create(
  577. model=self.model_name,
  578. messages=self._form_history(system, history, images),
  579. system=system,
  580. stream=False,
  581. **gen_conf,
  582. ).to_dict()
  583. ans = response["content"][0]["text"]
  584. if response["stop_reason"] == "max_tokens":
  585. ans += "...\nFor the content length reason, it stopped, continue?" if is_english([ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
  586. return (
  587. ans,
  588. response["usage"]["input_tokens"] + response["usage"]["output_tokens"],
  589. )
  590. except Exception as e:
  591. return ans + "\n**ERROR**: " + str(e), 0
  592. def chat_streamly(self, system, history, gen_conf, images=[]):
  593. gen_conf = self._clean_conf(gen_conf)
  594. total_tokens = 0
  595. try:
  596. response = self.client.messages.create(
  597. model=self.model_name,
  598. messages=self._form_history(system, history, images),
  599. system=system,
  600. stream=True,
  601. **gen_conf,
  602. )
  603. think = False
  604. for res in response:
  605. if res.type == "content_block_delta":
  606. if res.delta.type == "thinking_delta" and res.delta.thinking:
  607. if not think:
  608. yield "<think>"
  609. think = True
  610. yield res.delta.thinking
  611. total_tokens += num_tokens_from_string(res.delta.thinking)
  612. elif think:
  613. yield "</think>"
  614. else:
  615. yield res.delta.text
  616. total_tokens += num_tokens_from_string(res.delta.text)
  617. except Exception as e:
  618. yield "\n**ERROR**: " + str(e)
  619. yield total_tokens
  620. class GoogleCV(AnthropicCV, GeminiCV):
  621. _FACTORY_NAME = "Google Cloud"
  622. def __init__(self, key, model_name, lang="Chinese", base_url=None, **kwargs):
  623. import base64
  624. from google.oauth2 import service_account
  625. key = json.loads(key)
  626. access_token = json.loads(base64.b64decode(key.get("google_service_account_key", "")))
  627. project_id = key.get("google_project_id", "")
  628. region = key.get("google_region", "")
  629. scopes = ["https://www.googleapis.com/auth/cloud-platform"]
  630. self.model_name = model_name
  631. self.lang = lang
  632. if "claude" in self.model_name:
  633. from anthropic import AnthropicVertex
  634. from google.auth.transport.requests import Request
  635. if access_token:
  636. credits = service_account.Credentials.from_service_account_info(access_token, scopes=scopes)
  637. request = Request()
  638. credits.refresh(request)
  639. token = credits.token
  640. self.client = AnthropicVertex(region=region, project_id=project_id, access_token=token)
  641. else:
  642. self.client = AnthropicVertex(region=region, project_id=project_id)
  643. else:
  644. import vertexai.generative_models as glm
  645. from google.cloud import aiplatform
  646. if access_token:
  647. credits = service_account.Credentials.from_service_account_info(access_token)
  648. aiplatform.init(credentials=credits, project=project_id, location=region)
  649. else:
  650. aiplatform.init(project=project_id, location=region)
  651. self.client = glm.GenerativeModel(model_name=self.model_name)
  652. Base.__init__(self, **kwargs)
  653. def describe(self, image):
  654. if "claude" in self.model_name:
  655. return AnthropicCV.describe(self, image)
  656. else:
  657. return GeminiCV.describe(self, image)
  658. def describe_with_prompt(self, image, prompt=None):
  659. if "claude" in self.model_name:
  660. return AnthropicCV.describe_with_prompt(self, image, prompt)
  661. else:
  662. return GeminiCV.describe_with_prompt(self, image, prompt)
  663. def chat(self, system, history, gen_conf, images=[]):
  664. if "claude" in self.model_name:
  665. return AnthropicCV.chat(self, system, history, gen_conf, images)
  666. else:
  667. return GeminiCV.chat(self, system, history, gen_conf, images)
  668. def chat_streamly(self, system, history, gen_conf, images=[]):
  669. if "claude" in self.model_name:
  670. for ans in AnthropicCV.chat_streamly(self, system, history, gen_conf, images):
  671. yield ans
  672. else:
  673. for ans in GeminiCV.chat_streamly(self, system, history, gen_conf, images):
  674. yield ans