Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import base64
  17. import json
  18. import os
  19. from abc import ABC
  20. from copy import deepcopy
  21. from io import BytesIO
  22. from urllib.parse import urljoin
  23. import requests
  24. from openai import OpenAI
  25. from openai.lib.azure import AzureOpenAI
  26. from zhipuai import ZhipuAI
  27. from rag.nlp import is_english
  28. from rag.prompts import vision_llm_describe_prompt
  29. from rag.utils import num_tokens_from_string
  30. class Base(ABC):
  31. def __init__(self, **kwargs):
  32. # Configure retry parameters
  33. self.max_retries = kwargs.get("max_retries", int(os.environ.get("LLM_MAX_RETRIES", 5)))
  34. self.base_delay = kwargs.get("retry_interval", float(os.environ.get("LLM_BASE_DELAY", 2.0)))
  35. self.max_rounds = kwargs.get("max_rounds", 5)
  36. self.is_tools = False
  37. self.tools = []
  38. self.toolcall_sessions = {}
  39. def describe(self, image):
  40. raise NotImplementedError("Please implement encode method!")
  41. def describe_with_prompt(self, image, prompt=None):
  42. raise NotImplementedError("Please implement encode method!")
  43. def _form_history(self, system, history, images=[]):
  44. hist = []
  45. if system:
  46. hist.append({"role": "system", "content": system})
  47. for h in history:
  48. if images and h["role"] == "user":
  49. h["content"] = self._image_prompt(h["content"], images)
  50. images = []
  51. hist.append(h)
  52. return hist
  53. def _image_prompt(self, text, images):
  54. if not images:
  55. return text
  56. if isinstance(images, str):
  57. images = [images]
  58. pmpt = [{"type": "text", "text": text}]
  59. for img in images:
  60. pmpt.append({
  61. "type": "image_url",
  62. "image_url": {
  63. "url": f"data:image/jpeg;base64,{img}" if img[:4] != "data" else img
  64. }
  65. })
  66. return pmpt
  67. def chat(self, system, history, gen_conf, images=[], **kwargs):
  68. try:
  69. response = self.client.chat.completions.create(
  70. model=self.model_name,
  71. messages=self._form_history(system, history, images)
  72. )
  73. return response.choices[0].message.content.strip(), response.usage.total_tokens
  74. except Exception as e:
  75. return "**ERROR**: " + str(e), 0
  76. def chat_streamly(self, system, history, gen_conf, images=[], **kwargs):
  77. ans = ""
  78. tk_count = 0
  79. try:
  80. response = self.client.chat.completions.create(
  81. model=self.model_name,
  82. messages=self._form_history(system, history, images),
  83. stream=True
  84. )
  85. for resp in response:
  86. if not resp.choices[0].delta.content:
  87. continue
  88. delta = resp.choices[0].delta.content
  89. ans = delta
  90. if resp.choices[0].finish_reason == "length":
  91. ans += "...\nFor the content length reason, it stopped, continue?" if is_english([ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
  92. if resp.choices[0].finish_reason == "stop":
  93. tk_count += resp.usage.total_tokens
  94. yield ans
  95. except Exception as e:
  96. yield ans + "\n**ERROR**: " + str(e)
  97. yield tk_count
  98. @staticmethod
  99. def image2base64(image):
  100. if isinstance(image, bytes):
  101. return base64.b64encode(image).decode("utf-8")
  102. if isinstance(image, BytesIO):
  103. return base64.b64encode(image.getvalue()).decode("utf-8")
  104. buffered = BytesIO()
  105. try:
  106. image.save(buffered, format="JPEG")
  107. except Exception:
  108. image.save(buffered, format="PNG")
  109. return base64.b64encode(buffered.getvalue()).decode("utf-8")
  110. def prompt(self, b64):
  111. return [
  112. {
  113. "role": "user",
  114. "content": self._image_prompt(
  115. "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等,如果有数据请提取出数据。"
  116. if self.lang.lower() == "chinese"
  117. else "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out.",
  118. b64
  119. )
  120. }
  121. ]
  122. def vision_llm_prompt(self, b64, prompt=None):
  123. return [
  124. {
  125. "role": "user",
  126. "content": self._image_prompt(prompt if prompt else vision_llm_describe_prompt(), b64)
  127. }
  128. ]
  129. class GptV4(Base):
  130. _FACTORY_NAME = "OpenAI"
  131. def __init__(self, key, model_name="gpt-4-vision-preview", lang="Chinese", base_url="https://api.openai.com/v1", **kwargs):
  132. if not base_url:
  133. base_url = "https://api.openai.com/v1"
  134. self.client = OpenAI(api_key=key, base_url=base_url)
  135. self.model_name = model_name
  136. self.lang = lang
  137. super().__init__(**kwargs)
  138. def describe(self, image):
  139. b64 = self.image2base64(image)
  140. res = self.client.chat.completions.create(
  141. model=self.model_name,
  142. messages=self.prompt(b64),
  143. )
  144. return res.choices[0].message.content.strip(), res.usage.total_tokens
  145. def describe_with_prompt(self, image, prompt=None):
  146. b64 = self.image2base64(image)
  147. res = self.client.chat.completions.create(
  148. model=self.model_name,
  149. messages=self.vision_llm_prompt(b64, prompt),
  150. )
  151. return res.choices[0].message.content.strip(), res.usage.total_tokens
  152. class AzureGptV4(GptV4):
  153. _FACTORY_NAME = "Azure-OpenAI"
  154. def __init__(self, key, model_name, lang="Chinese", **kwargs):
  155. api_key = json.loads(key).get("api_key", "")
  156. api_version = json.loads(key).get("api_version", "2024-02-01")
  157. self.client = AzureOpenAI(api_key=api_key, azure_endpoint=kwargs["base_url"], api_version=api_version)
  158. self.model_name = model_name
  159. self.lang = lang
  160. Base.__init__(self, **kwargs)
  161. class xAICV(GptV4):
  162. _FACTORY_NAME = "xAI"
  163. def __init__(self, key, model_name="grok-3", lang="Chinese", base_url=None, **kwargs):
  164. if not base_url:
  165. base_url = "https://api.x.ai/v1"
  166. super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs)
  167. class QWenCV(GptV4):
  168. _FACTORY_NAME = "Tongyi-Qianwen"
  169. def __init__(self, key, model_name="qwen-vl-chat-v1", lang="Chinese", base_url=None, **kwargs):
  170. if not base_url:
  171. base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
  172. super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs)
  173. class HunyuanCV(GptV4):
  174. _FACTORY_NAME = "Tencent Hunyuan"
  175. def __init__(self, key, model_name, lang="Chinese", base_url=None, **kwargs):
  176. if not base_url:
  177. base_url = "https://api.hunyuan.cloud.tencent.com/v1"
  178. super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs)
  179. class Zhipu4V(GptV4):
  180. _FACTORY_NAME = "ZHIPU-AI"
  181. def __init__(self, key, model_name="glm-4v", lang="Chinese", **kwargs):
  182. self.client = ZhipuAI(api_key=key)
  183. self.model_name = model_name
  184. self.lang = lang
  185. Base.__init__(self, **kwargs)
  186. class StepFunCV(GptV4):
  187. _FACTORY_NAME = "StepFun"
  188. def __init__(self, key, model_name="step-1v-8k", lang="Chinese", base_url="https://api.stepfun.com/v1", **kwargs):
  189. if not base_url:
  190. base_url = "https://api.stepfun.com/v1"
  191. self.client = OpenAI(api_key=key, base_url=base_url)
  192. self.model_name = model_name
  193. self.lang = lang
  194. Base.__init__(self, **kwargs)
  195. class LmStudioCV(GptV4):
  196. _FACTORY_NAME = "LM-Studio"
  197. def __init__(self, key, model_name, lang="Chinese", base_url="", **kwargs):
  198. if not base_url:
  199. raise ValueError("Local llm url cannot be None")
  200. base_url = urljoin(base_url, "v1")
  201. self.client = OpenAI(api_key="lm-studio", base_url=base_url)
  202. self.model_name = model_name
  203. self.lang = lang
  204. Base.__init__(self, **kwargs)
  205. class OpenAI_APICV(GptV4):
  206. _FACTORY_NAME = ["VLLM", "OpenAI-API-Compatible"]
  207. def __init__(self, key, model_name, lang="Chinese", base_url="", **kwargs):
  208. if not base_url:
  209. raise ValueError("url cannot be None")
  210. base_url = urljoin(base_url, "v1")
  211. self.client = OpenAI(api_key=key, base_url=base_url)
  212. self.model_name = model_name.split("___")[0]
  213. self.lang = lang
  214. Base.__init__(self, **kwargs)
  215. class TogetherAICV(GptV4):
  216. _FACTORY_NAME = "TogetherAI"
  217. def __init__(self, key, model_name, lang="Chinese", base_url="https://api.together.xyz/v1", **kwargs):
  218. if not base_url:
  219. base_url = "https://api.together.xyz/v1"
  220. super().__init__(key, model_name, lang, base_url, **kwargs)
  221. class YiCV(GptV4):
  222. _FACTORY_NAME = "01.AI"
  223. def __init__(
  224. self,
  225. key,
  226. model_name,
  227. lang="Chinese",
  228. base_url="https://api.lingyiwanwu.com/v1", **kwargs
  229. ):
  230. if not base_url:
  231. base_url = "https://api.lingyiwanwu.com/v1"
  232. super().__init__(key, model_name, lang, base_url, **kwargs)
  233. class SILICONFLOWCV(GptV4):
  234. _FACTORY_NAME = "SILICONFLOW"
  235. def __init__(
  236. self,
  237. key,
  238. model_name,
  239. lang="Chinese",
  240. base_url="https://api.siliconflow.cn/v1", **kwargs
  241. ):
  242. if not base_url:
  243. base_url = "https://api.siliconflow.cn/v1"
  244. super().__init__(key, model_name, lang, base_url, **kwargs)
  245. class OpenRouterCV(GptV4):
  246. _FACTORY_NAME = "OpenRouter"
  247. def __init__(
  248. self,
  249. key,
  250. model_name,
  251. lang="Chinese",
  252. base_url="https://openrouter.ai/api/v1", **kwargs
  253. ):
  254. if not base_url:
  255. base_url = "https://openrouter.ai/api/v1"
  256. self.client = OpenAI(api_key=key, base_url=base_url)
  257. self.model_name = model_name
  258. self.lang = lang
  259. Base.__init__(self, **kwargs)
  260. class LocalAICV(GptV4):
  261. _FACTORY_NAME = "LocalAI"
  262. def __init__(self, key, model_name, base_url, lang="Chinese", **kwargs):
  263. if not base_url:
  264. raise ValueError("Local cv model url cannot be None")
  265. base_url = urljoin(base_url, "v1")
  266. self.client = OpenAI(api_key="empty", base_url=base_url)
  267. self.model_name = model_name.split("___")[0]
  268. self.lang = lang
  269. Base.__init__(self, **kwargs)
  270. class XinferenceCV(GptV4):
  271. _FACTORY_NAME = "Xinference"
  272. def __init__(self, key, model_name="", lang="Chinese", base_url="", **kwargs):
  273. base_url = urljoin(base_url, "v1")
  274. self.client = OpenAI(api_key=key, base_url=base_url)
  275. self.model_name = model_name
  276. self.lang = lang
  277. Base.__init__(self, **kwargs)
  278. class GPUStackCV(GptV4):
  279. _FACTORY_NAME = "GPUStack"
  280. def __init__(self, key, model_name, lang="Chinese", base_url="", **kwargs):
  281. if not base_url:
  282. raise ValueError("Local llm url cannot be None")
  283. base_url = urljoin(base_url, "v1")
  284. self.client = OpenAI(api_key=key, base_url=base_url)
  285. self.model_name = model_name
  286. self.lang = lang
  287. Base.__init__(self, **kwargs)
  288. class LocalCV(Base):
  289. _FACTORY_NAME = "Moonshot"
  290. def __init__(self, key, model_name="glm-4v", lang="Chinese", **kwargs):
  291. pass
  292. def describe(self, image):
  293. return "", 0
  294. class OllamaCV(Base):
  295. _FACTORY_NAME = "Ollama"
  296. def __init__(self, key, model_name, lang="Chinese", **kwargs):
  297. from ollama import Client
  298. self.client = Client(host=kwargs["base_url"])
  299. self.model_name = model_name
  300. self.lang = lang
  301. self.keep_alive = kwargs.get("ollama_keep_alive", int(os.environ.get("OLLAMA_KEEP_ALIVE", -1)))
  302. Base.__init__(self, **kwargs)
  303. def _clean_conf(self, gen_conf):
  304. options = {}
  305. if "temperature" in gen_conf:
  306. options["temperature"] = gen_conf["temperature"]
  307. if "top_p" in gen_conf:
  308. options["top_k"] = gen_conf["top_p"]
  309. if "presence_penalty" in gen_conf:
  310. options["presence_penalty"] = gen_conf["presence_penalty"]
  311. if "frequency_penalty" in gen_conf:
  312. options["frequency_penalty"] = gen_conf["frequency_penalty"]
  313. return options
  314. def _form_history(self, system, history, images=[]):
  315. hist = deepcopy(history)
  316. if system and hist[0]["role"] == "user":
  317. hist.insert(0, {"role": "system", "content": system})
  318. if not images:
  319. return hist
  320. for his in hist:
  321. if his["role"] == "user":
  322. his["images"] = images
  323. break
  324. return hist
  325. def describe(self, image):
  326. prompt = self.prompt("")
  327. try:
  328. response = self.client.generate(
  329. model=self.model_name,
  330. prompt=prompt[0]["content"][0]["text"],
  331. images=[image],
  332. )
  333. ans = response["response"].strip()
  334. return ans, 128
  335. except Exception as e:
  336. return "**ERROR**: " + str(e), 0
  337. def describe_with_prompt(self, image, prompt=None):
  338. vision_prompt = self.vision_llm_prompt("", prompt) if prompt else self.vision_llm_prompt("")
  339. try:
  340. response = self.client.generate(
  341. model=self.model_name,
  342. prompt=vision_prompt[0]["content"][0]["text"],
  343. images=[image],
  344. )
  345. ans = response["response"].strip()
  346. return ans, 128
  347. except Exception as e:
  348. return "**ERROR**: " + str(e), 0
  349. def chat(self, system, history, gen_conf, images=[]):
  350. try:
  351. response = self.client.chat(
  352. model=self.model_name,
  353. messages=self._form_history(system, history, images),
  354. options=self._clean_conf(gen_conf),
  355. keep_alive=self.keep_alive
  356. )
  357. ans = response["message"]["content"].strip()
  358. return ans, response["eval_count"] + response.get("prompt_eval_count", 0)
  359. except Exception as e:
  360. return "**ERROR**: " + str(e), 0
  361. def chat_streamly(self, system, history, gen_conf, images=[]):
  362. ans = ""
  363. try:
  364. response = self.client.chat(
  365. model=self.model_name,
  366. messages=self._form_history(system, history, images),
  367. stream=True,
  368. options=self._clean_conf(gen_conf),
  369. keep_alive=self.keep_alive
  370. )
  371. for resp in response:
  372. if resp["done"]:
  373. yield resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0)
  374. ans = resp["message"]["content"]
  375. yield ans
  376. except Exception as e:
  377. yield ans + "\n**ERROR**: " + str(e)
  378. yield 0
  379. class GeminiCV(Base):
  380. _FACTORY_NAME = "Gemini"
  381. def __init__(self, key, model_name="gemini-1.0-pro-vision-latest", lang="Chinese", **kwargs):
  382. from google.generativeai import GenerativeModel, client
  383. client.configure(api_key=key)
  384. _client = client.get_default_generative_client()
  385. self.model_name = model_name
  386. self.model = GenerativeModel(model_name=self.model_name)
  387. self.model._client = _client
  388. self.lang = lang
  389. Base.__init__(self, **kwargs)
  390. def _form_history(self, system, history, images=[]):
  391. hist = []
  392. if system:
  393. hist.append({"role": "user", "parts": [system, history[0]["content"]]})
  394. for img in images:
  395. hist[0]["parts"].append(("data:image/jpeg;base64," + img) if img[:4]!="data" else img)
  396. for h in history[1:]:
  397. hist.append({"role": "user" if h["role"]=="user" else "model", "parts": [h["content"]]})
  398. return hist
  399. def describe(self, image):
  400. from PIL.Image import open
  401. prompt = (
  402. "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等,如果有数据请提取出数据。"
  403. if self.lang.lower() == "chinese"
  404. else "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out."
  405. )
  406. b64 = self.image2base64(image)
  407. img = open(BytesIO(base64.b64decode(b64)))
  408. input = [prompt, img]
  409. res = self.model.generate_content(input)
  410. img.close()
  411. return res.text, res.usage_metadata.total_token_count
  412. def describe_with_prompt(self, image, prompt=None):
  413. from PIL.Image import open
  414. b64 = self.image2base64(image)
  415. vision_prompt = prompt if prompt else vision_llm_describe_prompt()
  416. img = open(BytesIO(base64.b64decode(b64)))
  417. input = [vision_prompt, img]
  418. res = self.model.generate_content(
  419. input,
  420. )
  421. img.close()
  422. return res.text, res.usage_metadata.total_token_count
  423. def chat(self, system, history, gen_conf, images=[]):
  424. from transformers import GenerationConfig
  425. try:
  426. response = self.model.generate_content(
  427. self._form_history(system, history, images),
  428. generation_config=GenerationConfig(temperature=gen_conf.get("temperature", 0.3), top_p=gen_conf.get("top_p", 0.7)))
  429. ans = response.text
  430. return ans, response.usage_metadata.total_token_count
  431. except Exception as e:
  432. return "**ERROR**: " + str(e), 0
  433. def chat_streamly(self, system, history, gen_conf, images=[]):
  434. from transformers import GenerationConfig
  435. ans = ""
  436. try:
  437. response = self.model.generate_content(
  438. self._form_history(system, history, images),
  439. generation_config=GenerationConfig(temperature=gen_conf.get("temperature", 0.3), top_p=gen_conf.get("top_p", 0.7)),
  440. stream=True,
  441. )
  442. for resp in response:
  443. if not resp.text:
  444. continue
  445. ans = resp.text
  446. yield ans
  447. except Exception as e:
  448. yield ans + "\n**ERROR**: " + str(e)
  449. yield response._chunks[-1].usage_metadata.total_token_count
  450. class NvidiaCV(Base):
  451. _FACTORY_NAME = "NVIDIA"
  452. def __init__(
  453. self,
  454. key,
  455. model_name,
  456. lang="Chinese",
  457. base_url="https://ai.api.nvidia.com/v1/vlm", **kwargs
  458. ):
  459. if not base_url:
  460. base_url = ("https://ai.api.nvidia.com/v1/vlm",)
  461. self.lang = lang
  462. factory, llm_name = model_name.split("/")
  463. if factory != "liuhaotian":
  464. self.base_url = urljoin(base_url, f"{factory}/{llm_name}")
  465. else:
  466. self.base_url = urljoin(f"{base_url}/community", llm_name.replace("-v1.6", "16"))
  467. self.key = key
  468. Base.__init__(self, **kwargs)
  469. def _image_prompt(self, text, images):
  470. if not images:
  471. return text
  472. htmls = ""
  473. for img in images:
  474. htmls += ' <img src="{}"/>'.format(f"data:image/jpeg;base64,{img}" if img[:4] != "data" else img)
  475. return text + htmls
  476. def describe(self, image):
  477. b64 = self.image2base64(image)
  478. response = requests.post(
  479. url=self.base_url,
  480. headers={
  481. "accept": "application/json",
  482. "content-type": "application/json",
  483. "Authorization": f"Bearer {self.key}",
  484. },
  485. json={"messages": self.prompt(b64)},
  486. )
  487. response = response.json()
  488. return (
  489. response["choices"][0]["message"]["content"].strip(),
  490. response["usage"]["total_tokens"],
  491. )
  492. def _request(self, msg, gen_conf={}):
  493. response = requests.post(
  494. url=self.base_url,
  495. headers={
  496. "accept": "application/json",
  497. "content-type": "application/json",
  498. "Authorization": f"Bearer {self.key}",
  499. },
  500. json={
  501. "messages": msg, **gen_conf
  502. },
  503. )
  504. return response.json()
  505. def describe_with_prompt(self, image, prompt=None):
  506. b64 = self.image2base64(image)
  507. vision_prompt = self.vision_llm_prompt(b64, prompt) if prompt else self.vision_llm_prompt(b64)
  508. response = self._request(vision_prompt)
  509. return (
  510. response["choices"][0]["message"]["content"].strip(),
  511. response["usage"]["total_tokens"],
  512. )
  513. def chat(self, system, history, gen_conf, images=[], **kwargs):
  514. try:
  515. response = self._request(self._form_history(system, history, images), gen_conf)
  516. return (
  517. response["choices"][0]["message"]["content"].strip(),
  518. response["usage"]["total_tokens"],
  519. )
  520. except Exception as e:
  521. return "**ERROR**: " + str(e), 0
  522. def chat_streamly(self, system, history, gen_conf, images=[], **kwargs):
  523. try:
  524. response = self._request(self._form_history(system, history, images), gen_conf)
  525. cnt = response["choices"][0]["message"]["content"]
  526. for resp in cnt:
  527. yield resp
  528. except Exception as e:
  529. yield "\n**ERROR**: " + str(e)
  530. yield response["usage"]["total_tokens"]
  531. class AnthropicCV(Base):
  532. _FACTORY_NAME = "Anthropic"
  533. def __init__(self, key, model_name, base_url=None, **kwargs):
  534. import anthropic
  535. self.client = anthropic.Anthropic(api_key=key)
  536. self.model_name = model_name
  537. self.system = ""
  538. self.max_tokens = 8192
  539. if "haiku" in self.model_name or "opus" in self.model_name:
  540. self.max_tokens = 4096
  541. Base.__init__(self, **kwargs)
  542. def _image_prompt(self, text, images):
  543. if not images:
  544. return text
  545. pmpt = [{"type": "text", "text": text}]
  546. for img in images:
  547. pmpt.append({
  548. "type": "image",
  549. "source": {
  550. "type": "base64",
  551. "media_type": "image/jpeg" if img[:4] != "data" else img.split(":")[1].split(";")[0],
  552. "data": img if img[:4] != "data" else img.split(",")[1]
  553. },
  554. }
  555. )
  556. return pmpt
  557. def describe(self, image):
  558. b64 = self.image2base64(image)
  559. response = self.client.messages.create(model=self.model_name, max_tokens=self.max_tokens, messages=self.prompt(b64))
  560. return response["content"][0]["text"].strip(), response["usage"]["input_tokens"] + response["usage"]["output_tokens"]
  561. def describe_with_prompt(self, image, prompt=None):
  562. b64 = self.image2base64(image)
  563. prompt = self.prompt(b64, prompt if prompt else vision_llm_describe_prompt())
  564. response = self.client.messages.create(model=self.model_name, max_tokens=self.max_tokens, messages=prompt)
  565. return response["content"][0]["text"].strip(), response["usage"]["input_tokens"] + response["usage"]["output_tokens"]
  566. def _clean_conf(self, gen_conf):
  567. if "presence_penalty" in gen_conf:
  568. del gen_conf["presence_penalty"]
  569. if "frequency_penalty" in gen_conf:
  570. del gen_conf["frequency_penalty"]
  571. if "max_token" in gen_conf:
  572. gen_conf["max_tokens"] = self.max_tokens
  573. return gen_conf
  574. def chat(self, system, history, gen_conf, images=[]):
  575. gen_conf = self._clean_conf(gen_conf)
  576. ans = ""
  577. try:
  578. response = self.client.messages.create(
  579. model=self.model_name,
  580. messages=self._form_history(system, history, images),
  581. system=system,
  582. stream=False,
  583. **gen_conf,
  584. ).to_dict()
  585. ans = response["content"][0]["text"]
  586. if response["stop_reason"] == "max_tokens":
  587. ans += "...\nFor the content length reason, it stopped, continue?" if is_english([ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
  588. return (
  589. ans,
  590. response["usage"]["input_tokens"] + response["usage"]["output_tokens"],
  591. )
  592. except Exception as e:
  593. return ans + "\n**ERROR**: " + str(e), 0
  594. def chat_streamly(self, system, history, gen_conf, images=[]):
  595. gen_conf = self._clean_conf(gen_conf)
  596. total_tokens = 0
  597. try:
  598. response = self.client.messages.create(
  599. model=self.model_name,
  600. messages=self._form_history(system, history, images),
  601. system=system,
  602. stream=True,
  603. **gen_conf,
  604. )
  605. think = False
  606. for res in response:
  607. if res.type == "content_block_delta":
  608. if res.delta.type == "thinking_delta" and res.delta.thinking:
  609. if not think:
  610. yield "<think>"
  611. think = True
  612. yield res.delta.thinking
  613. total_tokens += num_tokens_from_string(res.delta.thinking)
  614. elif think:
  615. yield "</think>"
  616. else:
  617. yield res.delta.text
  618. total_tokens += num_tokens_from_string(res.delta.text)
  619. except Exception as e:
  620. yield "\n**ERROR**: " + str(e)
  621. yield total_tokens
  622. class GoogleCV(AnthropicCV, GeminiCV):
  623. _FACTORY_NAME = "Google Cloud"
  624. def __init__(self, key, model_name, lang="Chinese", base_url=None, **kwargs):
  625. import base64
  626. from google.oauth2 import service_account
  627. key = json.loads(key)
  628. access_token = json.loads(base64.b64decode(key.get("google_service_account_key", "")))
  629. project_id = key.get("google_project_id", "")
  630. region = key.get("google_region", "")
  631. scopes = ["https://www.googleapis.com/auth/cloud-platform"]
  632. self.model_name = model_name
  633. self.lang = lang
  634. if "claude" in self.model_name:
  635. from anthropic import AnthropicVertex
  636. from google.auth.transport.requests import Request
  637. if access_token:
  638. credits = service_account.Credentials.from_service_account_info(access_token, scopes=scopes)
  639. request = Request()
  640. credits.refresh(request)
  641. token = credits.token
  642. self.client = AnthropicVertex(region=region, project_id=project_id, access_token=token)
  643. else:
  644. self.client = AnthropicVertex(region=region, project_id=project_id)
  645. else:
  646. import vertexai.generative_models as glm
  647. from google.cloud import aiplatform
  648. if access_token:
  649. credits = service_account.Credentials.from_service_account_info(access_token)
  650. aiplatform.init(credentials=credits, project=project_id, location=region)
  651. else:
  652. aiplatform.init(project=project_id, location=region)
  653. self.client = glm.GenerativeModel(model_name=self.model_name)
  654. Base.__init__(self, **kwargs)
  655. def describe(self, image):
  656. if "claude" in self.model_name:
  657. return AnthropicCV.describe(self, image)
  658. else:
  659. return GeminiCV.describe(self, image)
  660. def describe_with_prompt(self, image, prompt=None):
  661. if "claude" in self.model_name:
  662. return AnthropicCV.describe_with_prompt(self, image, prompt)
  663. else:
  664. return GeminiCV.describe_with_prompt(self, image, prompt)
  665. def chat(self, system, history, gen_conf, images=[]):
  666. if "claude" in self.model_name:
  667. return AnthropicCV.chat(self, system, history, gen_conf, images)
  668. else:
  669. return GeminiCV.chat(self, system, history, gen_conf, images)
  670. def chat_streamly(self, system, history, gen_conf, images=[]):
  671. if "claude" in self.model_name:
  672. for ans in AnthropicCV.chat_streamly(self, system, history, gen_conf, images):
  673. yield ans
  674. else:
  675. for ans in GeminiCV.chat_streamly(self, system, history, gen_conf, images):
  676. yield ans