Ви не можете вибрати більше 25 тем. Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. from abc import ABC
  17. from openai import OpenAI
  18. import os
  19. import base64
  20. from io import BytesIO
  21. class Base(ABC):
  22. def __init__(self, key, model_name):
  23. pass
  24. def describe(self, image, max_tokens=300):
  25. raise NotImplementedError("Please implement encode method!")
  26. def image2base64(self, image):
  27. if isinstance(image, bytes):
  28. return base64.b64encode(image).decode("utf-8")
  29. if isinstance(image, BytesIO):
  30. return base64.b64encode(image.getvalue()).decode("utf-8")
  31. buffered = BytesIO()
  32. try:
  33. image.save(buffered, format="JPEG")
  34. except Exception as e:
  35. image.save(buffered, format="PNG")
  36. return base64.b64encode(buffered.getvalue()).decode("utf-8")
  37. def prompt(self, b64):
  38. return [
  39. {
  40. "role": "user",
  41. "content": [
  42. {
  43. "type": "text",
  44. "text": "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等。",
  45. },
  46. {
  47. "type": "image_url",
  48. "image_url": {
  49. "url": f"data:image/jpeg;base64,{b64}"
  50. },
  51. },
  52. ],
  53. }
  54. ]
  55. class GptV4(Base):
  56. def __init__(self, key, model_name="gpt-4-vision-preview"):
  57. self.client = OpenAI(api_key=key)
  58. self.model_name = model_name
  59. def describe(self, image, max_tokens=300):
  60. b64 = self.image2base64(image)
  61. res = self.client.chat.completions.create(
  62. model=self.model_name,
  63. messages=self.prompt(b64),
  64. max_tokens=max_tokens,
  65. )
  66. return res.choices[0].message.content.strip(), res.usage.total_tokens
  67. class QWenCV(Base):
  68. def __init__(self, key, model_name="qwen-vl-chat-v1"):
  69. import dashscope
  70. dashscope.api_key = key
  71. self.model_name = model_name
  72. def describe(self, image, max_tokens=300):
  73. from http import HTTPStatus
  74. from dashscope import MultiModalConversation
  75. response = MultiModalConversation.call(model=self.model_name,
  76. messages=self.prompt(self.image2base64(image)))
  77. if response.status_code == HTTPStatus.OK:
  78. return response.output.choices[0]['message']['content'], response.usage.output_tokens
  79. return response.message, 0
  80. from zhipuai import ZhipuAI
  81. class Zhipu4V(Base):
  82. def __init__(self, key, model_name="glm-4v"):
  83. self.client = ZhipuAI(api_key=key)
  84. self.model_name = model_name
  85. def describe(self, image, max_tokens=1024):
  86. b64 = self.image2base64(image)
  87. res = self.client.chat.completions.create(
  88. model=self.model_name,
  89. messages=self.prompt(b64),
  90. max_tokens=max_tokens,
  91. )
  92. return res.choices[0].message.content.strip(), res.usage.total_tokens