You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

parser.py 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. import re
  2. from json import dumps as json_dumps
  3. from json import loads as json_loads
  4. from json.decoder import JSONDecodeError
  5. from typing import Any
  6. from flask import request
  7. from requests import get
  8. from yaml import YAMLError, safe_load
  9. from core.tools.entities.common_entities import I18nObject
  10. from core.tools.entities.tool_bundle import ApiToolBundle
  11. from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolParameter
  12. from core.tools.errors import ToolApiSchemaError, ToolNotSupportedError, ToolProviderNotFoundError
  13. class ApiBasedToolSchemaParser:
  14. @staticmethod
  15. def parse_openapi_to_tool_bundle(
  16. openapi: dict, extra_info: dict | None = None, warning: dict | None = None
  17. ) -> list[ApiToolBundle]:
  18. warning = warning if warning is not None else {}
  19. extra_info = extra_info if extra_info is not None else {}
  20. # set description to extra_info
  21. extra_info["description"] = openapi["info"].get("description", "")
  22. if len(openapi["servers"]) == 0:
  23. raise ToolProviderNotFoundError("No server found in the openapi yaml.")
  24. server_url = openapi["servers"][0]["url"]
  25. request_env = request.headers.get("X-Request-Env")
  26. if request_env:
  27. matched_servers = [server["url"] for server in openapi["servers"] if server["env"] == request_env]
  28. server_url = matched_servers[0] if matched_servers else server_url
  29. # list all interfaces
  30. interfaces = []
  31. for path, path_item in openapi["paths"].items():
  32. methods = ["get", "post", "put", "delete", "patch", "head", "options", "trace"]
  33. for method in methods:
  34. if method in path_item:
  35. interfaces.append(
  36. {
  37. "path": path,
  38. "method": method,
  39. "operation": path_item[method],
  40. }
  41. )
  42. # get all parameters
  43. bundles = []
  44. for interface in interfaces:
  45. # convert parameters
  46. parameters = []
  47. if "parameters" in interface["operation"]:
  48. for i, parameter in enumerate(interface["operation"]["parameters"]):
  49. if "$ref" in parameter:
  50. root = openapi
  51. reference = parameter["$ref"].split("/")[1:]
  52. for ref in reference:
  53. root = root[ref]
  54. interface["operation"]["parameters"][i] = root
  55. for parameter in interface["operation"]["parameters"]:
  56. tool_parameter = ToolParameter(
  57. name=parameter["name"],
  58. label=I18nObject(en_US=parameter["name"], zh_Hans=parameter["name"]),
  59. human_description=I18nObject(
  60. en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "")
  61. ),
  62. type=ToolParameter.ToolParameterType.STRING,
  63. required=parameter.get("required", False),
  64. form=ToolParameter.ToolParameterForm.LLM,
  65. llm_description=parameter.get("description"),
  66. default=parameter["schema"]["default"]
  67. if "schema" in parameter and "default" in parameter["schema"]
  68. else None,
  69. placeholder=I18nObject(
  70. en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "")
  71. ),
  72. )
  73. # check if there is a type
  74. typ = ApiBasedToolSchemaParser._get_tool_parameter_type(parameter)
  75. if typ:
  76. tool_parameter.type = typ
  77. parameters.append(tool_parameter)
  78. # create tool bundle
  79. # check if there is a request body
  80. if "requestBody" in interface["operation"]:
  81. request_body = interface["operation"]["requestBody"]
  82. if "content" in request_body:
  83. for content_type, content in request_body["content"].items():
  84. # if there is a reference, get the reference and overwrite the content
  85. if "schema" not in content:
  86. continue
  87. if "$ref" in content["schema"]:
  88. # get the reference
  89. root = openapi
  90. reference = content["schema"]["$ref"].split("/")[1:]
  91. for ref in reference:
  92. root = root[ref]
  93. # overwrite the content
  94. interface["operation"]["requestBody"]["content"][content_type]["schema"] = root
  95. # handle allOf reference in schema properties
  96. for prop_dict in root.get("properties", {}).values():
  97. for item in prop_dict.get("allOf", []):
  98. if "$ref" in item:
  99. ref_schema = openapi
  100. reference = item["$ref"].split("/")[1:]
  101. for ref in reference:
  102. ref_schema = ref_schema[ref]
  103. else:
  104. ref_schema = item
  105. for key, value in ref_schema.items():
  106. if isinstance(value, list):
  107. if key not in prop_dict:
  108. prop_dict[key] = []
  109. # extends list field
  110. if isinstance(prop_dict[key], list):
  111. prop_dict[key].extend(value)
  112. elif key not in prop_dict:
  113. # add new field
  114. prop_dict[key] = value
  115. if "allOf" in prop_dict:
  116. del prop_dict["allOf"]
  117. # parse body parameters
  118. if "schema" in interface["operation"]["requestBody"]["content"][content_type]:
  119. body_schema = interface["operation"]["requestBody"]["content"][content_type]["schema"]
  120. required = body_schema.get("required", [])
  121. properties = body_schema.get("properties", {})
  122. for name, property in properties.items():
  123. tool = ToolParameter(
  124. name=name,
  125. label=I18nObject(en_US=name, zh_Hans=name),
  126. human_description=I18nObject(
  127. en_US=property.get("description", ""), zh_Hans=property.get("description", "")
  128. ),
  129. type=ToolParameter.ToolParameterType.STRING,
  130. required=name in required,
  131. form=ToolParameter.ToolParameterForm.LLM,
  132. llm_description=property.get("description", ""),
  133. default=property.get("default", None),
  134. placeholder=I18nObject(
  135. en_US=property.get("description", ""), zh_Hans=property.get("description", "")
  136. ),
  137. )
  138. # check if there is a type
  139. typ = ApiBasedToolSchemaParser._get_tool_parameter_type(property)
  140. if typ:
  141. tool.type = typ
  142. parameters.append(tool)
  143. # check if parameters is duplicated
  144. parameters_count = {}
  145. for parameter in parameters:
  146. if parameter.name not in parameters_count:
  147. parameters_count[parameter.name] = 0
  148. parameters_count[parameter.name] += 1
  149. for name, count in parameters_count.items():
  150. if count > 1:
  151. warning["duplicated_parameter"] = f"Parameter {name} is duplicated."
  152. # check if there is a operation id, use $path_$method as operation id if not
  153. if "operationId" not in interface["operation"]:
  154. # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
  155. path = interface["path"]
  156. if interface["path"].startswith("/"):
  157. path = interface["path"][1:]
  158. # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
  159. path = re.sub(r"[^a-zA-Z0-9_-]", "", path)
  160. if not path:
  161. path = "<root>"
  162. interface["operation"]["operationId"] = f"{path}_{interface['method']}"
  163. bundles.append(
  164. ApiToolBundle(
  165. server_url=server_url + interface["path"],
  166. method=interface["method"],
  167. summary=interface["operation"]["description"]
  168. if "description" in interface["operation"]
  169. else interface["operation"].get("summary", None),
  170. operation_id=interface["operation"]["operationId"],
  171. parameters=parameters,
  172. author="",
  173. icon=None,
  174. openapi=interface["operation"],
  175. )
  176. )
  177. return bundles
  178. @staticmethod
  179. def _get_tool_parameter_type(parameter: dict) -> ToolParameter.ToolParameterType | None:
  180. parameter = parameter or {}
  181. typ: str | None = None
  182. if parameter.get("format") == "binary":
  183. return ToolParameter.ToolParameterType.FILE
  184. if "type" in parameter:
  185. typ = parameter["type"]
  186. elif "schema" in parameter and "type" in parameter["schema"]:
  187. typ = parameter["schema"]["type"]
  188. if typ in {"integer", "number"}:
  189. return ToolParameter.ToolParameterType.NUMBER
  190. elif typ == "boolean":
  191. return ToolParameter.ToolParameterType.BOOLEAN
  192. elif typ == "string":
  193. return ToolParameter.ToolParameterType.STRING
  194. elif typ == "array":
  195. items = parameter.get("items") or parameter.get("schema", {}).get("items")
  196. return ToolParameter.ToolParameterType.FILES if items and items.get("format") == "binary" else None
  197. else:
  198. return None
  199. @staticmethod
  200. def parse_openapi_yaml_to_tool_bundle(
  201. yaml: str, extra_info: dict | None = None, warning: dict | None = None
  202. ) -> list[ApiToolBundle]:
  203. """
  204. parse openapi yaml to tool bundle
  205. :param yaml: the yaml string
  206. :param extra_info: the extra info
  207. :param warning: the warning message
  208. :return: the tool bundle
  209. """
  210. warning = warning if warning is not None else {}
  211. extra_info = extra_info if extra_info is not None else {}
  212. openapi: dict = safe_load(yaml)
  213. if openapi is None:
  214. raise ToolApiSchemaError("Invalid openapi yaml.")
  215. return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi, extra_info=extra_info, warning=warning)
  216. @staticmethod
  217. def parse_swagger_to_openapi(
  218. swagger: dict, extra_info: dict | None = None, warning: dict | None = None
  219. ) -> dict[str, Any]:
  220. warning = warning or {}
  221. """
  222. parse swagger to openapi
  223. :param swagger: the swagger dict
  224. :return: the openapi dict
  225. """
  226. # convert swagger to openapi
  227. info = swagger.get("info", {"title": "Swagger", "description": "Swagger", "version": "1.0.0"})
  228. servers = swagger.get("servers", [])
  229. if len(servers) == 0:
  230. raise ToolApiSchemaError("No server found in the swagger yaml.")
  231. converted_openapi: dict[str, Any] = {
  232. "openapi": "3.0.0",
  233. "info": {
  234. "title": info.get("title", "Swagger"),
  235. "description": info.get("description", "Swagger"),
  236. "version": info.get("version", "1.0.0"),
  237. },
  238. "servers": swagger["servers"],
  239. "paths": {},
  240. "components": {"schemas": {}},
  241. }
  242. # check paths
  243. if "paths" not in swagger or len(swagger["paths"]) == 0:
  244. raise ToolApiSchemaError("No paths found in the swagger yaml.")
  245. # convert paths
  246. for path, path_item in swagger["paths"].items():
  247. converted_openapi["paths"][path] = {}
  248. for method, operation in path_item.items():
  249. if "operationId" not in operation:
  250. raise ToolApiSchemaError(f"No operationId found in operation {method} {path}.")
  251. if ("summary" not in operation or len(operation["summary"]) == 0) and (
  252. "description" not in operation or len(operation["description"]) == 0
  253. ):
  254. if warning is not None:
  255. warning["missing_summary"] = f"No summary or description found in operation {method} {path}."
  256. converted_openapi["paths"][path][method] = {
  257. "operationId": operation["operationId"],
  258. "summary": operation.get("summary", ""),
  259. "description": operation.get("description", ""),
  260. "parameters": operation.get("parameters", []),
  261. "responses": operation.get("responses", {}),
  262. }
  263. if "requestBody" in operation:
  264. converted_openapi["paths"][path][method]["requestBody"] = operation["requestBody"]
  265. # convert definitions
  266. if "definitions" in swagger:
  267. for name, definition in swagger["definitions"].items():
  268. converted_openapi["components"]["schemas"][name] = definition
  269. return converted_openapi
  270. @staticmethod
  271. def parse_openai_plugin_json_to_tool_bundle(
  272. json: str, extra_info: dict | None = None, warning: dict | None = None
  273. ) -> list[ApiToolBundle]:
  274. """
  275. parse openapi plugin yaml to tool bundle
  276. :param json: the json string
  277. :param extra_info: the extra info
  278. :param warning: the warning message
  279. :return: the tool bundle
  280. """
  281. warning = warning if warning is not None else {}
  282. extra_info = extra_info if extra_info is not None else {}
  283. try:
  284. openai_plugin = json_loads(json)
  285. api = openai_plugin["api"]
  286. api_url = api["url"]
  287. api_type = api["type"]
  288. except JSONDecodeError:
  289. raise ToolProviderNotFoundError("Invalid openai plugin json.")
  290. if api_type != "openapi":
  291. raise ToolNotSupportedError("Only openapi is supported now.")
  292. # get openapi yaml
  293. response = get(api_url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "}, timeout=5)
  294. if response.status_code != 200:
  295. raise ToolProviderNotFoundError("cannot get openapi yaml from url.")
  296. return ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle(
  297. response.text, extra_info=extra_info, warning=warning
  298. )
  299. @staticmethod
  300. def auto_parse_to_tool_bundle(
  301. content: str, extra_info: dict | None = None, warning: dict | None = None
  302. ) -> tuple[list[ApiToolBundle], str]:
  303. """
  304. auto parse to tool bundle
  305. :param content: the content
  306. :param extra_info: the extra info
  307. :param warning: the warning message
  308. :return: tools bundle, schema_type
  309. """
  310. warning = warning if warning is not None else {}
  311. extra_info = extra_info if extra_info is not None else {}
  312. content = content.strip()
  313. loaded_content = None
  314. json_error = None
  315. yaml_error = None
  316. try:
  317. loaded_content = json_loads(content)
  318. except JSONDecodeError as e:
  319. json_error = e
  320. if loaded_content is None:
  321. try:
  322. loaded_content = safe_load(content)
  323. except YAMLError as e:
  324. yaml_error = e
  325. if loaded_content is None:
  326. raise ToolApiSchemaError(
  327. f"Invalid api schema, schema is neither json nor yaml. json error: {str(json_error)},"
  328. f" yaml error: {str(yaml_error)}"
  329. )
  330. swagger_error = None
  331. openapi_error = None
  332. openapi_plugin_error = None
  333. schema_type = None
  334. try:
  335. openapi = ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
  336. loaded_content, extra_info=extra_info, warning=warning
  337. )
  338. schema_type = ApiProviderSchemaType.OPENAPI.value
  339. return openapi, schema_type
  340. except ToolApiSchemaError as e:
  341. openapi_error = e
  342. # openai parse error, fallback to swagger
  343. try:
  344. converted_swagger = ApiBasedToolSchemaParser.parse_swagger_to_openapi(
  345. loaded_content, extra_info=extra_info, warning=warning
  346. )
  347. schema_type = ApiProviderSchemaType.SWAGGER.value
  348. return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
  349. converted_swagger, extra_info=extra_info, warning=warning
  350. ), schema_type
  351. except ToolApiSchemaError as e:
  352. swagger_error = e
  353. # swagger parse error, fallback to openai plugin
  354. try:
  355. openapi_plugin = ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle(
  356. json_dumps(loaded_content), extra_info=extra_info, warning=warning
  357. )
  358. return openapi_plugin, ApiProviderSchemaType.OPENAI_PLUGIN.value
  359. except ToolNotSupportedError as e:
  360. # maybe it's not plugin at all
  361. openapi_plugin_error = e
  362. raise ToolApiSchemaError(
  363. f"Invalid api schema, openapi error: {str(openapi_error)}, swagger error: {str(swagger_error)},"
  364. f" openapi plugin error: {str(openapi_plugin_error)}"
  365. )