You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import json
  17. import logging
  18. import os
  19. import re
  20. import time
  21. from abc import ABC
  22. import requests
  23. from api.utils.api_utils import timeout
  24. from deepdoc.parser import HtmlParser
  25. from agent.component.base import ComponentBase, ComponentParamBase
  26. class InvokeParam(ComponentParamBase):
  27. """
  28. Define the Crawler component parameters.
  29. """
  30. def __init__(self):
  31. super().__init__()
  32. self.proxy = None
  33. self.headers = ""
  34. self.method = "get"
  35. self.variables = []
  36. self.url = ""
  37. self.timeout = 60
  38. self.clean_html = False
  39. self.datatype = "json" # New parameter to determine data posting type
  40. def check(self):
  41. self.check_valid_value(self.method.lower(), "Type of content from the crawler", ['get', 'post', 'put'])
  42. self.check_empty(self.url, "End point URL")
  43. self.check_positive_integer(self.timeout, "Timeout time in second")
  44. self.check_boolean(self.clean_html, "Clean HTML")
  45. self.check_valid_value(self.datatype.lower(), "Data post type", ['json', 'formdata']) # Check for valid datapost value
  46. class Invoke(ComponentBase, ABC):
  47. component_name = "Invoke"
  48. @timeout(os.environ.get("COMPONENT_EXEC_TIMEOUT", 3))
  49. def _invoke(self, **kwargs):
  50. args = {}
  51. for para in self._param.variables:
  52. if para.get("value"):
  53. args[para["key"]] = para["value"]
  54. else:
  55. args[para["key"]] = self._canvas.get_variable_value(para["ref"])
  56. url = self._param.url.strip()
  57. if url.find("http") != 0:
  58. url = "http://" + url
  59. method = self._param.method.lower()
  60. headers = {}
  61. if self._param.headers:
  62. headers = json.loads(self._param.headers)
  63. proxies = None
  64. if re.sub(r"https?:?/?/?", "", self._param.proxy):
  65. proxies = {"http": self._param.proxy, "https": self._param.proxy}
  66. last_e = ""
  67. for _ in range(self._param.max_retries+1):
  68. try:
  69. if method == 'get':
  70. response = requests.get(url=url,
  71. params=args,
  72. headers=headers,
  73. proxies=proxies,
  74. timeout=self._param.timeout)
  75. if self._param.clean_html:
  76. sections = HtmlParser()(None, response.content)
  77. self.set_output("result", "\n".join(sections))
  78. else:
  79. self.set_output("result", response.text)
  80. if method == 'put':
  81. if self._param.datatype.lower() == 'json':
  82. response = requests.put(url=url,
  83. json=args,
  84. headers=headers,
  85. proxies=proxies,
  86. timeout=self._param.timeout)
  87. else:
  88. response = requests.put(url=url,
  89. data=args,
  90. headers=headers,
  91. proxies=proxies,
  92. timeout=self._param.timeout)
  93. if self._param.clean_html:
  94. sections = HtmlParser()(None, response.content)
  95. self.set_output("result", "\n".join(sections))
  96. else:
  97. self.set_output("result", response.text)
  98. if method == 'post':
  99. if self._param.datatype.lower() == 'json':
  100. response = requests.post(url=url,
  101. json=args,
  102. headers=headers,
  103. proxies=proxies,
  104. timeout=self._param.timeout)
  105. else:
  106. response = requests.post(url=url,
  107. data=args,
  108. headers=headers,
  109. proxies=proxies,
  110. timeout=self._param.timeout)
  111. if self._param.clean_html:
  112. self.set_output("result", "\n".join(sections))
  113. else:
  114. self.set_output("result", response.text)
  115. return self.output("result")
  116. except Exception as e:
  117. last_e = e
  118. logging.exception(f"Http request error: {e}")
  119. time.sleep(self._param.delay_after_error)
  120. if last_e:
  121. self.set_output("_ERROR", str(last_e))
  122. return f"Http request error: {last_e}"
  123. assert False, self.output()
  124. def thoughts(self) -> str:
  125. return "Waiting for the server respond..."