您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

web_utils.py 3.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. import re
  2. import socket
  3. from urllib.parse import urlparse
  4. import ipaddress
  5. import json
  6. import base64
  7. from selenium import webdriver
  8. from selenium.webdriver.chrome.options import Options
  9. from selenium.webdriver.chrome.service import Service
  10. from selenium.common.exceptions import TimeoutException
  11. from selenium.webdriver.support.ui import WebDriverWait
  12. from selenium.webdriver.support.expected_conditions import staleness_of
  13. from webdriver_manager.chrome import ChromeDriverManager
  14. from selenium.webdriver.common.by import By
  15. def html2pdf(
  16. source: str,
  17. timeout: int = 2,
  18. install_driver: bool = True,
  19. print_options: dict = {},
  20. ):
  21. result = __get_pdf_from_html(source, timeout, install_driver, print_options)
  22. return result
  23. def __send_devtools(driver, cmd, params={}):
  24. resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
  25. url = driver.command_executor._url + resource
  26. body = json.dumps({"cmd": cmd, "params": params})
  27. response = driver.command_executor._request("POST", url, body)
  28. if not response:
  29. raise Exception(response.get("value"))
  30. return response.get("value")
  31. def __get_pdf_from_html(
  32. path: str,
  33. timeout: int,
  34. install_driver: bool,
  35. print_options: dict
  36. ):
  37. webdriver_options = Options()
  38. webdriver_prefs = {}
  39. webdriver_options.add_argument("--headless")
  40. webdriver_options.add_argument("--disable-gpu")
  41. webdriver_options.add_argument("--no-sandbox")
  42. webdriver_options.add_argument("--disable-dev-shm-usage")
  43. webdriver_options.experimental_options["prefs"] = webdriver_prefs
  44. webdriver_prefs["profile.default_content_settings"] = {"images": 2}
  45. if install_driver:
  46. service = Service(ChromeDriverManager().install())
  47. driver = webdriver.Chrome(service=service, options=webdriver_options)
  48. else:
  49. driver = webdriver.Chrome(options=webdriver_options)
  50. driver.get(path)
  51. try:
  52. WebDriverWait(driver, timeout).until(
  53. staleness_of(driver.find_element(by=By.TAG_NAME, value="html"))
  54. )
  55. except TimeoutException:
  56. calculated_print_options = {
  57. "landscape": False,
  58. "displayHeaderFooter": False,
  59. "printBackground": True,
  60. "preferCSSPageSize": True,
  61. }
  62. calculated_print_options.update(print_options)
  63. result = __send_devtools(
  64. driver, "Page.printToPDF", calculated_print_options)
  65. driver.quit()
  66. return base64.b64decode(result["data"])
  67. def is_private_ip(ip: str) -> bool:
  68. try:
  69. ip_obj = ipaddress.ip_address(ip)
  70. return ip_obj.is_private
  71. except ValueError:
  72. return False
  73. def is_valid_url(url: str) -> bool:
  74. if not re.match(r"(https?)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]", url):
  75. return False
  76. parsed_url = urlparse(url)
  77. hostname = parsed_url.hostname
  78. if not hostname:
  79. return False
  80. try:
  81. ip = socket.gethostbyname(hostname)
  82. if is_private_ip(ip):
  83. return False
  84. except socket.gaierror:
  85. return False
  86. return True