您最多可以选择25个主题。主题必须以字母或数字开头，可以包含连字符 (-)，并且长度不得超过35个字符。

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. import re
  2. import json
  3. import base64
  4. from selenium import webdriver
  5. from selenium.webdriver.chrome.options import Options
  6. from selenium.webdriver.chrome.service import Service
  7. from selenium.common.exceptions import TimeoutException
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.support.expected_conditions import staleness_of
  10. from webdriver_manager.chrome import ChromeDriverManager
  11. from selenium.webdriver.common.by import By
  12. def html2pdf(
  13. source: str,
  14. timeout: int = 2,
  15. install_driver: bool = True,
  16. print_options: dict = {},
  17. ):
  18. result = __get_pdf_from_html(source, timeout, install_driver, print_options)
  19. return result
  20. def __send_devtools(driver, cmd, params={}):
  21. resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
  22. url = driver.command_executor._url + resource
  23. body = json.dumps({"cmd": cmd, "params": params})
  24. response = driver.command_executor._request("POST", url, body)
  25. if not response:
  26. raise Exception(response.get("value"))
  27. return response.get("value")
  28. def __get_pdf_from_html(
  29. path: str,
  30. timeout: int,
  31. install_driver: bool,
  32. print_options: dict
  33. ):
  34. webdriver_options = Options()
  35. webdriver_prefs = {}
  36. webdriver_options.add_argument("--headless")
  37. webdriver_options.add_argument("--disable-gpu")
  38. webdriver_options.add_argument("--no-sandbox")
  39. webdriver_options.add_argument("--disable-dev-shm-usage")
  40. webdriver_options.experimental_options["prefs"] = webdriver_prefs
  41. webdriver_prefs["profile.default_content_settings"] = {"images": 2}
  42. if install_driver:
  43. service = Service(ChromeDriverManager().install())
  44. driver = webdriver.Chrome(service=service, options=webdriver_options)
  45. else:
  46. driver = webdriver.Chrome(options=webdriver_options)
  47. driver.get(path)
  48. try:
  49. WebDriverWait(driver, timeout).until(
  50. staleness_of(driver.find_element(by=By.TAG_NAME, value="html"))
  51. )
  52. except TimeoutException:
  53. calculated_print_options = {
  54. "landscape": False,
  55. "displayHeaderFooter": False,
  56. "printBackground": True,
  57. "preferCSSPageSize": True,
  58. }
  59. calculated_print_options.update(print_options)
  60. result = __send_devtools(
  61. driver, "Page.printToPDF", calculated_print_options)
  62. driver.quit()
  63. return base64.b64decode(result["data"])
  64. def is_valid_url(url: str) -> bool:
  65. return bool(re.match(r"(https?)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]", url))