You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

web_utils.py 2.6KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. import re
  2. import json
  3. import base64
  4. from selenium import webdriver
  5. from selenium.webdriver.chrome.options import Options
  6. from selenium.webdriver.chrome.service import Service
  7. from selenium.common.exceptions import TimeoutException
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.support.expected_conditions import staleness_of
  10. from webdriver_manager.chrome import ChromeDriverManager
  11. from selenium.webdriver.common.by import By
  12. def html2pdf(
  13. source: str,
  14. timeout: int = 2,
  15. install_driver: bool = True,
  16. print_options: dict = {},
  17. ):
  18. result = __get_pdf_from_html(source, timeout, install_driver, print_options)
  19. return result
  20. def __send_devtools(driver, cmd, params={}):
  21. resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
  22. url = driver.command_executor._url + resource
  23. body = json.dumps({"cmd": cmd, "params": params})
  24. response = driver.command_executor._request("POST", url, body)
  25. if not response:
  26. raise Exception(response.get("value"))
  27. return response.get("value")
  28. def __get_pdf_from_html(
  29. path: str,
  30. timeout: int,
  31. install_driver: bool,
  32. print_options: dict
  33. ):
  34. webdriver_options = Options()
  35. webdriver_prefs = {}
  36. webdriver_options.add_argument("--headless")
  37. webdriver_options.add_argument("--disable-gpu")
  38. webdriver_options.add_argument("--no-sandbox")
  39. webdriver_options.add_argument("--disable-dev-shm-usage")
  40. webdriver_options.experimental_options["prefs"] = webdriver_prefs
  41. webdriver_prefs["profile.default_content_settings"] = {"images": 2}
  42. if install_driver:
  43. service = Service(ChromeDriverManager().install())
  44. driver = webdriver.Chrome(service=service, options=webdriver_options)
  45. else:
  46. driver = webdriver.Chrome(options=webdriver_options)
  47. driver.get(path)
  48. try:
  49. WebDriverWait(driver, timeout).until(
  50. staleness_of(driver.find_element(by=By.TAG_NAME, value="html"))
  51. )
  52. except TimeoutException:
  53. calculated_print_options = {
  54. "landscape": False,
  55. "displayHeaderFooter": False,
  56. "printBackground": True,
  57. "preferCSSPageSize": True,
  58. }
  59. calculated_print_options.update(print_options)
  60. result = __send_devtools(
  61. driver, "Page.printToPDF", calculated_print_options)
  62. driver.quit()
  63. return base64.b64decode(result["data"])
  64. def is_valid_url(url: str) -> bool:
  65. return bool(re.match(r"(https?)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]", url))