|
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980 |
- import re
- import json
- import base64
-
- from selenium import webdriver
- from selenium.webdriver.chrome.options import Options
- from selenium.webdriver.chrome.service import Service
- from selenium.common.exceptions import TimeoutException
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support.expected_conditions import staleness_of
- from webdriver_manager.chrome import ChromeDriverManager
- from selenium.webdriver.common.by import By
-
-
def html2pdf(
    source: str,
    timeout: int = 2,
    install_driver: bool = True,
    print_options: dict = None,
):
    """Render *source* in headless Chrome and return the resulting PDF.

    Thin public wrapper that forwards all arguments to
    ``__get_pdf_from_html``.

    Args:
        source: Location handed to ``driver.get`` (a URL or ``file://`` path).
        timeout: Seconds to wait on the loaded page before printing.
        install_driver: When true, obtain the chromedriver binary through
            ``ChromeDriverManager().install()``; otherwise rely on whatever
            ``webdriver.Chrome`` finds on its own.
        print_options: Extra ``Page.printToPDF`` parameters merged over the
            built-in defaults. ``None`` (the default) means no overrides.

    Returns:
        Whatever ``__get_pdf_from_html`` returns: the decoded PDF bytes.
    """
    # A ``None`` sentinel replaces the former ``{}`` default so that no dict
    # instance is shared between calls.
    if print_options is None:
        print_options = {}
    return __get_pdf_from_html(source, timeout, install_driver, print_options)
-
-
def __send_devtools(driver, cmd, params=None):
    """Send a DevTools command through chromedriver and return its value.

    The command is POSTed to the driver's
    ``/session/<id>/chromium/send_command_and_get_result`` endpoint using the
    driver's own command executor.

    Args:
        driver: Object exposing ``session_id`` and a ``command_executor``
            with ``_url`` and ``_request`` (a Selenium Chrome driver).
        cmd: DevTools method name, e.g. ``"Page.printToPDF"``.
        params: Parameters for the command; ``None`` is sent as ``{}``.

    Returns:
        The ``"value"`` entry of the response.

    Raises:
        Exception: If the executor returns an empty or missing response.
    """
    if params is None:
        params = {}

    resource = (
        f"/session/{driver.session_id}/chromium/send_command_and_get_result"
    )
    # NOTE(review): relies on the executor's private ``_url`` / ``_request``
    # members, exactly as the original code did.
    url = driver.command_executor._url + resource
    body = json.dumps({"cmd": cmd, "params": params})
    response = driver.command_executor._request("POST", url, body)

    if not response:
        # The previous code called ``response.get`` here, which raised
        # AttributeError on ``None`` and carried no message for ``{}``.
        raise Exception(f"No response received for DevTools command {cmd!r}")

    return response.get("value")
-
-
def __get_pdf_from_html(
    path: str,
    timeout: int,
    install_driver: bool,
    print_options: dict
):
    """Load *path* in headless Chrome and return the page printed as PDF bytes.

    Args:
        path: Location passed to ``driver.get``.
        timeout: Seconds given to ``WebDriverWait`` while watching the
            initial ``<html>`` element before the page is printed.
        install_driver: When true, the chromedriver binary is obtained via
            ``ChromeDriverManager().install()`` and passed in a ``Service``;
            otherwise ``webdriver.Chrome`` is created without one.
        print_options: ``Page.printToPDF`` parameters that override the
            defaults below. A falsy value means no overrides.

    Returns:
        The base64-decoded ``"data"`` field of the ``Page.printToPDF`` result.
    """
    webdriver_options = Options()
    for flag in (
        "--headless",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
    ):
        webdriver_options.add_argument(flag)
    # NOTE(review): value 2 for "images" presumably blocks image loading in
    # Chrome's content settings -- confirm this is intended for PDF output.
    webdriver_options.experimental_options["prefs"] = {
        "profile.default_content_settings": {"images": 2},
    }

    if install_driver:
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=webdriver_options)
    else:
        driver = webdriver.Chrome(options=webdriver_options)

    # Everything after the driver exists runs under ``finally`` so the browser
    # process is shut down even when loading or printing fails.
    try:
        driver.get(path)

        try:
            WebDriverWait(driver, timeout).until(
                staleness_of(driver.find_element(by=By.TAG_NAME, value="html"))
            )
        except TimeoutException:
            # Expected path: the document was not replaced within `timeout`
            # seconds, so the wait simply acted as a settle delay.
            pass

        # Printing now happens whether or not the wait timed out; previously
        # an early staleness skipped printing, returned None and left the
        # driver running.
        calculated_print_options = {
            "landscape": False,
            "displayHeaderFooter": False,
            "printBackground": True,
            "preferCSSPageSize": True,
        }
        calculated_print_options.update(print_options or {})
        result = __send_devtools(
            driver, "Page.printToPDF", calculated_print_options)
        return base64.b64decode(result["data"])
    finally:
        driver.quit()
-
-
def is_valid_url(url: str) -> bool:
    """Report whether *url* begins with something shaped like an HTTP(S) URL.

    The check is anchored at the start of the string only (``re.match``), so
    characters following the matched prefix are not examined. The scheme must
    be lowercase ``http`` or ``https`` and be followed by at least two
    characters from the allowed set.
    """
    pattern = r"(https?)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]"
    found = re.match(pattern, url)
    return found is not None
|