Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

web_utils.py 6.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. #
  2. # Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import base64
  17. import ipaddress
  18. import json
  19. import re
  20. import socket
  21. from urllib.parse import urlparse
  22. from api.apps import smtp_mail_server
  23. from flask_mail import Message
  24. from flask import render_template_string
  25. from selenium import webdriver
  26. from selenium.common.exceptions import TimeoutException
  27. from selenium.webdriver.chrome.options import Options
  28. from selenium.webdriver.chrome.service import Service
  29. from selenium.webdriver.common.by import By
  30. from selenium.webdriver.support.expected_conditions import staleness_of
  31. from selenium.webdriver.support.ui import WebDriverWait
  32. from webdriver_manager.chrome import ChromeDriverManager
  33. CONTENT_TYPE_MAP = {
  34. # Office
  35. "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  36. "doc": "application/msword",
  37. "pdf": "application/pdf",
  38. "csv": "text/csv",
  39. "xls": "application/vnd.ms-excel",
  40. "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  41. # Text/code
  42. "txt": "text/plain",
  43. "py": "text/plain",
  44. "js": "text/plain",
  45. "java": "text/plain",
  46. "c": "text/plain",
  47. "cpp": "text/plain",
  48. "h": "text/plain",
  49. "php": "text/plain",
  50. "go": "text/plain",
  51. "ts": "text/plain",
  52. "sh": "text/plain",
  53. "cs": "text/plain",
  54. "kt": "text/plain",
  55. "sql": "text/plain",
  56. # Web
  57. "md": "text/markdown",
  58. "markdown": "text/markdown",
  59. "htm": "text/html",
  60. "html": "text/html",
  61. "json": "application/json",
  62. # Image formats
  63. "png": "image/png",
  64. "jpg": "image/jpeg",
  65. "jpeg": "image/jpeg",
  66. "gif": "image/gif",
  67. "bmp": "image/bmp",
  68. "tiff": "image/tiff",
  69. "tif": "image/tiff",
  70. "webp": "image/webp",
  71. "svg": "image/svg+xml",
  72. "ico": "image/x-icon",
  73. "avif": "image/avif",
  74. "heic": "image/heic",
  75. }
  76. def html2pdf(
  77. source: str,
  78. timeout: int = 2,
  79. install_driver: bool = True,
  80. print_options: dict = {},
  81. ):
  82. result = __get_pdf_from_html(source, timeout, install_driver, print_options)
  83. return result
  84. def __send_devtools(driver, cmd, params={}):
  85. resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
  86. url = driver.command_executor._url + resource
  87. body = json.dumps({"cmd": cmd, "params": params})
  88. response = driver.command_executor._request("POST", url, body)
  89. if not response:
  90. raise Exception(response.get("value"))
  91. return response.get("value")
  92. def __get_pdf_from_html(path: str, timeout: int, install_driver: bool, print_options: dict):
  93. webdriver_options = Options()
  94. webdriver_prefs = {}
  95. webdriver_options.add_argument("--headless")
  96. webdriver_options.add_argument("--disable-gpu")
  97. webdriver_options.add_argument("--no-sandbox")
  98. webdriver_options.add_argument("--disable-dev-shm-usage")
  99. webdriver_options.experimental_options["prefs"] = webdriver_prefs
  100. webdriver_prefs["profile.default_content_settings"] = {"images": 2}
  101. if install_driver:
  102. service = Service(ChromeDriverManager().install())
  103. driver = webdriver.Chrome(service=service, options=webdriver_options)
  104. else:
  105. driver = webdriver.Chrome(options=webdriver_options)
  106. driver.get(path)
  107. try:
  108. WebDriverWait(driver, timeout).until(staleness_of(driver.find_element(by=By.TAG_NAME, value="html")))
  109. except TimeoutException:
  110. calculated_print_options = {
  111. "landscape": False,
  112. "displayHeaderFooter": False,
  113. "printBackground": True,
  114. "preferCSSPageSize": True,
  115. }
  116. calculated_print_options.update(print_options)
  117. result = __send_devtools(driver, "Page.printToPDF", calculated_print_options)
  118. driver.quit()
  119. return base64.b64decode(result["data"])
  120. def is_private_ip(ip: str) -> bool:
  121. try:
  122. ip_obj = ipaddress.ip_address(ip)
  123. return ip_obj.is_private
  124. except ValueError:
  125. return False
  126. def is_valid_url(url: str) -> bool:
  127. if not re.match(r"(https?)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]", url):
  128. return False
  129. parsed_url = urlparse(url)
  130. hostname = parsed_url.hostname
  131. if not hostname:
  132. return False
  133. try:
  134. ip = socket.gethostbyname(hostname)
  135. if is_private_ip(ip):
  136. return False
  137. except socket.gaierror:
  138. return False
  139. return True
  140. def safe_json_parse(data: str | dict) -> dict:
  141. if isinstance(data, dict):
  142. return data
  143. try:
  144. return json.loads(data) if data else {}
  145. except (json.JSONDecodeError, TypeError):
  146. return {}
  147. def get_float(req: dict, key: str, default: float | int = 10.0) -> float:
  148. try:
  149. parsed = float(req.get(key, default))
  150. return parsed if parsed > 0 else default
  151. except (TypeError, ValueError):
  152. return default
# HTML body of the team-invitation email. The {{email}}, {{inviter}},
# {{tenant_id}} and {{invite_url}} placeholders are filled in by
# send_invite_email() through render_template_string().
INVITE_EMAIL_TMPL = """
<p>Hi {{email}},</p>
<p>{{inviter}} has invited you to join their team (ID: {{tenant_id}}).</p>
<p>Click the link below to complete your registration:<br>
<a href="{{invite_url}}">{{invite_url}}</a></p>
<p>If you did not request this, please ignore this email.</p>
"""
  160. def send_invite_email(to_email, invite_url, tenant_id, inviter):
  161. from api.apps import app
  162. with app.app_context():
  163. msg = Message(subject="RAGFlow Invitation",
  164. recipients=[to_email])
  165. msg.html = render_template_string(
  166. INVITE_EMAIL_TMPL,
  167. email=to_email,
  168. invite_url=invite_url,
  169. tenant_id=tenant_id,
  170. inviter=inviter,
  171. )
  172. smtp_mail_server.send(msg)