Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

exesql.py 5.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. from abc import ABC
  17. import re
  18. import pandas as pd
  19. import pymysql
  20. import psycopg2
  21. from agent.component.base import ComponentBase, ComponentParamBase
  22. import pyodbc
  23. import logging
  24. class ExeSQLParam(ComponentParamBase):
  25. """
  26. Define the ExeSQL component parameters.
  27. """
  28. def __init__(self):
  29. super().__init__()
  30. self.db_type = "mysql"
  31. self.database = ""
  32. self.username = ""
  33. self.host = ""
  34. self.port = 3306
  35. self.password = ""
  36. self.loop = 3
  37. self.top_n = 30
  38. def check(self):
  39. self.check_valid_value(self.db_type, "Choose DB type", ['mysql', 'postgresql', 'mariadb', 'mssql'])
  40. self.check_empty(self.database, "Database name")
  41. self.check_empty(self.username, "database username")
  42. self.check_empty(self.host, "IP Address")
  43. self.check_positive_integer(self.port, "IP Port")
  44. self.check_empty(self.password, "Database password")
  45. self.check_positive_integer(self.top_n, "Number of records")
  46. if self.database == "rag_flow":
  47. if self.host == "ragflow-mysql":
  48. raise ValueError("The host is not accessible.")
  49. if self.password == "infini_rag_flow":
  50. raise ValueError("The host is not accessible.")
  51. class ExeSQL(ComponentBase, ABC):
  52. component_name = "ExeSQL"
  53. def _run(self, history, **kwargs):
  54. if not hasattr(self, "_loop"):
  55. setattr(self, "_loop", 0)
  56. if self._loop >= self._param.loop:
  57. self._loop = 0
  58. raise Exception("Maximum loop time exceeds. Can't query the correct data via SQL statement.")
  59. self._loop += 1
  60. ans = self.get_input()
  61. ans = "".join([str(a) for a in ans["content"]]) if "content" in ans else ""
  62. if self._param.db_type == 'mssql':
  63. # improve the information extraction, most llm return results in markdown format ```sql query ```
  64. match = re.search(r"```sql\s*(.*?)\s*```", ans, re.DOTALL)
  65. if match:
  66. ans = match.group(1) # Query content
  67. print(ans)
  68. else:
  69. print("no markdown")
  70. ans = re.sub(r'^.*?SELECT ', 'SELECT ', (ans), flags=re.IGNORECASE)
  71. else:
  72. ans = re.sub(r'^.*?SELECT ', 'SELECT ', repr(ans), flags=re.IGNORECASE)
  73. ans = re.sub(r';.*?SELECT ', '; SELECT ', ans, flags=re.IGNORECASE)
  74. ans = re.sub(r';[^;]*$', r';', ans)
  75. if not ans:
  76. raise Exception("SQL statement not found!")
  77. logging.info("db_type: ",self._param.db_type)
  78. if self._param.db_type in ["mysql", "mariadb"]:
  79. db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host,
  80. port=self._param.port, password=self._param.password)
  81. elif self._param.db_type == 'postgresql':
  82. db = psycopg2.connect(dbname=self._param.database, user=self._param.username, host=self._param.host,
  83. port=self._param.port, password=self._param.password)
  84. elif self._param.db_type == 'mssql':
  85. conn_str = (
  86. r'DRIVER={ODBC Driver 17 for SQL Server};'
  87. r'SERVER=' + self._param.host + ',' + str(self._param.port) + ';'
  88. r'DATABASE=' + self._param.database + ';'
  89. r'UID=' + self._param.username + ';'
  90. r'PWD=' + self._param.password
  91. )
  92. db = pyodbc.connect(conn_str)
  93. try:
  94. cursor = db.cursor()
  95. except Exception as e:
  96. raise Exception("Database Connection Failed! \n" + str(e))
  97. sql_res = []
  98. for single_sql in re.split(r';', ans.replace(r"\n", " ")):
  99. if not single_sql:
  100. continue
  101. try:
  102. logging.info("single_sql: ",single_sql)
  103. cursor.execute(single_sql)
  104. if cursor.rowcount == 0:
  105. sql_res.append({"content": "\nTotal: 0\n No record in the database!"})
  106. continue
  107. single_res = pd.DataFrame([i for i in cursor.fetchmany(self._param.top_n)])
  108. single_res.columns = [i[0] for i in cursor.description]
  109. sql_res.append({"content": "\nTotal: " + str(cursor.rowcount) + "\n" + single_res.to_markdown()})
  110. except Exception as e:
  111. sql_res.append({"content": "**Error**:" + str(e) + "\nError SQL Statement:" + single_sql})
  112. pass
  113. db.close()
  114. if not sql_res:
  115. return ExeSQL.be_output("")
  116. return pd.DataFrame(sql_res)