Nevar pievienot vairāk nekā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

exesql.py 6.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. from abc import ABC
  17. import re
  18. from copy import deepcopy
  19. import pandas as pd
  20. import pymysql
  21. import psycopg2
  22. from agent.component import GenerateParam, Generate
  23. import pyodbc
  24. import logging
  25. class ExeSQLParam(GenerateParam):
  26. """
  27. Define the ExeSQL component parameters.
  28. """
  29. def __init__(self):
  30. super().__init__()
  31. self.db_type = "mysql"
  32. self.database = ""
  33. self.username = ""
  34. self.host = ""
  35. self.port = 3306
  36. self.password = ""
  37. self.loop = 3
  38. self.top_n = 30
  39. def check(self):
  40. super().check()
  41. self.check_valid_value(self.db_type, "Choose DB type", ['mysql', 'postgresql', 'mariadb', 'mssql'])
  42. self.check_empty(self.database, "Database name")
  43. self.check_empty(self.username, "database username")
  44. self.check_empty(self.host, "IP Address")
  45. self.check_positive_integer(self.port, "IP Port")
  46. self.check_empty(self.password, "Database password")
  47. self.check_positive_integer(self.top_n, "Number of records")
  48. if self.database == "rag_flow":
  49. if self.host == "ragflow-mysql":
  50. raise ValueError("For the security reason, it dose not support database named rag_flow.")
  51. if self.password == "infini_rag_flow":
  52. raise ValueError("For the security reason, it dose not support database named rag_flow.")
  53. class ExeSQL(Generate, ABC):
  54. component_name = "ExeSQL"
  55. def _refactor(self, ans):
  56. ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL)
  57. match = re.search(r"```sql\s*(.*?)\s*```", ans, re.DOTALL)
  58. if match:
  59. ans = match.group(1) # Query content
  60. return ans
  61. else:
  62. print("no markdown")
  63. ans = re.sub(r'^.*?SELECT ', 'SELECT ', (ans), flags=re.IGNORECASE)
  64. ans = re.sub(r';.*?SELECT ', '; SELECT ', ans, flags=re.IGNORECASE)
  65. ans = re.sub(r';[^;]*$', r';', ans)
  66. if not ans:
  67. raise Exception("SQL statement not found!")
  68. return ans
  69. def _run(self, history, **kwargs):
  70. ans = self.get_input()
  71. ans = "".join([str(a) for a in ans["content"]]) if "content" in ans else ""
  72. ans = self._refactor(ans)
  73. logging.info("db_type: ", self._param.db_type)
  74. if self._param.db_type in ["mysql", "mariadb"]:
  75. db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host,
  76. port=self._param.port, password=self._param.password)
  77. elif self._param.db_type == 'postgresql':
  78. db = psycopg2.connect(dbname=self._param.database, user=self._param.username, host=self._param.host,
  79. port=self._param.port, password=self._param.password)
  80. elif self._param.db_type == 'mssql':
  81. conn_str = (
  82. r'DRIVER={ODBC Driver 17 for SQL Server};'
  83. r'SERVER=' + self._param.host + ',' + str(self._param.port) + ';'
  84. r'DATABASE=' + self._param.database + ';'
  85. r'UID=' + self._param.username + ';'
  86. r'PWD=' + self._param.password
  87. )
  88. db = pyodbc.connect(conn_str)
  89. try:
  90. cursor = db.cursor()
  91. except Exception as e:
  92. raise Exception("Database Connection Failed! \n" + str(e))
  93. if not hasattr(self, "_loop"):
  94. setattr(self, "_loop", 0)
  95. self._loop += 1
  96. input_list = re.split(r';', ans.replace(r"\n", " "))
  97. sql_res = []
  98. for i in range(len(input_list)):
  99. single_sql = input_list[i]
  100. while self._loop <= self._param.loop:
  101. self._loop += 1
  102. if not single_sql:
  103. break
  104. try:
  105. cursor.execute(single_sql)
  106. if cursor.rowcount == 0:
  107. sql_res.append({"content": "No record in the database!"})
  108. break
  109. if self._param.db_type == 'mssql':
  110. single_res = pd.DataFrame.from_records(cursor.fetchmany(self._param.top_n),
  111. columns=[desc[0] for desc in cursor.description])
  112. else:
  113. single_res = pd.DataFrame([i for i in cursor.fetchmany(self._param.top_n)])
  114. single_res.columns = [i[0] for i in cursor.description]
  115. sql_res.append({"content": single_res.to_markdown(index=False, floatfmt=".6f")})
  116. break
  117. except Exception as e:
  118. single_sql = self._regenerate_sql(single_sql, str(e), **kwargs)
  119. single_sql = self._refactor(single_sql)
  120. if self._loop > self._param.loop:
  121. sql_res.append({"content": "Can't query the correct data via SQL statement."})
  122. # raise Exception("Maximum loop time exceeds. Can't query the correct data via SQL statement.")
  123. db.close()
  124. if not sql_res:
  125. return ExeSQL.be_output("")
  126. return pd.DataFrame(sql_res)
  127. def _regenerate_sql(self, failed_sql, error_message, **kwargs):
  128. prompt = f'''
  129. ## You are the Repair SQL Statement Helper, please modify the original SQL statement based on the SQL query error report.
  130. ## The original SQL statement is as follows:{failed_sql}.
  131. ## The contents of the SQL query error report is as follows:{error_message}.
  132. ## Answer only the modified SQL statement. Please do not give any explanation, just answer the code.
  133. '''
  134. self._param.prompt = prompt
  135. kwargs_ = deepcopy(kwargs)
  136. kwargs_["stream"] = False
  137. response = Generate._run(self, [], **kwargs_)
  138. try:
  139. regenerated_sql = response.loc[0, "content"]
  140. return regenerated_sql
  141. except Exception as e:
  142. logging.error(f"Failed to regenerate SQL: {e}")
  143. return None
  144. def debug(self, **kwargs):
  145. return self._run([], **kwargs)