Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

api_app.py 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import json
  17. import os
  18. import re
  19. from datetime import datetime, timedelta
  20. from flask import request, Response
  21. from flask_login import login_required, current_user
  22. from api.db import FileType, ParserType
  23. from api.db.db_models import APIToken, API4Conversation, Task
  24. from api.db.services import duplicate_name
  25. from api.db.services.api_service import APITokenService, API4ConversationService
  26. from api.db.services.dialog_service import DialogService, chat
  27. from api.db.services.document_service import DocumentService
  28. from api.db.services.file2document_service import File2DocumentService
  29. from api.db.services.file_service import FileService
  30. from api.db.services.knowledgebase_service import KnowledgebaseService
  31. from api.db.services.task_service import queue_tasks, TaskService
  32. from api.db.services.user_service import UserTenantService
  33. from api.settings import RetCode
  34. from api.utils import get_uuid, current_timestamp, datetime_format
  35. from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
  36. from itsdangerous import URLSafeTimedSerializer
  37. from api.utils.file_utils import filename_type, thumbnail
  38. from rag.utils.minio_conn import MINIO
  39. from rag.utils.es_conn import ELASTICSEARCH
  40. from rag.nlp import search
  41. from elasticsearch_dsl import Q
  def generate_confirmation_token(tenent_id):
      """Build a "ragflow-"-prefixed API token string for a tenant.

      A fresh value from ``get_uuid()`` is serialized with
      ``URLSafeTimedSerializer``, using ``tenent_id`` both as the serializer
      key and as the salt. Characters 2..33 of the serialized text form the
      part of the token that follows the prefix.
      """
      signer = URLSafeTimedSerializer(tenent_id)
      signed_text = signer.dumps(get_uuid(), salt=tenent_id)
      return "ragflow-" + signed_text[2:34]
  @manager.route('/new_token', methods=['POST'])
  @validate_request("dialog_id")
  @login_required
  def new_token():
      """Create and store an API token for the dialog named in the JSON body.

      The token is bound to the tenant of the first ``UserTenantService``
      record found for the logged-in user. On success the stored record is
      returned as the JSON result; otherwise a data-error or server-error
      response is returned.
      """
      payload = request.json
      try:
          memberships = UserTenantService.query(user_id=current_user.id)
          if not memberships:
              return get_data_error_result(retmsg="Tenant not found!")

          owner_id = memberships[0].tenant_id
          record = dict(
              tenant_id=owner_id,
              token=generate_confirmation_token(owner_id),
              dialog_id=payload["dialog_id"],
              create_time=current_timestamp(),
              create_date=datetime_format(datetime.now()),
              update_time=None,
              update_date=None,
          )

          if APITokenService.save(**record):
              return get_json_result(data=record)
          return get_data_error_result(retmsg="Fail to new a dialog!")
      except Exception as err:
          return server_error_response(err)
  @manager.route('/token_list', methods=['GET'])
  @login_required
  def token_list():
      """List the API tokens of the current user's tenant for one dialog.

      The dialog is selected by the ``dialog_id`` query-string argument; the
      tenant is taken from the first ``UserTenantService`` record of the
      logged-in user. Each token record is returned via ``to_dict()``.
      """
      try:
          memberships = UserTenantService.query(user_id=current_user.id)
          if not memberships:
              return get_data_error_result(retmsg="Tenant not found!")

          owner_id = memberships[0].tenant_id
          wanted_dialog = request.args["dialog_id"]
          records = APITokenService.query(tenant_id=owner_id, dialog_id=wanted_dialog)

          serialized = []
          for record in records:
              serialized.append(record.to_dict())
          return get_json_result(data=serialized)
      except Exception as err:
          return server_error_response(err)
  @manager.route('/rm', methods=['POST'])
  @validate_request("tokens", "tenant_id")
  @login_required
  def rm():
      """Delete every token listed in the JSON body for the given tenant.

      The body supplies ``tokens`` (an iterable of token strings) and
      ``tenant_id``; one ``filter_delete`` call is issued per token.

      NOTE(review): ``tenant_id`` is read from the request body and is not
      compared with the logged-in user's tenants in this block - confirm
      that this is intended.
      """
      body = request.json
      try:
          for token_value in body["tokens"]:
              conditions = [
                  APIToken.tenant_id == body["tenant_id"],
                  APIToken.token == token_value,
              ]
              APITokenService.filter_delete(conditions)
          return get_json_result(data=True)
      except Exception as err:
          return server_error_response(err)
  @manager.route('/stats', methods=['GET'])
  @login_required
  def stats():
      """Return per-date API usage series for the current user's tenant.

      Query-string arguments:
          from_date: lower bound passed to ``API4ConversationService.stats``;
              defaults to midnight seven days before now.
          to_date: upper bound; defaults to the current time.

      The JSON result maps each metric name ("pv", "uv", "speed", "tokens",
      "round", "thumb_up") to a list of ``(dt, value)`` pairs built from the
      rows returned by the service.
      """
      try:
          tenants = UserTenantService.query(user_id=current_user.id)
          if not tenants:
              return get_data_error_result(retmsg="Tenant not found!")

          now = datetime.now()
          # The previous default used the literal time "24:00:00", whose hour
          # lies outside the 00-23 range of a datetime value; the window now
          # starts at midnight of the day seven days ago.
          default_from = (now - timedelta(days=7)).strftime("%Y-%m-%d 00:00:00")
          default_to = now.strftime("%Y-%m-%d %H:%M:%S")

          objs = API4ConversationService.stats(
              tenants[0].tenant_id,
              request.args.get("from_date", default_from),
              request.args.get("to_date", default_to))

          res = {
              "pv": [(o["dt"], o["pv"]) for o in objs],
              "uv": [(o["dt"], o["uv"]) for o in objs],
              # 0.1 is added to the duration before dividing, so a duration
              # of 0 does not cause a division by zero.
              "speed": [(o["dt"], float(o["tokens"])/(float(o["duration"]+0.1))) for o in objs],
              # Token counts are reported in thousands.
              "tokens": [(o["dt"], float(o["tokens"])/1000.) for o in objs],
              "round": [(o["dt"], o["round"]) for o in objs],
              "thumb_up": [(o["dt"], o["thumb_up"]) for o in objs]
          }
          return get_json_result(data=res)
      except Exception as e:
          return server_error_response(e)
  @manager.route('/new_conversation', methods=['GET'])
  def set_conversation():
      """Start a new API conversation for the dialog bound to the caller's token.

      The token is the second whitespace-separated field of the
      ``Authorization`` header. A missing, malformed or unknown token yields
      an ``AUTHENTICATION_ERROR`` result. The optional ``user_id`` query
      argument is stored on the conversation, whose first message is the
      dialog's configured prologue.
      """
      # A missing or single-word header used to raise AttributeError /
      # IndexError before any error handling; treat it as an invalid token.
      auth_fields = (request.headers.get('Authorization') or '').split()
      objs = APIToken.query(token=auth_fields[1]) if len(auth_fields) > 1 else None
      if not objs:
          return get_json_result(
              data=False, retmsg='Token is not valid!', retcode=RetCode.AUTHENTICATION_ERROR)

      # The JSON body is deliberately not read: its value was never used, and
      # accessing request.json on a body-less GET can be rejected by newer
      # Flask/Werkzeug releases.
      try:
          e, dia = DialogService.get_by_id(objs[0].dialog_id)
          if not e:
              return get_data_error_result(retmsg="Dialog not found")

          conv = {
              "id": get_uuid(),
              "dialog_id": dia.id,
              "user_id": request.args.get("user_id", ""),
              "message": [{"role": "assistant", "content": dia.prompt_config["prologue"]}]
          }
          API4ConversationService.save(**conv)

          # Read the record back so the response reflects what was stored.
          e, conv = API4ConversationService.get_by_id(conv["id"])
          if not e:
              return get_data_error_result(retmsg="Fail to new a conversation!")
          return get_json_result(data=conv.to_dict())
      except Exception as e:
          return server_error_response(e)
  @manager.route('/completion', methods=['POST'])
  @validate_request("conversation_id", "messages")
  def completion():
      """Answer the latest message of an API conversation, streamed or not.

      The token is the second whitespace-separated field of the
      ``Authorization`` header; a missing, malformed or unknown token yields
      an ``AUTHENTICATION_ERROR`` result.

      JSON body:
          conversation_id: id of an existing API conversation.
          messages: list of ``{"role", "content"}`` items. System messages
              and any assistant messages before the first other message are
              skipped.
          quote: forwarded to ``chat``; set to False when absent.
          stream: when true (the default) the reply is a
              ``text/event-stream`` response, otherwise one JSON result.
          Remaining keys are forwarded to ``chat`` as keyword arguments.

      NOTE(review): the conversation is not checked against the dialog bound
      to the token in this block - confirm whether that is intended.
      NOTE(review): ``req`` may still contain "stream" when it is expanded
      into ``chat(...)`` next to the positional flag - confirm that ``chat``
      accepts this.
      """
      # A missing or single-word header used to raise AttributeError /
      # IndexError before any error handling; treat it as an invalid token.
      auth_fields = (request.headers.get('Authorization') or '').split()
      if len(auth_fields) < 2 or not APIToken.query(token=auth_fields[1]):
          return get_json_result(
              data=False, retmsg='Token is not valid!', retcode=RetCode.AUTHENTICATION_ERROR)

      req = request.json
      e, conv = API4ConversationService.get_by_id(req["conversation_id"])
      if not e:
          return get_data_error_result(retmsg="Conversation not found!")
      req.setdefault("quote", False)

      msg = []
      for m in req["messages"]:
          if m["role"] == "system":
              continue
          if m["role"] == "assistant" and not msg:
              continue
          msg.append({"role": m["role"], "content": m["content"]})

      try:
          # Record the newest incoming message on the stored conversation.
          conv.message.append(msg[-1])
          e, dia = DialogService.get_by_id(conv.dialog_id)
          if not e:
              return get_data_error_result(retmsg="Dialog not found!")

          # Whatever remains in req is passed to chat() as keyword arguments.
          del req["conversation_id"]
          del req["messages"]

          if not conv.reference:
              conv.reference = []
          # Placeholders for the answer being generated; fillin_conv()
          # overwrites them with each result.
          conv.message.append({"role": "assistant", "content": ""})
          conv.reference.append({"chunks": [], "doc_aggs": []})

          def fillin_conv(ans):
              """Store the given answer and reference in the last conversation slots."""
              if not conv.reference:
                  conv.reference.append(ans["reference"])
              else:
                  conv.reference[-1] = ans["reference"]
              conv.message[-1] = {"role": "assistant", "content": ans["answer"]}

          def stream():
              """Yield server-sent-event frames for each answer produced by chat()."""
              try:
                  for ans in chat(dia, msg, True, **req):
                      fillin_conv(ans)
                      yield "data:"+json.dumps({"retcode": 0, "retmsg": "", "data": ans}, ensure_ascii=False) + "\n\n"
                  API4ConversationService.append_message(conv.id, conv.to_dict())
              except Exception as e:
                  yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e),
                                              "data": {"answer": "**ERROR**: "+str(e), "reference": []}},
                                             ensure_ascii=False) + "\n\n"
              # Final frame, sent after success and after an error alike.
              yield "data:"+json.dumps({"retcode": 0, "retmsg": "", "data": True}, ensure_ascii=False) + "\n\n"

          if req.get("stream", True):
              resp = Response(stream(), mimetype="text/event-stream")
              resp.headers.add_header("Cache-control", "no-cache")
              resp.headers.add_header("Connection", "keep-alive")
              resp.headers.add_header("X-Accel-Buffering", "no")
              resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
              return resp

          ans = chat(dia, msg, False, **req)
          fillin_conv(ans)
          API4ConversationService.append_message(conv.id, conv.to_dict())
          return get_json_result(data=ans)
      except Exception as e:
          return server_error_response(e)
  @manager.route('/conversation/<conversation_id>', methods=['GET'])
  # @login_required
  def get(conversation_id):
      """Return the stored API conversation with the given id as a dict.

      A data-error result is returned when no such conversation exists.

      NOTE(review): neither a login nor a token check is applied to this
      route in this block - confirm that this is intended.
      """
      try:
          found, conversation = API4ConversationService.get_by_id(conversation_id)
          if found:
              return get_json_result(data=conversation.to_dict())
          return get_data_error_result(retmsg="Conversation not found!")
      except Exception as err:
          return server_error_response(err)
  @manager.route('/document/upload', methods=['POST'])
  @validate_request("kb_name")
  def upload():
      """Upload one file into a knowledge base of the token's tenant.

      The token is the second whitespace-separated field of the
      ``Authorization`` header; a missing, malformed or unknown token yields
      an ``AUTHENTICATION_ERROR`` result.

      Form fields:
          kb_name: name of the target knowledge base (looked up for the
              token's tenant).
          file: the uploaded file part.
          parser_id: optional parser override, applied only when it is one
              of the accepted ``ParserType`` values.
          run: when "1", parsing tasks are queued right after the upload.

      Returns the JSON result of the inserted document record, or an error
      result describing the first failed step.
      """
      # A missing or single-word header used to raise AttributeError /
      # IndexError before any error handling; treat it as an invalid token.
      auth_fields = (request.headers.get('Authorization') or '').split()
      objs = APIToken.query(token=auth_fields[1]) if len(auth_fields) > 1 else None
      if not objs:
          return get_json_result(
              data=False, retmsg='Token is not valid!', retcode=RetCode.AUTHENTICATION_ERROR)

      form_data = request.form
      # An absent kb_name becomes "" so the lookup below reports the problem
      # instead of failing on None.strip().
      kb_name = (form_data.get("kb_name") or "").strip()
      tenant_id = objs[0].tenant_id

      try:
          e, kb = KnowledgebaseService.get_by_name(kb_name, tenant_id)
          if not e:
              return get_data_error_result(
                  retmsg="Can't find this knowledgebase!")
          kb_id = kb.id
      except Exception as e:
          return server_error_response(e)

      if 'file' not in request.files:
          return get_json_result(
              data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)

      file = request.files['file']
      if file.filename == '':
          return get_json_result(
              data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)

      # Set up the tenant's folder entries that the new file is linked to.
      root_folder = FileService.get_root_folder(tenant_id)
      pf_id = root_folder["id"]
      FileService.init_knowledgebase_docs(pf_id, tenant_id)
      kb_root_folder = FileService.get_kb_folder(tenant_id)
      kb_folder = FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"])

      try:
          if DocumentService.get_doc_count(kb.tenant_id) >= int(os.environ.get('MAX_FILE_NUM_PER_USER', 8192)):
              return get_data_error_result(
                  retmsg="Exceed the maximum file number of a free user!")

          filename = duplicate_name(
              DocumentService.query,
              name=file.filename,
              kb_id=kb_id)
          filetype = filename_type(filename)
          if not filetype:
              return get_data_error_result(
                  retmsg="This type of file has not been supported yet!")

          # Append "_" until the storage key is unused in this bucket.
          location = filename
          while MINIO.obj_exist(kb_id, location):
              location += "_"
          blob = file.read()
          MINIO.put(kb_id, location, blob)

          doc = {
              "id": get_uuid(),
              "kb_id": kb.id,
              "parser_id": kb.parser_id,
              "parser_config": kb.parser_config,
              "created_by": kb.tenant_id,
              "type": filetype,
              "name": filename,
              "location": location,
              "size": len(blob),
              "thumbnail": thumbnail(filename, blob)
          }

          if "parser_id" in form_data:
              requested_parser = form_data.get("parser_id").strip()
              # NOTE(review): the [1:-3] slice depends on the attribute order
              # of ParserType - confirm which values it is meant to admit.
              if requested_parser in list(vars(ParserType).values())[1:-3]:
                  doc["parser_id"] = requested_parser
          # File-type rules take precedence over the requested parser.
          if doc["type"] == FileType.VISUAL:
              doc["parser_id"] = ParserType.PICTURE.value
          if re.search(r"\.(ppt|pptx|pages)$", filename):
              doc["parser_id"] = ParserType.PRESENTATION.value

          doc_result = DocumentService.insert(doc)
          FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
      except Exception as e:
          return server_error_response(e)

      if (form_data.get("run") or "").strip() == "1":
          try:
              # Reset the progress counters before queueing parsing tasks.
              info = {
                  "run": 1,
                  "progress": 0,
                  "progress_msg": "",
                  "chunk_num": 0,
                  "token_num": 0,
              }
              DocumentService.update_by_id(doc["id"], info)

              tenant_id = DocumentService.get_tenant_id(doc["id"])
              if not tenant_id:
                  return get_data_error_result(retmsg="Tenant not found!")

              TaskService.filter_delete([Task.doc_id == doc["id"]])
              e, doc = DocumentService.get_by_id(doc["id"])
              if not e:
                  return get_data_error_result(retmsg="Document not found!")
              doc = doc.to_dict()
              doc["tenant_id"] = tenant_id
              bucket, name = File2DocumentService.get_minio_address(doc_id=doc["id"])
              queue_tasks(doc, bucket, name)
          except Exception as e:
              return server_error_response(e)

      return get_json_result(data=doc_result.to_json())
  @manager.route('/list_chunks', methods=['POST'])
  # @login_required
  def list_chunks():
      """List the indexed chunks of one document, chosen by name or by id.

      The token is the second whitespace-separated field of the
      ``Authorization`` header; a missing, malformed or unknown token yields
      an ``AUTHENTICATION_ERROR`` result.

      Form fields (``doc_name`` wins when both are present):
          doc_name: matched against the ``docnm_kwd`` field.
          doc_id: matched against the ``doc_id`` field.

      Returns a JSON result whose data is a list of dicts with "doc_name",
      "content" and, when the hit has one, "img_id".

      NOTE(review): the searched index is derived from the document's tenant,
      which is not compared with the token's tenant in this block - confirm
      that this is intended.
      """
      # A missing or single-word header used to raise AttributeError /
      # IndexError before any error handling; treat it as an invalid token.
      auth_fields = (request.headers.get('Authorization') or '').split()
      objs = APIToken.query(token=auth_fields[1]) if len(auth_fields) > 1 else None
      if not objs:
          return get_json_result(
              data=False, retmsg='Token is not valid!', retcode=RetCode.AUTHENTICATION_ERROR)

      form_data = request.form

      try:
          if "doc_name" in form_data:
              tenant_id = DocumentService.get_tenant_id_by_name(form_data['doc_name'])
              q = Q("match", docnm_kwd=form_data['doc_name'])
          elif "doc_id" in form_data:
              tenant_id = DocumentService.get_tenant_id(form_data['doc_id'])
              q = Q("match", doc_id=form_data['doc_id'])
          else:
              return get_json_result(
                  data=False, retmsg="Can't find doc_name or doc_id"
              )

          # Without a tenant there is no index name to search.
          if not tenant_id:
              return get_data_error_result(retmsg="Tenant not found!")

          res_es_search = ELASTICSEARCH.search(q, idxnm=search.index_name(tenant_id), timeout="600s")

          res = []
          for hit in res_es_search['hits']['hits']:
              source = hit['_source']
              item = {
                  'doc_name': source['docnm_kwd'],
                  'content': source['content_with_weight'],
              }
              if 'img_id' in source:
                  item['img_id'] = source['img_id']
              res.append(item)
      except Exception as e:
          return server_error_response(e)

      return get_json_result(data=res)