您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

api_app.py 35KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import json
  17. import os
  18. import re
  19. from datetime import datetime, timedelta
  20. from flask import request, Response
  21. from api.db.services.llm_service import LLMBundle
  22. from flask_login import login_required, current_user
  23. from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileType, LLMType, ParserType, FileSource
  24. from api.db.db_models import APIToken, Task, File
  25. from api.db.services import duplicate_name
  26. from api.db.services.api_service import APITokenService, API4ConversationService
  27. from api.db.services.dialog_service import DialogService, chat
  28. from api.db.services.document_service import DocumentService, doc_upload_and_parse
  29. from api.db.services.file2document_service import File2DocumentService
  30. from api.db.services.file_service import FileService
  31. from api.db.services.knowledgebase_service import KnowledgebaseService
  32. from api.db.services.task_service import queue_tasks, TaskService
  33. from api.db.services.user_service import UserTenantService
  34. from api import settings
  35. from api.utils import get_uuid, current_timestamp, datetime_format
  36. from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request, \
  37. generate_confirmation_token
  38. from api.utils.file_utils import filename_type, thumbnail
  39. from rag.app.tag import label_question
  40. from rag.prompts import keyword_extraction
  41. from rag.utils.storage_factory import STORAGE_IMPL
  42. from api.db.services.canvas_service import UserCanvasService
  43. from agent.canvas import Canvas
  44. from functools import partial
  45. from pathlib import Path
  46. @manager.route('/new_token', methods=['POST']) # noqa: F821
  47. @login_required
  48. def new_token():
  49. req = request.json
  50. try:
  51. tenants = UserTenantService.query(user_id=current_user.id)
  52. if not tenants:
  53. return get_data_error_result(message="Tenant not found!")
  54. tenant_id = tenants[0].tenant_id
  55. obj = {"tenant_id": tenant_id, "token": generate_confirmation_token(tenant_id),
  56. "create_time": current_timestamp(),
  57. "create_date": datetime_format(datetime.now()),
  58. "update_time": None,
  59. "update_date": None
  60. }
  61. if req.get("canvas_id"):
  62. obj["dialog_id"] = req["canvas_id"]
  63. obj["source"] = "agent"
  64. else:
  65. obj["dialog_id"] = req["dialog_id"]
  66. if not APITokenService.save(**obj):
  67. return get_data_error_result(message="Fail to new a dialog!")
  68. return get_json_result(data=obj)
  69. except Exception as e:
  70. return server_error_response(e)
  71. @manager.route('/token_list', methods=['GET']) # noqa: F821
  72. @login_required
  73. def token_list():
  74. try:
  75. tenants = UserTenantService.query(user_id=current_user.id)
  76. if not tenants:
  77. return get_data_error_result(message="Tenant not found!")
  78. id = request.args["dialog_id"] if "dialog_id" in request.args else request.args["canvas_id"]
  79. objs = APITokenService.query(tenant_id=tenants[0].tenant_id, dialog_id=id)
  80. return get_json_result(data=[o.to_dict() for o in objs])
  81. except Exception as e:
  82. return server_error_response(e)
  83. @manager.route('/rm', methods=['POST']) # noqa: F821
  84. @validate_request("tokens", "tenant_id")
  85. @login_required
  86. def rm():
  87. req = request.json
  88. try:
  89. for token in req["tokens"]:
  90. APITokenService.filter_delete(
  91. [APIToken.tenant_id == req["tenant_id"], APIToken.token == token])
  92. return get_json_result(data=True)
  93. except Exception as e:
  94. return server_error_response(e)
  95. @manager.route('/stats', methods=['GET']) # noqa: F821
  96. @login_required
  97. def stats():
  98. try:
  99. tenants = UserTenantService.query(user_id=current_user.id)
  100. if not tenants:
  101. return get_data_error_result(message="Tenant not found!")
  102. objs = API4ConversationService.stats(
  103. tenants[0].tenant_id,
  104. request.args.get(
  105. "from_date",
  106. (datetime.now() -
  107. timedelta(
  108. days=7)).strftime("%Y-%m-%d 00:00:00")),
  109. request.args.get(
  110. "to_date",
  111. datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
  112. "agent" if "canvas_id" in request.args else None)
  113. res = {
  114. "pv": [(o["dt"], o["pv"]) for o in objs],
  115. "uv": [(o["dt"], o["uv"]) for o in objs],
  116. "speed": [(o["dt"], float(o["tokens"]) / (float(o["duration"] + 0.1))) for o in objs],
  117. "tokens": [(o["dt"], float(o["tokens"]) / 1000.) for o in objs],
  118. "round": [(o["dt"], o["round"]) for o in objs],
  119. "thumb_up": [(o["dt"], o["thumb_up"]) for o in objs]
  120. }
  121. return get_json_result(data=res)
  122. except Exception as e:
  123. return server_error_response(e)
  124. @manager.route('/new_conversation', methods=['GET']) # noqa: F821
  125. def set_conversation():
  126. token = request.headers.get('Authorization').split()[1]
  127. objs = APIToken.query(token=token)
  128. if not objs:
  129. return get_json_result(
  130. data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  131. try:
  132. if objs[0].source == "agent":
  133. e, cvs = UserCanvasService.get_by_id(objs[0].dialog_id)
  134. if not e:
  135. return server_error_response("canvas not found.")
  136. if not isinstance(cvs.dsl, str):
  137. cvs.dsl = json.dumps(cvs.dsl, ensure_ascii=False)
  138. canvas = Canvas(cvs.dsl, objs[0].tenant_id)
  139. conv = {
  140. "id": get_uuid(),
  141. "dialog_id": cvs.id,
  142. "user_id": request.args.get("user_id", ""),
  143. "message": [{"role": "assistant", "content": canvas.get_prologue()}],
  144. "source": "agent"
  145. }
  146. API4ConversationService.save(**conv)
  147. return get_json_result(data=conv)
  148. else:
  149. e, dia = DialogService.get_by_id(objs[0].dialog_id)
  150. if not e:
  151. return get_data_error_result(message="Dialog not found")
  152. conv = {
  153. "id": get_uuid(),
  154. "dialog_id": dia.id,
  155. "user_id": request.args.get("user_id", ""),
  156. "message": [{"role": "assistant", "content": dia.prompt_config["prologue"]}]
  157. }
  158. API4ConversationService.save(**conv)
  159. return get_json_result(data=conv)
  160. except Exception as e:
  161. return server_error_response(e)
  162. @manager.route('/completion', methods=['POST']) # noqa: F821
  163. @validate_request("conversation_id", "messages")
  164. def completion():
  165. token = request.headers.get('Authorization').split()[1]
  166. objs = APIToken.query(token=token)
  167. if not objs:
  168. return get_json_result(
  169. data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  170. req = request.json
  171. e, conv = API4ConversationService.get_by_id(req["conversation_id"])
  172. if not e:
  173. return get_data_error_result(message="Conversation not found!")
  174. if "quote" not in req:
  175. req["quote"] = False
  176. msg = []
  177. for m in req["messages"]:
  178. if m["role"] == "system":
  179. continue
  180. if m["role"] == "assistant" and not msg:
  181. continue
  182. msg.append(m)
  183. if not msg[-1].get("id"):
  184. msg[-1]["id"] = get_uuid()
  185. message_id = msg[-1]["id"]
  186. def fillin_conv(ans):
  187. nonlocal conv, message_id
  188. if not conv.reference:
  189. conv.reference.append(ans["reference"])
  190. else:
  191. conv.reference[-1] = ans["reference"]
  192. conv.message[-1] = {"role": "assistant", "content": ans["answer"], "id": message_id}
  193. ans["id"] = message_id
  194. def rename_field(ans):
  195. reference = ans['reference']
  196. if not isinstance(reference, dict):
  197. return
  198. for chunk_i in reference.get('chunks', []):
  199. if 'docnm_kwd' in chunk_i:
  200. chunk_i['doc_name'] = chunk_i['docnm_kwd']
  201. chunk_i.pop('docnm_kwd')
  202. try:
  203. if conv.source == "agent":
  204. stream = req.get("stream", True)
  205. conv.message.append(msg[-1])
  206. e, cvs = UserCanvasService.get_by_id(conv.dialog_id)
  207. if not e:
  208. return server_error_response("canvas not found.")
  209. del req["conversation_id"]
  210. del req["messages"]
  211. if not isinstance(cvs.dsl, str):
  212. cvs.dsl = json.dumps(cvs.dsl, ensure_ascii=False)
  213. if not conv.reference:
  214. conv.reference = []
  215. conv.message.append({"role": "assistant", "content": "", "id": message_id})
  216. conv.reference.append({"chunks": [], "doc_aggs": []})
  217. final_ans = {"reference": [], "content": ""}
  218. canvas = Canvas(cvs.dsl, objs[0].tenant_id)
  219. canvas.messages.append(msg[-1])
  220. canvas.add_user_input(msg[-1]["content"])
  221. answer = canvas.run(stream=stream)
  222. assert answer is not None, "Nothing. Is it over?"
  223. if stream:
  224. assert isinstance(answer, partial), "Nothing. Is it over?"
  225. def sse():
  226. nonlocal answer, cvs, conv
  227. try:
  228. for ans in answer():
  229. for k in ans.keys():
  230. final_ans[k] = ans[k]
  231. ans = {"answer": ans["content"], "reference": ans.get("reference", [])}
  232. fillin_conv(ans)
  233. rename_field(ans)
  234. yield "data:" + json.dumps({"code": 0, "message": "", "data": ans},
  235. ensure_ascii=False) + "\n\n"
  236. canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
  237. canvas.history.append(("assistant", final_ans["content"]))
  238. if final_ans.get("reference"):
  239. canvas.reference.append(final_ans["reference"])
  240. cvs.dsl = json.loads(str(canvas))
  241. API4ConversationService.append_message(conv.id, conv.to_dict())
  242. except Exception as e:
  243. yield "data:" + json.dumps({"code": 500, "message": str(e),
  244. "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
  245. ensure_ascii=False) + "\n\n"
  246. yield "data:" + json.dumps({"code": 0, "message": "", "data": True}, ensure_ascii=False) + "\n\n"
  247. resp = Response(sse(), mimetype="text/event-stream")
  248. resp.headers.add_header("Cache-control", "no-cache")
  249. resp.headers.add_header("Connection", "keep-alive")
  250. resp.headers.add_header("X-Accel-Buffering", "no")
  251. resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
  252. return resp
  253. final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
  254. canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
  255. if final_ans.get("reference"):
  256. canvas.reference.append(final_ans["reference"])
  257. cvs.dsl = json.loads(str(canvas))
  258. result = {"answer": final_ans["content"], "reference": final_ans.get("reference", [])}
  259. fillin_conv(result)
  260. API4ConversationService.append_message(conv.id, conv.to_dict())
  261. rename_field(result)
  262. return get_json_result(data=result)
  263. # ******************For dialog******************
  264. conv.message.append(msg[-1])
  265. e, dia = DialogService.get_by_id(conv.dialog_id)
  266. if not e:
  267. return get_data_error_result(message="Dialog not found!")
  268. del req["conversation_id"]
  269. del req["messages"]
  270. if not conv.reference:
  271. conv.reference = []
  272. conv.message.append({"role": "assistant", "content": "", "id": message_id})
  273. conv.reference.append({"chunks": [], "doc_aggs": []})
  274. def stream():
  275. nonlocal dia, msg, req, conv
  276. try:
  277. for ans in chat(dia, msg, True, **req):
  278. fillin_conv(ans)
  279. rename_field(ans)
  280. yield "data:" + json.dumps({"code": 0, "message": "", "data": ans},
  281. ensure_ascii=False) + "\n\n"
  282. API4ConversationService.append_message(conv.id, conv.to_dict())
  283. except Exception as e:
  284. yield "data:" + json.dumps({"code": 500, "message": str(e),
  285. "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
  286. ensure_ascii=False) + "\n\n"
  287. yield "data:" + json.dumps({"code": 0, "message": "", "data": True}, ensure_ascii=False) + "\n\n"
  288. if req.get("stream", True):
  289. resp = Response(stream(), mimetype="text/event-stream")
  290. resp.headers.add_header("Cache-control", "no-cache")
  291. resp.headers.add_header("Connection", "keep-alive")
  292. resp.headers.add_header("X-Accel-Buffering", "no")
  293. resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
  294. return resp
  295. answer = None
  296. for ans in chat(dia, msg, **req):
  297. answer = ans
  298. fillin_conv(ans)
  299. API4ConversationService.append_message(conv.id, conv.to_dict())
  300. break
  301. rename_field(answer)
  302. return get_json_result(data=answer)
  303. except Exception as e:
  304. return server_error_response(e)
  305. @manager.route('/conversation/<conversation_id>', methods=['GET']) # noqa: F821
  306. # @login_required
  307. def get_conversation(conversation_id):
  308. token = request.headers.get('Authorization').split()[1]
  309. objs = APIToken.query(token=token)
  310. if not objs:
  311. return get_json_result(
  312. data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  313. try:
  314. e, conv = API4ConversationService.get_by_id(conversation_id)
  315. if not e:
  316. return get_data_error_result(message="Conversation not found!")
  317. conv = conv.to_dict()
  318. if token != APIToken.query(dialog_id=conv['dialog_id'])[0].token:
  319. return get_json_result(data=False, message='Authentication error: API key is invalid for this conversation_id!"',
  320. code=settings.RetCode.AUTHENTICATION_ERROR)
  321. for referenct_i in conv['reference']:
  322. if referenct_i is None or len(referenct_i) == 0:
  323. continue
  324. for chunk_i in referenct_i['chunks']:
  325. if 'docnm_kwd' in chunk_i.keys():
  326. chunk_i['doc_name'] = chunk_i['docnm_kwd']
  327. chunk_i.pop('docnm_kwd')
  328. return get_json_result(data=conv)
  329. except Exception as e:
  330. return server_error_response(e)
  331. @manager.route('/document/upload', methods=['POST']) # noqa: F821
  332. @validate_request("kb_name")
  333. def upload():
  334. token = request.headers.get('Authorization').split()[1]
  335. objs = APIToken.query(token=token)
  336. if not objs:
  337. return get_json_result(
  338. data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  339. kb_name = request.form.get("kb_name").strip()
  340. tenant_id = objs[0].tenant_id
  341. try:
  342. e, kb = KnowledgebaseService.get_by_name(kb_name, tenant_id)
  343. if not e:
  344. return get_data_error_result(
  345. message="Can't find this knowledgebase!")
  346. kb_id = kb.id
  347. except Exception as e:
  348. return server_error_response(e)
  349. if 'file' not in request.files:
  350. return get_json_result(
  351. data=False, message='No file part!', code=settings.RetCode.ARGUMENT_ERROR)
  352. file = request.files['file']
  353. if file.filename == '':
  354. return get_json_result(
  355. data=False, message='No file selected!', code=settings.RetCode.ARGUMENT_ERROR)
  356. root_folder = FileService.get_root_folder(tenant_id)
  357. pf_id = root_folder["id"]
  358. FileService.init_knowledgebase_docs(pf_id, tenant_id)
  359. kb_root_folder = FileService.get_kb_folder(tenant_id)
  360. kb_folder = FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"])
  361. try:
  362. if DocumentService.get_doc_count(kb.tenant_id) >= int(os.environ.get('MAX_FILE_NUM_PER_USER', 8192)):
  363. return get_data_error_result(
  364. message="Exceed the maximum file number of a free user!")
  365. filename = duplicate_name(
  366. DocumentService.query,
  367. name=file.filename,
  368. kb_id=kb_id)
  369. filetype = filename_type(filename)
  370. if not filetype:
  371. return get_data_error_result(
  372. message="This type of file has not been supported yet!")
  373. location = filename
  374. while STORAGE_IMPL.obj_exist(kb_id, location):
  375. location += "_"
  376. blob = request.files['file'].read()
  377. STORAGE_IMPL.put(kb_id, location, blob)
  378. doc = {
  379. "id": get_uuid(),
  380. "kb_id": kb.id,
  381. "parser_id": kb.parser_id,
  382. "parser_config": kb.parser_config,
  383. "created_by": kb.tenant_id,
  384. "type": filetype,
  385. "name": filename,
  386. "location": location,
  387. "size": len(blob),
  388. "thumbnail": thumbnail(filename, blob),
  389. "suffix": Path(filename).suffix.lstrip("."),
  390. }
  391. form_data = request.form
  392. if "parser_id" in form_data.keys():
  393. if request.form.get("parser_id").strip() in list(vars(ParserType).values())[1:-3]:
  394. doc["parser_id"] = request.form.get("parser_id").strip()
  395. if doc["type"] == FileType.VISUAL:
  396. doc["parser_id"] = ParserType.PICTURE.value
  397. if doc["type"] == FileType.AURAL:
  398. doc["parser_id"] = ParserType.AUDIO.value
  399. if re.search(r"\.(ppt|pptx|pages)$", filename):
  400. doc["parser_id"] = ParserType.PRESENTATION.value
  401. if re.search(r"\.(eml)$", filename):
  402. doc["parser_id"] = ParserType.EMAIL.value
  403. doc_result = DocumentService.insert(doc)
  404. FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
  405. except Exception as e:
  406. return server_error_response(e)
  407. if "run" in form_data.keys():
  408. if request.form.get("run").strip() == "1":
  409. try:
  410. info = {"run": 1, "progress": 0}
  411. info["progress_msg"] = ""
  412. info["chunk_num"] = 0
  413. info["token_num"] = 0
  414. DocumentService.update_by_id(doc["id"], info)
  415. # if str(req["run"]) == TaskStatus.CANCEL.value:
  416. tenant_id = DocumentService.get_tenant_id(doc["id"])
  417. if not tenant_id:
  418. return get_data_error_result(message="Tenant not found!")
  419. # e, doc = DocumentService.get_by_id(doc["id"])
  420. TaskService.filter_delete([Task.doc_id == doc["id"]])
  421. e, doc = DocumentService.get_by_id(doc["id"])
  422. doc = doc.to_dict()
  423. doc["tenant_id"] = tenant_id
  424. bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"])
  425. queue_tasks(doc, bucket, name, 0)
  426. except Exception as e:
  427. return server_error_response(e)
  428. return get_json_result(data=doc_result.to_json())
  429. @manager.route('/document/upload_and_parse', methods=['POST']) # noqa: F821
  430. @validate_request("conversation_id")
  431. def upload_parse():
  432. token = request.headers.get('Authorization').split()[1]
  433. objs = APIToken.query(token=token)
  434. if not objs:
  435. return get_json_result(
  436. data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  437. if 'file' not in request.files:
  438. return get_json_result(
  439. data=False, message='No file part!', code=settings.RetCode.ARGUMENT_ERROR)
  440. file_objs = request.files.getlist('file')
  441. for file_obj in file_objs:
  442. if file_obj.filename == '':
  443. return get_json_result(
  444. data=False, message='No file selected!', code=settings.RetCode.ARGUMENT_ERROR)
  445. doc_ids = doc_upload_and_parse(request.form.get("conversation_id"), file_objs, objs[0].tenant_id)
  446. return get_json_result(data=doc_ids)
  447. @manager.route('/list_chunks', methods=['POST']) # noqa: F821
  448. # @login_required
  449. def list_chunks():
  450. token = request.headers.get('Authorization').split()[1]
  451. objs = APIToken.query(token=token)
  452. if not objs:
  453. return get_json_result(
  454. data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  455. req = request.json
  456. try:
  457. if "doc_name" in req.keys():
  458. tenant_id = DocumentService.get_tenant_id_by_name(req['doc_name'])
  459. doc_id = DocumentService.get_doc_id_by_doc_name(req['doc_name'])
  460. elif "doc_id" in req.keys():
  461. tenant_id = DocumentService.get_tenant_id(req['doc_id'])
  462. doc_id = req['doc_id']
  463. else:
  464. return get_json_result(
  465. data=False, message="Can't find doc_name or doc_id"
  466. )
  467. kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
  468. res = settings.retrievaler.chunk_list(doc_id, tenant_id, kb_ids)
  469. res = [
  470. {
  471. "content": res_item["content_with_weight"],
  472. "doc_name": res_item["docnm_kwd"],
  473. "image_id": res_item["img_id"]
  474. } for res_item in res
  475. ]
  476. except Exception as e:
  477. return server_error_response(e)
  478. return get_json_result(data=res)
  479. @manager.route('/get_chunk/<chunk_id>', methods=['GET']) # noqa: F821
  480. # @login_required
  481. def get_chunk(chunk_id):
  482. from rag.nlp import search
  483. token = request.headers.get('Authorization').split()[1]
  484. objs = APIToken.query(token=token)
  485. if not objs:
  486. return get_json_result(
  487. data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  488. try:
  489. tenant_id = objs[0].tenant_id
  490. kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
  491. chunk = settings.docStoreConn.get(chunk_id, search.index_name(tenant_id), kb_ids)
  492. if chunk is None:
  493. return server_error_response(Exception("Chunk not found"))
  494. k = []
  495. for n in chunk.keys():
  496. if re.search(r"(_vec$|_sm_|_tks|_ltks)", n):
  497. k.append(n)
  498. for n in k:
  499. del chunk[n]
  500. return get_json_result(data=chunk)
  501. except Exception as e:
  502. return server_error_response(e)
  503. @manager.route('/list_kb_docs', methods=['POST']) # noqa: F821
  504. # @login_required
  505. def list_kb_docs():
  506. token = request.headers.get('Authorization').split()[1]
  507. objs = APIToken.query(token=token)
  508. if not objs:
  509. return get_json_result(
  510. data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  511. req = request.json
  512. tenant_id = objs[0].tenant_id
  513. kb_name = req.get("kb_name", "").strip()
  514. try:
  515. e, kb = KnowledgebaseService.get_by_name(kb_name, tenant_id)
  516. if not e:
  517. return get_data_error_result(
  518. message="Can't find this knowledgebase!")
  519. kb_id = kb.id
  520. except Exception as e:
  521. return server_error_response(e)
  522. page_number = int(req.get("page", 1))
  523. items_per_page = int(req.get("page_size", 15))
  524. orderby = req.get("orderby", "create_time")
  525. desc = req.get("desc", True)
  526. keywords = req.get("keywords", "")
  527. status = req.get("status", [])
  528. if status:
  529. invalid_status = {s for s in status if s not in VALID_TASK_STATUS}
  530. if invalid_status:
  531. return get_data_error_result(
  532. message=f"Invalid filter status conditions: {', '.join(invalid_status)}"
  533. )
  534. types = req.get("types", [])
  535. if types:
  536. invalid_types = {t for t in types if t not in VALID_FILE_TYPES}
  537. if invalid_types:
  538. return get_data_error_result(
  539. message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}"
  540. )
  541. try:
  542. docs, tol = DocumentService.get_by_kb_id(
  543. kb_id, page_number, items_per_page, orderby, desc, keywords, status, types)
  544. docs = [{"doc_id": doc['id'], "doc_name": doc['name']} for doc in docs]
  545. return get_json_result(data={"total": tol, "docs": docs})
  546. except Exception as e:
  547. return server_error_response(e)
  548. @manager.route('/document/infos', methods=['POST']) # noqa: F821
  549. @validate_request("doc_ids")
  550. def docinfos():
  551. token = request.headers.get('Authorization').split()[1]
  552. objs = APIToken.query(token=token)
  553. if not objs:
  554. return get_json_result(
  555. data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  556. req = request.json
  557. doc_ids = req["doc_ids"]
  558. docs = DocumentService.get_by_ids(doc_ids)
  559. return get_json_result(data=list(docs.dicts()))
  560. @manager.route('/document', methods=['DELETE']) # noqa: F821
  561. # @login_required
  562. def document_rm():
  563. token = request.headers.get('Authorization').split()[1]
  564. objs = APIToken.query(token=token)
  565. if not objs:
  566. return get_json_result(
  567. data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  568. tenant_id = objs[0].tenant_id
  569. req = request.json
  570. try:
  571. doc_ids = DocumentService.get_doc_ids_by_doc_names(req.get("doc_names", []))
  572. for doc_id in req.get("doc_ids", []):
  573. if doc_id not in doc_ids:
  574. doc_ids.append(doc_id)
  575. if not doc_ids:
  576. return get_json_result(
  577. data=False, message="Can't find doc_names or doc_ids"
  578. )
  579. except Exception as e:
  580. return server_error_response(e)
  581. root_folder = FileService.get_root_folder(tenant_id)
  582. pf_id = root_folder["id"]
  583. FileService.init_knowledgebase_docs(pf_id, tenant_id)
  584. errors = ""
  585. docs = DocumentService.get_by_ids(doc_ids)
  586. doc_dic = {}
  587. for doc in docs:
  588. doc_dic[doc.id] = doc
  589. for doc_id in doc_ids:
  590. try:
  591. if doc_id not in doc_dic:
  592. return get_data_error_result(message="Document not found!")
  593. doc = doc_dic[doc_id]
  594. tenant_id = DocumentService.get_tenant_id(doc_id)
  595. if not tenant_id:
  596. return get_data_error_result(message="Tenant not found!")
  597. b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
  598. if not DocumentService.remove_document(doc, tenant_id):
  599. return get_data_error_result(
  600. message="Database error (Document removal)!")
  601. f2d = File2DocumentService.get_by_document_id(doc_id)
  602. FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
  603. File2DocumentService.delete_by_document_id(doc_id)
  604. STORAGE_IMPL.rm(b, n)
  605. except Exception as e:
  606. errors += str(e)
  607. if errors:
  608. return get_json_result(data=False, message=errors, code=settings.RetCode.SERVER_ERROR)
  609. return get_json_result(data=True)
  610. @manager.route('/completion_aibotk', methods=['POST']) # noqa: F821
  611. @validate_request("Authorization", "conversation_id", "word")
  612. def completion_faq():
  613. import base64
  614. req = request.json
  615. token = req["Authorization"]
  616. objs = APIToken.query(token=token)
  617. if not objs:
  618. return get_json_result(
  619. data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  620. e, conv = API4ConversationService.get_by_id(req["conversation_id"])
  621. if not e:
  622. return get_data_error_result(message="Conversation not found!")
  623. if "quote" not in req:
  624. req["quote"] = True
  625. msg = []
  626. msg.append({"role": "user", "content": req["word"]})
  627. if not msg[-1].get("id"):
  628. msg[-1]["id"] = get_uuid()
  629. message_id = msg[-1]["id"]
  630. def fillin_conv(ans):
  631. nonlocal conv, message_id
  632. if not conv.reference:
  633. conv.reference.append(ans["reference"])
  634. else:
  635. conv.reference[-1] = ans["reference"]
  636. conv.message[-1] = {"role": "assistant", "content": ans["answer"], "id": message_id}
  637. ans["id"] = message_id
  638. try:
  639. if conv.source == "agent":
  640. conv.message.append(msg[-1])
  641. e, cvs = UserCanvasService.get_by_id(conv.dialog_id)
  642. if not e:
  643. return server_error_response("canvas not found.")
  644. if not isinstance(cvs.dsl, str):
  645. cvs.dsl = json.dumps(cvs.dsl, ensure_ascii=False)
  646. if not conv.reference:
  647. conv.reference = []
  648. conv.message.append({"role": "assistant", "content": "", "id": message_id})
  649. conv.reference.append({"chunks": [], "doc_aggs": []})
  650. final_ans = {"reference": [], "doc_aggs": []}
  651. canvas = Canvas(cvs.dsl, objs[0].tenant_id)
  652. canvas.messages.append(msg[-1])
  653. canvas.add_user_input(msg[-1]["content"])
  654. answer = canvas.run(stream=False)
  655. assert answer is not None, "Nothing. Is it over?"
  656. data_type_picture = {
  657. "type": 3,
  658. "url": "base64 content"
  659. }
  660. data = [
  661. {
  662. "type": 1,
  663. "content": ""
  664. }
  665. ]
  666. final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
  667. canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
  668. if final_ans.get("reference"):
  669. canvas.reference.append(final_ans["reference"])
  670. cvs.dsl = json.loads(str(canvas))
  671. ans = {"answer": final_ans["content"], "reference": final_ans.get("reference", [])}
  672. data[0]["content"] += re.sub(r'##\d\$\$', '', ans["answer"])
  673. fillin_conv(ans)
  674. API4ConversationService.append_message(conv.id, conv.to_dict())
  675. chunk_idxs = [int(match[2]) for match in re.findall(r'##\d\$\$', ans["answer"])]
  676. for chunk_idx in chunk_idxs[:1]:
  677. if ans["reference"]["chunks"][chunk_idx]["img_id"]:
  678. try:
  679. bkt, nm = ans["reference"]["chunks"][chunk_idx]["img_id"].split("-")
  680. response = STORAGE_IMPL.get(bkt, nm)
  681. data_type_picture["url"] = base64.b64encode(response).decode('utf-8')
  682. data.append(data_type_picture)
  683. break
  684. except Exception as e:
  685. return server_error_response(e)
  686. response = {"code": 200, "msg": "success", "data": data}
  687. return response
  688. # ******************For dialog******************
  689. conv.message.append(msg[-1])
  690. e, dia = DialogService.get_by_id(conv.dialog_id)
  691. if not e:
  692. return get_data_error_result(message="Dialog not found!")
  693. del req["conversation_id"]
  694. if not conv.reference:
  695. conv.reference = []
  696. conv.message.append({"role": "assistant", "content": "", "id": message_id})
  697. conv.reference.append({"chunks": [], "doc_aggs": []})
  698. data_type_picture = {
  699. "type": 3,
  700. "url": "base64 content"
  701. }
  702. data = [
  703. {
  704. "type": 1,
  705. "content": ""
  706. }
  707. ]
  708. ans = ""
  709. for a in chat(dia, msg, stream=False, **req):
  710. ans = a
  711. break
  712. data[0]["content"] += re.sub(r'##\d\$\$', '', ans["answer"])
  713. fillin_conv(ans)
  714. API4ConversationService.append_message(conv.id, conv.to_dict())
  715. chunk_idxs = [int(match[2]) for match in re.findall(r'##\d\$\$', ans["answer"])]
  716. for chunk_idx in chunk_idxs[:1]:
  717. if ans["reference"]["chunks"][chunk_idx]["img_id"]:
  718. try:
  719. bkt, nm = ans["reference"]["chunks"][chunk_idx]["img_id"].split("-")
  720. response = STORAGE_IMPL.get(bkt, nm)
  721. data_type_picture["url"] = base64.b64encode(response).decode('utf-8')
  722. data.append(data_type_picture)
  723. break
  724. except Exception as e:
  725. return server_error_response(e)
  726. response = {"code": 200, "msg": "success", "data": data}
  727. return response
  728. except Exception as e:
  729. return server_error_response(e)
  730. @manager.route('/retrieval', methods=['POST']) # noqa: F821
  731. @validate_request("kb_id", "question")
  732. def retrieval():
  733. token = request.headers.get('Authorization').split()[1]
  734. objs = APIToken.query(token=token)
  735. if not objs:
  736. return get_json_result(
  737. data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  738. req = request.json
  739. kb_ids = req.get("kb_id", [])
  740. doc_ids = req.get("doc_ids", [])
  741. question = req.get("question")
  742. page = int(req.get("page", 1))
  743. size = int(req.get("page_size", 30))
  744. similarity_threshold = float(req.get("similarity_threshold", 0.2))
  745. vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
  746. top = int(req.get("top_k", 1024))
  747. highlight = bool(req.get("highlight", False))
  748. try:
  749. kbs = KnowledgebaseService.get_by_ids(kb_ids)
  750. embd_nms = list(set([kb.embd_id for kb in kbs]))
  751. if len(embd_nms) != 1:
  752. return get_json_result(
  753. data=False, message='Knowledge bases use different embedding models or does not exist."',
  754. code=settings.RetCode.AUTHENTICATION_ERROR)
  755. embd_mdl = LLMBundle(kbs[0].tenant_id, LLMType.EMBEDDING, llm_name=kbs[0].embd_id)
  756. rerank_mdl = None
  757. if req.get("rerank_id"):
  758. rerank_mdl = LLMBundle(kbs[0].tenant_id, LLMType.RERANK, llm_name=req["rerank_id"])
  759. if req.get("keyword", False):
  760. chat_mdl = LLMBundle(kbs[0].tenant_id, LLMType.CHAT)
  761. question += keyword_extraction(chat_mdl, question)
  762. ranks = settings.retrievaler.retrieval(question, embd_mdl, kbs[0].tenant_id, kb_ids, page, size,
  763. similarity_threshold, vector_similarity_weight, top,
  764. doc_ids, rerank_mdl=rerank_mdl, highlight= highlight,
  765. rank_feature=label_question(question, kbs))
  766. for c in ranks["chunks"]:
  767. c.pop("vector", None)
  768. return get_json_result(data=ranks)
  769. except Exception as e:
  770. if str(e).find("not_found") > 0:
  771. return get_json_result(data=False, message='No chunk found! Check the chunk status please!',
  772. code=settings.RetCode.DATA_ERROR)
  773. return server_error_response(e)