Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

api_app.py 33KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import json
  17. import os
  18. import re
  19. from datetime import datetime, timedelta
  20. from flask import request, Response
  21. from api.db.services.llm_service import TenantLLMService
  22. from flask_login import login_required, current_user
  23. from api.db import FileType, LLMType, ParserType, FileSource
  24. from api.db.db_models import APIToken, Task, File
  25. from api.db.services import duplicate_name
  26. from api.db.services.api_service import APITokenService, API4ConversationService
  27. from api.db.services.dialog_service import DialogService, chat, keyword_extraction
  28. from api.db.services.document_service import DocumentService, doc_upload_and_parse
  29. from api.db.services.file2document_service import File2DocumentService
  30. from api.db.services.file_service import FileService
  31. from api.db.services.knowledgebase_service import KnowledgebaseService
  32. from api.db.services.task_service import queue_tasks, TaskService
  33. from api.db.services.user_service import UserTenantService
  34. from api import settings
  35. from api.utils import get_uuid, current_timestamp, datetime_format
  36. from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request, \
  37. generate_confirmation_token
  38. from api.utils.file_utils import filename_type, thumbnail
  39. from rag.utils.storage_factory import STORAGE_IMPL
  40. from api.db.services.canvas_service import UserCanvasService
  41. from agent.canvas import Canvas
  42. from functools import partial
  43. @manager.route('/new_token', methods=['POST']) # noqa: F821
  44. @login_required
  45. def new_token():
  46. req = request.json
  47. try:
  48. tenants = UserTenantService.query(user_id=current_user.id)
  49. if not tenants:
  50. return get_data_error_result(message="Tenant not found!")
  51. tenant_id = tenants[0].tenant_id
  52. obj = {"tenant_id": tenant_id, "token": generate_confirmation_token(tenant_id),
  53. "create_time": current_timestamp(),
  54. "create_date": datetime_format(datetime.now()),
  55. "update_time": None,
  56. "update_date": None
  57. }
  58. if req.get("canvas_id"):
  59. obj["dialog_id"] = req["canvas_id"]
  60. obj["source"] = "agent"
  61. else:
  62. obj["dialog_id"] = req["dialog_id"]
  63. if not APITokenService.save(**obj):
  64. return get_data_error_result(message="Fail to new a dialog!")
  65. return get_json_result(data=obj)
  66. except Exception as e:
  67. return server_error_response(e)
  68. @manager.route('/token_list', methods=['GET']) # noqa: F821
  69. @login_required
  70. def token_list():
  71. try:
  72. tenants = UserTenantService.query(user_id=current_user.id)
  73. if not tenants:
  74. return get_data_error_result(message="Tenant not found!")
  75. id = request.args["dialog_id"] if "dialog_id" in request.args else request.args["canvas_id"]
  76. objs = APITokenService.query(tenant_id=tenants[0].tenant_id, dialog_id=id)
  77. return get_json_result(data=[o.to_dict() for o in objs])
  78. except Exception as e:
  79. return server_error_response(e)
  80. @manager.route('/rm', methods=['POST']) # noqa: F821
  81. @validate_request("tokens", "tenant_id")
  82. @login_required
  83. def rm():
  84. req = request.json
  85. try:
  86. for token in req["tokens"]:
  87. APITokenService.filter_delete(
  88. [APIToken.tenant_id == req["tenant_id"], APIToken.token == token])
  89. return get_json_result(data=True)
  90. except Exception as e:
  91. return server_error_response(e)
  92. @manager.route('/stats', methods=['GET']) # noqa: F821
  93. @login_required
  94. def stats():
  95. try:
  96. tenants = UserTenantService.query(user_id=current_user.id)
  97. if not tenants:
  98. return get_data_error_result(message="Tenant not found!")
  99. objs = API4ConversationService.stats(
  100. tenants[0].tenant_id,
  101. request.args.get(
  102. "from_date",
  103. (datetime.now() -
  104. timedelta(
  105. days=7)).strftime("%Y-%m-%d 00:00:00")),
  106. request.args.get(
  107. "to_date",
  108. datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
  109. "agent" if "canvas_id" in request.args else None)
  110. res = {
  111. "pv": [(o["dt"], o["pv"]) for o in objs],
  112. "uv": [(o["dt"], o["uv"]) for o in objs],
  113. "speed": [(o["dt"], float(o["tokens"]) / (float(o["duration"] + 0.1))) for o in objs],
  114. "tokens": [(o["dt"], float(o["tokens"]) / 1000.) for o in objs],
  115. "round": [(o["dt"], o["round"]) for o in objs],
  116. "thumb_up": [(o["dt"], o["thumb_up"]) for o in objs]
  117. }
  118. return get_json_result(data=res)
  119. except Exception as e:
  120. return server_error_response(e)
  121. @manager.route('/new_conversation', methods=['GET']) # noqa: F821
  122. def set_conversation():
  123. token = request.headers.get('Authorization').split()[1]
  124. objs = APIToken.query(token=token)
  125. if not objs:
  126. return get_json_result(
  127. data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  128. try:
  129. if objs[0].source == "agent":
  130. e, cvs = UserCanvasService.get_by_id(objs[0].dialog_id)
  131. if not e:
  132. return server_error_response("canvas not found.")
  133. if not isinstance(cvs.dsl, str):
  134. cvs.dsl = json.dumps(cvs.dsl, ensure_ascii=False)
  135. canvas = Canvas(cvs.dsl, objs[0].tenant_id)
  136. conv = {
  137. "id": get_uuid(),
  138. "dialog_id": cvs.id,
  139. "user_id": request.args.get("user_id", ""),
  140. "message": [{"role": "assistant", "content": canvas.get_prologue()}],
  141. "source": "agent"
  142. }
  143. API4ConversationService.save(**conv)
  144. return get_json_result(data=conv)
  145. else:
  146. e, dia = DialogService.get_by_id(objs[0].dialog_id)
  147. if not e:
  148. return get_data_error_result(message="Dialog not found")
  149. conv = {
  150. "id": get_uuid(),
  151. "dialog_id": dia.id,
  152. "user_id": request.args.get("user_id", ""),
  153. "message": [{"role": "assistant", "content": dia.prompt_config["prologue"]}]
  154. }
  155. API4ConversationService.save(**conv)
  156. return get_json_result(data=conv)
  157. except Exception as e:
  158. return server_error_response(e)
  159. @manager.route('/completion', methods=['POST']) # noqa: F821
  160. @validate_request("conversation_id", "messages")
  161. def completion():
  162. token = request.headers.get('Authorization').split()[1]
  163. objs = APIToken.query(token=token)
  164. if not objs:
  165. return get_json_result(
  166. data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  167. req = request.json
  168. e, conv = API4ConversationService.get_by_id(req["conversation_id"])
  169. if not e:
  170. return get_data_error_result(message="Conversation not found!")
  171. if "quote" not in req:
  172. req["quote"] = False
  173. msg = []
  174. for m in req["messages"]:
  175. if m["role"] == "system":
  176. continue
  177. if m["role"] == "assistant" and not msg:
  178. continue
  179. msg.append(m)
  180. if not msg[-1].get("id"):
  181. msg[-1]["id"] = get_uuid()
  182. message_id = msg[-1]["id"]
  183. def fillin_conv(ans):
  184. nonlocal conv, message_id
  185. if not conv.reference:
  186. conv.reference.append(ans["reference"])
  187. else:
  188. conv.reference[-1] = ans["reference"]
  189. conv.message[-1] = {"role": "assistant", "content": ans["answer"], "id": message_id}
  190. ans["id"] = message_id
  191. def rename_field(ans):
  192. reference = ans['reference']
  193. if not isinstance(reference, dict):
  194. return
  195. for chunk_i in reference.get('chunks', []):
  196. if 'docnm_kwd' in chunk_i:
  197. chunk_i['doc_name'] = chunk_i['docnm_kwd']
  198. chunk_i.pop('docnm_kwd')
  199. try:
  200. if conv.source == "agent":
  201. stream = req.get("stream", True)
  202. conv.message.append(msg[-1])
  203. e, cvs = UserCanvasService.get_by_id(conv.dialog_id)
  204. if not e:
  205. return server_error_response("canvas not found.")
  206. del req["conversation_id"]
  207. del req["messages"]
  208. if not isinstance(cvs.dsl, str):
  209. cvs.dsl = json.dumps(cvs.dsl, ensure_ascii=False)
  210. if not conv.reference:
  211. conv.reference = []
  212. conv.message.append({"role": "assistant", "content": "", "id": message_id})
  213. conv.reference.append({"chunks": [], "doc_aggs": []})
  214. final_ans = {"reference": [], "content": ""}
  215. canvas = Canvas(cvs.dsl, objs[0].tenant_id)
  216. canvas.messages.append(msg[-1])
  217. canvas.add_user_input(msg[-1]["content"])
  218. answer = canvas.run(stream=stream)
  219. assert answer is not None, "Nothing. Is it over?"
  220. if stream:
  221. assert isinstance(answer, partial), "Nothing. Is it over?"
  222. def sse():
  223. nonlocal answer, cvs, conv
  224. try:
  225. for ans in answer():
  226. for k in ans.keys():
  227. final_ans[k] = ans[k]
  228. ans = {"answer": ans["content"], "reference": ans.get("reference", [])}
  229. fillin_conv(ans)
  230. rename_field(ans)
  231. yield "data:" + json.dumps({"code": 0, "message": "", "data": ans},
  232. ensure_ascii=False) + "\n\n"
  233. canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
  234. canvas.history.append(("assistant", final_ans["content"]))
  235. if final_ans.get("reference"):
  236. canvas.reference.append(final_ans["reference"])
  237. cvs.dsl = json.loads(str(canvas))
  238. API4ConversationService.append_message(conv.id, conv.to_dict())
  239. except Exception as e:
  240. yield "data:" + json.dumps({"code": 500, "message": str(e),
  241. "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
  242. ensure_ascii=False) + "\n\n"
  243. yield "data:" + json.dumps({"code": 0, "message": "", "data": True}, ensure_ascii=False) + "\n\n"
  244. resp = Response(sse(), mimetype="text/event-stream")
  245. resp.headers.add_header("Cache-control", "no-cache")
  246. resp.headers.add_header("Connection", "keep-alive")
  247. resp.headers.add_header("X-Accel-Buffering", "no")
  248. resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
  249. return resp
  250. final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
  251. canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
  252. if final_ans.get("reference"):
  253. canvas.reference.append(final_ans["reference"])
  254. cvs.dsl = json.loads(str(canvas))
  255. result = {"answer": final_ans["content"], "reference": final_ans.get("reference", [])}
  256. fillin_conv(result)
  257. API4ConversationService.append_message(conv.id, conv.to_dict())
  258. rename_field(result)
  259. return get_json_result(data=result)
  260. # ******************For dialog******************
  261. conv.message.append(msg[-1])
  262. e, dia = DialogService.get_by_id(conv.dialog_id)
  263. if not e:
  264. return get_data_error_result(message="Dialog not found!")
  265. del req["conversation_id"]
  266. del req["messages"]
  267. if not conv.reference:
  268. conv.reference = []
  269. conv.message.append({"role": "assistant", "content": "", "id": message_id})
  270. conv.reference.append({"chunks": [], "doc_aggs": []})
  271. def stream():
  272. nonlocal dia, msg, req, conv
  273. try:
  274. for ans in chat(dia, msg, True, **req):
  275. fillin_conv(ans)
  276. rename_field(ans)
  277. yield "data:" + json.dumps({"code": 0, "message": "", "data": ans},
  278. ensure_ascii=False) + "\n\n"
  279. API4ConversationService.append_message(conv.id, conv.to_dict())
  280. except Exception as e:
  281. yield "data:" + json.dumps({"code": 500, "message": str(e),
  282. "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
  283. ensure_ascii=False) + "\n\n"
  284. yield "data:" + json.dumps({"code": 0, "message": "", "data": True}, ensure_ascii=False) + "\n\n"
  285. if req.get("stream", True):
  286. resp = Response(stream(), mimetype="text/event-stream")
  287. resp.headers.add_header("Cache-control", "no-cache")
  288. resp.headers.add_header("Connection", "keep-alive")
  289. resp.headers.add_header("X-Accel-Buffering", "no")
  290. resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
  291. return resp
  292. answer = None
  293. for ans in chat(dia, msg, **req):
  294. answer = ans
  295. fillin_conv(ans)
  296. API4ConversationService.append_message(conv.id, conv.to_dict())
  297. break
  298. rename_field(answer)
  299. return get_json_result(data=answer)
  300. except Exception as e:
  301. return server_error_response(e)
  302. @manager.route('/conversation/<conversation_id>', methods=['GET']) # noqa: F821
  303. # @login_required
  304. def get(conversation_id):
  305. token = request.headers.get('Authorization').split()[1]
  306. objs = APIToken.query(token=token)
  307. if not objs:
  308. return get_json_result(
  309. data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  310. try:
  311. e, conv = API4ConversationService.get_by_id(conversation_id)
  312. if not e:
  313. return get_data_error_result(message="Conversation not found!")
  314. conv = conv.to_dict()
  315. if token != APIToken.query(dialog_id=conv['dialog_id'])[0].token:
  316. return get_json_result(data=False, message='Token is not valid for this conversation_id!"',
  317. code=settings.RetCode.AUTHENTICATION_ERROR)
  318. for referenct_i in conv['reference']:
  319. if referenct_i is None or len(referenct_i) == 0:
  320. continue
  321. for chunk_i in referenct_i['chunks']:
  322. if 'docnm_kwd' in chunk_i.keys():
  323. chunk_i['doc_name'] = chunk_i['docnm_kwd']
  324. chunk_i.pop('docnm_kwd')
  325. return get_json_result(data=conv)
  326. except Exception as e:
  327. return server_error_response(e)
  328. @manager.route('/document/upload', methods=['POST']) # noqa: F821
  329. @validate_request("kb_name")
  330. def upload():
  331. token = request.headers.get('Authorization').split()[1]
  332. objs = APIToken.query(token=token)
  333. if not objs:
  334. return get_json_result(
  335. data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  336. kb_name = request.form.get("kb_name").strip()
  337. tenant_id = objs[0].tenant_id
  338. try:
  339. e, kb = KnowledgebaseService.get_by_name(kb_name, tenant_id)
  340. if not e:
  341. return get_data_error_result(
  342. message="Can't find this knowledgebase!")
  343. kb_id = kb.id
  344. except Exception as e:
  345. return server_error_response(e)
  346. if 'file' not in request.files:
  347. return get_json_result(
  348. data=False, message='No file part!', code=settings.RetCode.ARGUMENT_ERROR)
  349. file = request.files['file']
  350. if file.filename == '':
  351. return get_json_result(
  352. data=False, message='No file selected!', code=settings.RetCode.ARGUMENT_ERROR)
  353. root_folder = FileService.get_root_folder(tenant_id)
  354. pf_id = root_folder["id"]
  355. FileService.init_knowledgebase_docs(pf_id, tenant_id)
  356. kb_root_folder = FileService.get_kb_folder(tenant_id)
  357. kb_folder = FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"])
  358. try:
  359. if DocumentService.get_doc_count(kb.tenant_id) >= int(os.environ.get('MAX_FILE_NUM_PER_USER', 8192)):
  360. return get_data_error_result(
  361. message="Exceed the maximum file number of a free user!")
  362. filename = duplicate_name(
  363. DocumentService.query,
  364. name=file.filename,
  365. kb_id=kb_id)
  366. filetype = filename_type(filename)
  367. if not filetype:
  368. return get_data_error_result(
  369. message="This type of file has not been supported yet!")
  370. location = filename
  371. while STORAGE_IMPL.obj_exist(kb_id, location):
  372. location += "_"
  373. blob = request.files['file'].read()
  374. STORAGE_IMPL.put(kb_id, location, blob)
  375. doc = {
  376. "id": get_uuid(),
  377. "kb_id": kb.id,
  378. "parser_id": kb.parser_id,
  379. "parser_config": kb.parser_config,
  380. "created_by": kb.tenant_id,
  381. "type": filetype,
  382. "name": filename,
  383. "location": location,
  384. "size": len(blob),
  385. "thumbnail": thumbnail(filename, blob)
  386. }
  387. form_data = request.form
  388. if "parser_id" in form_data.keys():
  389. if request.form.get("parser_id").strip() in list(vars(ParserType).values())[1:-3]:
  390. doc["parser_id"] = request.form.get("parser_id").strip()
  391. if doc["type"] == FileType.VISUAL:
  392. doc["parser_id"] = ParserType.PICTURE.value
  393. if doc["type"] == FileType.AURAL:
  394. doc["parser_id"] = ParserType.AUDIO.value
  395. if re.search(r"\.(ppt|pptx|pages)$", filename):
  396. doc["parser_id"] = ParserType.PRESENTATION.value
  397. if re.search(r"\.(eml)$", filename):
  398. doc["parser_id"] = ParserType.EMAIL.value
  399. doc_result = DocumentService.insert(doc)
  400. FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
  401. except Exception as e:
  402. return server_error_response(e)
  403. if "run" in form_data.keys():
  404. if request.form.get("run").strip() == "1":
  405. try:
  406. info = {"run": 1, "progress": 0}
  407. info["progress_msg"] = ""
  408. info["chunk_num"] = 0
  409. info["token_num"] = 0
  410. DocumentService.update_by_id(doc["id"], info)
  411. # if str(req["run"]) == TaskStatus.CANCEL.value:
  412. tenant_id = DocumentService.get_tenant_id(doc["id"])
  413. if not tenant_id:
  414. return get_data_error_result(message="Tenant not found!")
  415. # e, doc = DocumentService.get_by_id(doc["id"])
  416. TaskService.filter_delete([Task.doc_id == doc["id"]])
  417. e, doc = DocumentService.get_by_id(doc["id"])
  418. doc = doc.to_dict()
  419. doc["tenant_id"] = tenant_id
  420. bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"])
  421. queue_tasks(doc, bucket, name)
  422. except Exception as e:
  423. return server_error_response(e)
  424. return get_json_result(data=doc_result.to_json())
  425. @manager.route('/document/upload_and_parse', methods=['POST']) # noqa: F821
  426. @validate_request("conversation_id")
  427. def upload_parse():
  428. token = request.headers.get('Authorization').split()[1]
  429. objs = APIToken.query(token=token)
  430. if not objs:
  431. return get_json_result(
  432. data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  433. if 'file' not in request.files:
  434. return get_json_result(
  435. data=False, message='No file part!', code=settings.RetCode.ARGUMENT_ERROR)
  436. file_objs = request.files.getlist('file')
  437. for file_obj in file_objs:
  438. if file_obj.filename == '':
  439. return get_json_result(
  440. data=False, message='No file selected!', code=settings.RetCode.ARGUMENT_ERROR)
  441. doc_ids = doc_upload_and_parse(request.form.get("conversation_id"), file_objs, objs[0].tenant_id)
  442. return get_json_result(data=doc_ids)
  443. @manager.route('/list_chunks', methods=['POST']) # noqa: F821
  444. # @login_required
  445. def list_chunks():
  446. token = request.headers.get('Authorization').split()[1]
  447. objs = APIToken.query(token=token)
  448. if not objs:
  449. return get_json_result(
  450. data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  451. req = request.json
  452. try:
  453. if "doc_name" in req.keys():
  454. tenant_id = DocumentService.get_tenant_id_by_name(req['doc_name'])
  455. doc_id = DocumentService.get_doc_id_by_doc_name(req['doc_name'])
  456. elif "doc_id" in req.keys():
  457. tenant_id = DocumentService.get_tenant_id(req['doc_id'])
  458. doc_id = req['doc_id']
  459. else:
  460. return get_json_result(
  461. data=False, message="Can't find doc_name or doc_id"
  462. )
  463. kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
  464. res = settings.retrievaler.chunk_list(doc_id, tenant_id, kb_ids)
  465. res = [
  466. {
  467. "content": res_item["content_with_weight"],
  468. "doc_name": res_item["docnm_kwd"],
  469. "image_id": res_item["img_id"]
  470. } for res_item in res
  471. ]
  472. except Exception as e:
  473. return server_error_response(e)
  474. return get_json_result(data=res)
  475. @manager.route('/list_kb_docs', methods=['POST']) # noqa: F821
  476. # @login_required
  477. def list_kb_docs():
  478. token = request.headers.get('Authorization').split()[1]
  479. objs = APIToken.query(token=token)
  480. if not objs:
  481. return get_json_result(
  482. data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  483. req = request.json
  484. tenant_id = objs[0].tenant_id
  485. kb_name = req.get("kb_name", "").strip()
  486. try:
  487. e, kb = KnowledgebaseService.get_by_name(kb_name, tenant_id)
  488. if not e:
  489. return get_data_error_result(
  490. message="Can't find this knowledgebase!")
  491. kb_id = kb.id
  492. except Exception as e:
  493. return server_error_response(e)
  494. page_number = int(req.get("page", 1))
  495. items_per_page = int(req.get("page_size", 15))
  496. orderby = req.get("orderby", "create_time")
  497. desc = req.get("desc", True)
  498. keywords = req.get("keywords", "")
  499. try:
  500. docs, tol = DocumentService.get_by_kb_id(
  501. kb_id, page_number, items_per_page, orderby, desc, keywords)
  502. docs = [{"doc_id": doc['id'], "doc_name": doc['name']} for doc in docs]
  503. return get_json_result(data={"total": tol, "docs": docs})
  504. except Exception as e:
  505. return server_error_response(e)
  506. @manager.route('/document/infos', methods=['POST']) # noqa: F821
  507. @validate_request("doc_ids")
  508. def docinfos():
  509. token = request.headers.get('Authorization').split()[1]
  510. objs = APIToken.query(token=token)
  511. if not objs:
  512. return get_json_result(
  513. data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  514. req = request.json
  515. doc_ids = req["doc_ids"]
  516. docs = DocumentService.get_by_ids(doc_ids)
  517. return get_json_result(data=list(docs.dicts()))
  518. @manager.route('/document', methods=['DELETE']) # noqa: F821
  519. # @login_required
  520. def document_rm():
  521. token = request.headers.get('Authorization').split()[1]
  522. objs = APIToken.query(token=token)
  523. if not objs:
  524. return get_json_result(
  525. data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  526. tenant_id = objs[0].tenant_id
  527. req = request.json
  528. try:
  529. doc_ids = [DocumentService.get_doc_id_by_doc_name(doc_name) for doc_name in req.get("doc_names", [])]
  530. for doc_id in req.get("doc_ids", []):
  531. if doc_id not in doc_ids:
  532. doc_ids.append(doc_id)
  533. if not doc_ids:
  534. return get_json_result(
  535. data=False, message="Can't find doc_names or doc_ids"
  536. )
  537. except Exception as e:
  538. return server_error_response(e)
  539. root_folder = FileService.get_root_folder(tenant_id)
  540. pf_id = root_folder["id"]
  541. FileService.init_knowledgebase_docs(pf_id, tenant_id)
  542. errors = ""
  543. for doc_id in doc_ids:
  544. try:
  545. e, doc = DocumentService.get_by_id(doc_id)
  546. if not e:
  547. return get_data_error_result(message="Document not found!")
  548. tenant_id = DocumentService.get_tenant_id(doc_id)
  549. if not tenant_id:
  550. return get_data_error_result(message="Tenant not found!")
  551. b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
  552. if not DocumentService.remove_document(doc, tenant_id):
  553. return get_data_error_result(
  554. message="Database error (Document removal)!")
  555. f2d = File2DocumentService.get_by_document_id(doc_id)
  556. FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
  557. File2DocumentService.delete_by_document_id(doc_id)
  558. STORAGE_IMPL.rm(b, n)
  559. except Exception as e:
  560. errors += str(e)
  561. if errors:
  562. return get_json_result(data=False, message=errors, code=settings.RetCode.SERVER_ERROR)
  563. return get_json_result(data=True)
  564. @manager.route('/completion_aibotk', methods=['POST']) # noqa: F821
  565. @validate_request("Authorization", "conversation_id", "word")
  566. def completion_faq():
  567. import base64
  568. req = request.json
  569. token = req["Authorization"]
  570. objs = APIToken.query(token=token)
  571. if not objs:
  572. return get_json_result(
  573. data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  574. e, conv = API4ConversationService.get_by_id(req["conversation_id"])
  575. if not e:
  576. return get_data_error_result(message="Conversation not found!")
  577. if "quote" not in req:
  578. req["quote"] = True
  579. msg = []
  580. msg.append({"role": "user", "content": req["word"]})
  581. if not msg[-1].get("id"):
  582. msg[-1]["id"] = get_uuid()
  583. message_id = msg[-1]["id"]
  584. def fillin_conv(ans):
  585. nonlocal conv, message_id
  586. if not conv.reference:
  587. conv.reference.append(ans["reference"])
  588. else:
  589. conv.reference[-1] = ans["reference"]
  590. conv.message[-1] = {"role": "assistant", "content": ans["answer"], "id": message_id}
  591. ans["id"] = message_id
  592. try:
  593. if conv.source == "agent":
  594. conv.message.append(msg[-1])
  595. e, cvs = UserCanvasService.get_by_id(conv.dialog_id)
  596. if not e:
  597. return server_error_response("canvas not found.")
  598. if not isinstance(cvs.dsl, str):
  599. cvs.dsl = json.dumps(cvs.dsl, ensure_ascii=False)
  600. if not conv.reference:
  601. conv.reference = []
  602. conv.message.append({"role": "assistant", "content": "", "id": message_id})
  603. conv.reference.append({"chunks": [], "doc_aggs": []})
  604. final_ans = {"reference": [], "doc_aggs": []}
  605. canvas = Canvas(cvs.dsl, objs[0].tenant_id)
  606. canvas.messages.append(msg[-1])
  607. canvas.add_user_input(msg[-1]["content"])
  608. answer = canvas.run(stream=False)
  609. assert answer is not None, "Nothing. Is it over?"
  610. data_type_picture = {
  611. "type": 3,
  612. "url": "base64 content"
  613. }
  614. data = [
  615. {
  616. "type": 1,
  617. "content": ""
  618. }
  619. ]
  620. final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
  621. canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
  622. if final_ans.get("reference"):
  623. canvas.reference.append(final_ans["reference"])
  624. cvs.dsl = json.loads(str(canvas))
  625. ans = {"answer": final_ans["content"], "reference": final_ans.get("reference", [])}
  626. data[0]["content"] += re.sub(r'##\d\$\$', '', ans["answer"])
  627. fillin_conv(ans)
  628. API4ConversationService.append_message(conv.id, conv.to_dict())
  629. chunk_idxs = [int(match[2]) for match in re.findall(r'##\d\$\$', ans["answer"])]
  630. for chunk_idx in chunk_idxs[:1]:
  631. if ans["reference"]["chunks"][chunk_idx]["img_id"]:
  632. try:
  633. bkt, nm = ans["reference"]["chunks"][chunk_idx]["img_id"].split("-")
  634. response = STORAGE_IMPL.get(bkt, nm)
  635. data_type_picture["url"] = base64.b64encode(response).decode('utf-8')
  636. data.append(data_type_picture)
  637. break
  638. except Exception as e:
  639. return server_error_response(e)
  640. response = {"code": 200, "msg": "success", "data": data}
  641. return response
  642. # ******************For dialog******************
  643. conv.message.append(msg[-1])
  644. e, dia = DialogService.get_by_id(conv.dialog_id)
  645. if not e:
  646. return get_data_error_result(message="Dialog not found!")
  647. del req["conversation_id"]
  648. if not conv.reference:
  649. conv.reference = []
  650. conv.message.append({"role": "assistant", "content": "", "id": message_id})
  651. conv.reference.append({"chunks": [], "doc_aggs": []})
  652. data_type_picture = {
  653. "type": 3,
  654. "url": "base64 content"
  655. }
  656. data = [
  657. {
  658. "type": 1,
  659. "content": ""
  660. }
  661. ]
  662. ans = ""
  663. for a in chat(dia, msg, stream=False, **req):
  664. ans = a
  665. break
  666. data[0]["content"] += re.sub(r'##\d\$\$', '', ans["answer"])
  667. fillin_conv(ans)
  668. API4ConversationService.append_message(conv.id, conv.to_dict())
  669. chunk_idxs = [int(match[2]) for match in re.findall(r'##\d\$\$', ans["answer"])]
  670. for chunk_idx in chunk_idxs[:1]:
  671. if ans["reference"]["chunks"][chunk_idx]["img_id"]:
  672. try:
  673. bkt, nm = ans["reference"]["chunks"][chunk_idx]["img_id"].split("-")
  674. response = STORAGE_IMPL.get(bkt, nm)
  675. data_type_picture["url"] = base64.b64encode(response).decode('utf-8')
  676. data.append(data_type_picture)
  677. break
  678. except Exception as e:
  679. return server_error_response(e)
  680. response = {"code": 200, "msg": "success", "data": data}
  681. return response
  682. except Exception as e:
  683. return server_error_response(e)
  684. @manager.route('/retrieval', methods=['POST']) # noqa: F821
  685. @validate_request("kb_id", "question")
  686. def retrieval():
  687. token = request.headers.get('Authorization').split()[1]
  688. objs = APIToken.query(token=token)
  689. if not objs:
  690. return get_json_result(
  691. data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
  692. req = request.json
  693. kb_ids = req.get("kb_id", [])
  694. doc_ids = req.get("doc_ids", [])
  695. question = req.get("question")
  696. page = int(req.get("page", 1))
  697. size = int(req.get("size", 30))
  698. similarity_threshold = float(req.get("similarity_threshold", 0.2))
  699. vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
  700. top = int(req.get("top_k", 1024))
  701. try:
  702. kbs = KnowledgebaseService.get_by_ids(kb_ids)
  703. embd_nms = list(set([kb.embd_id for kb in kbs]))
  704. if len(embd_nms) != 1:
  705. return get_json_result(
  706. data=False, message='Knowledge bases use different embedding models or does not exist."',
  707. code=settings.RetCode.AUTHENTICATION_ERROR)
  708. embd_mdl = TenantLLMService.model_instance(
  709. kbs[0].tenant_id, LLMType.EMBEDDING.value, llm_name=kbs[0].embd_id)
  710. rerank_mdl = None
  711. if req.get("rerank_id"):
  712. rerank_mdl = TenantLLMService.model_instance(
  713. kbs[0].tenant_id, LLMType.RERANK.value, llm_name=req["rerank_id"])
  714. if req.get("keyword", False):
  715. chat_mdl = TenantLLMService.model_instance(kbs[0].tenant_id, LLMType.CHAT)
  716. question += keyword_extraction(chat_mdl, question)
  717. ranks = settings.retrievaler.retrieval(question, embd_mdl, kbs[0].tenant_id, kb_ids, page, size,
  718. similarity_threshold, vector_similarity_weight, top,
  719. doc_ids, rerank_mdl=rerank_mdl)
  720. for c in ranks["chunks"]:
  721. c.pop("vector", None)
  722. return get_json_result(data=ranks)
  723. except Exception as e:
  724. if str(e).find("not_found") > 0:
  725. return get_json_result(data=False, message='No chunk found! Check the chunk status please!',
  726. code=settings.RetCode.DATA_ERROR)
  727. return server_error_response(e)