您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

file_app.py 13KB


  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import os
  17. import pathlib
  18. import re
  19. import flask
  20. from elasticsearch_dsl import Q
  21. from flask import request
  22. from flask_login import login_required, current_user
  23. from api.db.services.document_service import DocumentService
  24. from api.db.services.file2document_service import File2DocumentService
  25. from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
  26. from api.utils import get_uuid
  27. from api.db import FileType, FileSource
  28. from api.db.services import duplicate_name
  29. from api.db.services.file_service import FileService
  30. from api.settings import RetCode
  31. from api.utils.api_utils import get_json_result
  32. from api.utils.file_utils import filename_type
  33. from rag.nlp import search
  34. from rag.utils.es_conn import ELASTICSEARCH
  35. from rag.utils.minio_conn import MINIO
  @manager.route('/upload', methods=['POST'])
  @login_required
  # @validate_request("parent_id")
  def upload():
      """Handle ``POST /upload``: store every uploaded file under a folder.

      Reads the optional ``parent_id`` form field (falling back to the current
      user's root folder) and the multipart ``file`` parts.  Each file name is
      split on ``/`` and the components are passed to
      ``FileService.get_id_list_by_id`` / ``FileService.create_folder``
      (presumably resolving and creating intermediate folders -- confirm in
      ``FileService``).  The file record is then inserted and the bytes are
      written with ``MINIO.put``.

      Returns:
          ``get_json_result`` with the list of inserted records on success, an
          argument/data error result when a check fails, or
          ``server_error_response`` when an exception is raised in the loop.
      """
      pf_id = request.form.get("parent_id")
      if not pf_id:
          pf_id = FileService.get_root_folder(current_user.id)["id"]

      if 'file' not in request.files:
          return get_json_result(
              data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)

      file_objs = request.files.getlist('file')
      # Reject the whole request if any part was submitted without a name.
      if any(candidate.filename == '' for candidate in file_objs):
          return get_json_result(
              data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)

      file_res = []
      try:
          for file_obj in file_objs:
              found, folder = FileService.get_by_id(pf_id)
              if not found:
                  return get_data_error_result(
                      retmsg="Can't find this folder!")

              # A limit of 0 (the default) disables the per-user cap.
              max_file_num = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
              if max_file_num > 0 and DocumentService.get_doc_count(current_user.id) >= max_file_num:
                  return get_data_error_result(
                      retmsg="Exceed the maximum file number of a free user!")

              # Split the file name into path components.
              if file_obj.filename:
                  path_parts = ('/' + file_obj.filename).split('/')
              else:
                  _, folder = FileService.get_by_id(pf_id)
                  path_parts = [folder.name, file_obj.filename]
              part_count = len(path_parts)

              # Look up the ids along the path.
              id_chain = FileService.get_id_list_by_id(pf_id, path_parts, 1, [pf_id])
              chain_len = len(id_chain)

              # Pick the entry to create folders from.  The explicit
              # ``chain_len - N`` arithmetic is kept (rather than ``[-N]``)
              # because the two differ for very short lists.
              if part_count != chain_len:
                  anchor_id = id_chain[chain_len - 1]
              else:
                  anchor_id = id_chain[chain_len - 2]
              found, anchor = FileService.get_by_id(anchor_id)
              if not found:
                  return get_data_error_result(retmsg="Folder not found!")
              last_folder = FileService.create_folder(
                  anchor, anchor_id, path_parts, chain_len)

              # File type and storage key come from the last path component.
              leaf_name = path_parts[part_count - 1]
              filetype = filename_type(leaf_name)
              location = leaf_name
              # Append underscores until the key is unused in this folder.
              while MINIO.obj_exist(last_folder.id, location):
                  location += "_"

              blob = file_obj.read()
              filename = duplicate_name(
                  FileService.query,
                  name=leaf_name,
                  parent_id=last_folder.id)
              record = FileService.insert({
                  "id": get_uuid(),
                  "parent_id": last_folder.id,
                  "tenant_id": current_user.id,
                  "created_by": current_user.id,
                  "type": filetype,
                  "name": filename,
                  "location": location,
                  "size": len(blob),
              })
              MINIO.put(last_folder.id, location, blob)
              file_res.append(record.to_json())
          return get_json_result(data=file_res)
      except Exception as exc:
          return server_error_response(exc)
  @manager.route('/create', methods=['POST'])
  @login_required
  @validate_request("name")
  def create():
      """Handle ``POST /create``: insert a new, empty file record.

      JSON body fields read here: ``name`` (required by ``validate_request``),
      ``parent_id`` (optional; defaults to the current user's root folder) and
      ``type``.  The record is stored with ``FileType.FOLDER`` when ``type``
      equals that value and with ``FileType.VIRTUAL`` otherwise.

      Returns:
          ``get_json_result`` with the inserted record, an error result when
          the parent is missing or the name is already used under it, or
          ``server_error_response`` on any exception.
      """
      req = request.json
      pf_id = req.get("parent_id")
      input_file_type = req.get("type")
      if not pf_id:
          pf_id = FileService.get_root_folder(current_user.id)["id"]

      try:
          if not FileService.is_parent_folder_exist(pf_id):
              return get_json_result(
                  data=False, retmsg="Parent Folder Doesn't Exist!", retcode=RetCode.OPERATING_ERROR)

          if FileService.query(name=req["name"], parent_id=pf_id):
              return get_data_error_result(
                  retmsg="Duplicated folder name in the same folder.")

          # Anything that is not explicitly a folder is stored as "virtual".
          folder_value = FileType.FOLDER.value
          file_type = folder_value if input_file_type == folder_value else FileType.VIRTUAL.value

          record = FileService.insert({
              "id": get_uuid(),
              "parent_id": pf_id,
              "tenant_id": current_user.id,
              "created_by": current_user.id,
              "name": req["name"],
              "location": "",
              "size": 0,
              "type": file_type
          })
          return get_json_result(data=record.to_json())
      except Exception as exc:
          return server_error_response(exc)
  @manager.route('/list', methods=['GET'])
  @login_required
  def list_files():
      """Handle ``GET /list``: return one page of the entries under a folder.

      Query parameters:
          parent_id: folder to list; defaults to the current user's root folder
              (in which case ``FileService.init_knowledgebase_docs`` is also
              invoked for that folder).
          keywords: filter string forwarded to ``FileService.get_by_pf_id``.
          page / page_size: integers, default ``1`` / ``15``.
          orderby: sort column name, default ``"create_time"``.
          desc: sort-direction flag, default true.  The values ``""``,
              ``"false"``, ``"0"``, ``"no"`` and ``"off"`` (case-insensitive)
              mean false; everything else means true.

      Returns:
          ``get_json_result`` with ``total``, ``files`` and ``parent_folder``;
          an argument error for non-integer paging values; a data error when
          the folder does not exist; ``server_error_response`` on exceptions.
      """
      pf_id = request.args.get("parent_id")
      keywords = request.args.get("keywords", "")
      orderby = request.args.get("orderby", "create_time")

      # Reject malformed paging values with a JSON error instead of letting
      # ``int()`` raise outside the handler's try block.
      try:
          page_number = int(request.args.get("page", 1))
          items_per_page = int(request.args.get("page_size", 15))
      except ValueError:
          return get_json_result(
              data=False,
              retmsg='"page" and "page_size" must be integers!',
              retcode=RetCode.ARGUMENT_ERROR)

      # Query-string values are always strings, so a literal "false" would be
      # truthy if passed through unchanged.  Parse it into a real bool.
      # NOTE(review): assumes get_by_pf_id treats ``desc`` as a boolean.
      raw_desc = request.args.get("desc")
      if raw_desc is None:
          desc = True
      else:
          desc = raw_desc.lower() not in ("", "false", "0", "no", "off")

      if not pf_id:
          root_folder = FileService.get_root_folder(current_user.id)
          pf_id = root_folder["id"]
          FileService.init_knowledgebase_docs(pf_id, current_user.id)

      try:
          found, _ = FileService.get_by_id(pf_id)
          if not found:
              return get_data_error_result(retmsg="Folder not found!")

          files, total = FileService.get_by_pf_id(
              current_user.id, pf_id, page_number, items_per_page, orderby, desc, keywords)

          # Look the parent up once and reuse the result for both the
          # existence check and the response payload.
          parent_folder = FileService.get_parent_folder(pf_id)
          if not parent_folder:
              return get_json_result(retmsg="File not found!")

          return get_json_result(data={"total": total, "files": files, "parent_folder": parent_folder.to_json()})
      except Exception as exc:
          return server_error_response(exc)
  @manager.route('/root_folder', methods=['GET'])
  @login_required
  def get_root_folder():
      """Handle ``GET /root_folder``: return the current user's root folder.

      Returns:
          ``get_json_result`` whose data is ``{"root_folder": ...}`` with the
          value produced by ``FileService.get_root_folder``, or
          ``server_error_response`` if that call raises.
      """
      try:
          payload = {"root_folder": FileService.get_root_folder(current_user.id)}
          return get_json_result(data=payload)
      except Exception as exc:
          return server_error_response(exc)
  @manager.route('/parent_folder', methods=['GET'])
  @login_required
  def get_parent_folder():
      """Handle ``GET /parent_folder``: return the parent of ``file_id``.

      Returns:
          ``get_json_result`` whose data is ``{"parent_folder": ...}``, a data
          error when ``file_id`` is unknown, or ``server_error_response`` on
          any exception.
      """
      file_id = request.args.get("file_id")
      try:
          found, _ = FileService.get_by_id(file_id)
          if found:
              parent = FileService.get_parent_folder(file_id)
              return get_json_result(data={"parent_folder": parent.to_json()})
          return get_data_error_result(retmsg="Folder not found!")
      except Exception as exc:
          return server_error_response(exc)
  @manager.route('/all_parent_folder', methods=['GET'])
  @login_required
  def get_all_parent_folders():
      """Handle ``GET /all_parent_folder``: list the parent folders of ``file_id``.

      Returns:
          ``get_json_result`` whose data is ``{"parent_folders": [...]}``, the
          JSON form of each item from ``FileService.get_all_parent_folders``;
          a data error when ``file_id`` is unknown; or
          ``server_error_response`` on any exception.
      """
      file_id = request.args.get("file_id")
      try:
          found, _ = FileService.get_by_id(file_id)
          if not found:
              return get_data_error_result(retmsg="Folder not found!")

          parent_folders_res = [
              folder.to_json()
              for folder in FileService.get_all_parent_folders(file_id)
          ]
          return get_json_result(data={"parent_folders": parent_folders_res})
      except Exception as exc:
          return server_error_response(exc)
  @manager.route('/rm', methods=['POST'])
  @login_required
  @validate_request("file_ids")
  def rm():
      """Handle ``POST /rm``: delete the files/folders listed in ``file_ids``.

      For each id, in order:
        * entries whose ``source_type`` is ``FileSource.KNOWLEDGEBASE`` are
          skipped;
        * for a folder, ``MINIO.rm`` is called for every id returned by
          ``FileService.get_all_innermost_file_ids`` and then
          ``FileService.delete_folder_by_pf_id`` is called;
        * any other entry is removed with ``FileService.delete``;
        * every document linked through ``File2DocumentService`` has its
          index entries deleted, its counters decremented and its record
          removed, after which the links themselves are deleted.

      Processing stops at the first failure; ids handled before it are not
      rolled back.

      Returns:
          ``get_json_result(data=True)`` on success, a data error describing
          the first failed step, or ``server_error_response`` on exceptions.
      """
      file_ids = request.json["file_ids"]
      try:
          for file_id in file_ids:
              found, file = FileService.get_by_id(file_id)
              if not found:
                  return get_data_error_result(retmsg="File or Folder not found!")
              if not file.tenant_id:
                  return get_data_error_result(retmsg="Tenant not found!")
              if file.source_type == FileSource.KNOWLEDGEBASE:
                  continue

              if file.type == FileType.FOLDER.value:
                  # Remove the stored object of every entry reported for this
                  # folder before dropping the folder's records.
                  for inner_file_id in FileService.get_all_innermost_file_ids(file_id, []):
                      found, inner_file = FileService.get_by_id(inner_file_id)
                      if not found:
                          return get_data_error_result(retmsg="File not found!")
                      MINIO.rm(inner_file.parent_id, inner_file.location)
                  FileService.delete_folder_by_pf_id(current_user.id, file_id)
              elif not FileService.delete(file):
                  return get_data_error_result(
                      retmsg="Database error (File removal)!")

              # Delete the documents linked to this file, then the links.
              for inform in File2DocumentService.get_by_file_id(file_id):
                  doc_id = inform.document_id
                  found, doc = DocumentService.get_by_id(doc_id)
                  if not found:
                      return get_data_error_result(retmsg="Document not found!")
                  tenant_id = DocumentService.get_tenant_id(doc_id)
                  if not tenant_id:
                      return get_data_error_result(retmsg="Tenant not found!")

                  ELASTICSEARCH.deleteByQuery(
                      Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
                  # Negative deltas subtract this document's totals.
                  DocumentService.increment_chunk_num(
                      doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
                  if not DocumentService.delete(doc):
                      return get_data_error_result(
                          retmsg="Database error (Document removal)!")
              File2DocumentService.delete_by_file_id(file_id)

          return get_json_result(data=True)
      except Exception as exc:
          return server_error_response(exc)
  @manager.route('/rename', methods=['POST'])
  @login_required
  @validate_request("file_id", "name")
  def rename():
      """Handle ``POST /rename``: change the name of a file record.

      JSON body fields: ``file_id`` and ``name`` (both required by
      ``validate_request``).  The new name must keep the same extension
      (compared case-insensitively) and must not already be used by another
      entry with the same ``parent_id``.  If the file is linked to a document
      through ``File2DocumentService``, the first linked document is renamed
      as well.

      Returns:
          ``get_json_result(data=True)`` on success, an argument/data error
          describing the failed check, or ``server_error_response`` on
          exceptions.
      """
      req = request.json
      try:
          found, file = FileService.get_by_id(req["file_id"])
          if not found:
              return get_data_error_result(retmsg="File not found!")

          new_suffix = pathlib.Path(req["name"].lower()).suffix
          old_suffix = pathlib.Path(file.name.lower()).suffix
          if new_suffix != old_suffix:
              return get_json_result(
                  data=False,
                  retmsg="The extension of file can't be changed",
                  retcode=RetCode.ARGUMENT_ERROR)

          # Filter on ``parent_id`` -- the attribute the record exposes and the
          # keyword every other FileService.query call in this module uses --
          # so the duplicate check is scoped to the containing folder.  The
          # previous ``pf_id=`` keyword did not match that field name.
          if FileService.query(name=req["name"], parent_id=file.parent_id):
              return get_data_error_result(
                  retmsg="Duplicated file name in the same folder.")

          if not FileService.update_by_id(
                  req["file_id"], {"name": req["name"]}):
              return get_data_error_result(
                  retmsg="Database error (File rename)!")

          # Keep the linked document's name in sync with the file.
          informs = File2DocumentService.get_by_file_id(req["file_id"])
          if informs:
              if not DocumentService.update_by_id(
                      informs[0].document_id, {"name": req["name"]}):
                  return get_data_error_result(
                      retmsg="Database error (Document rename)!")

          return get_json_result(data=True)
      except Exception as exc:
          return server_error_response(exc)
  @manager.route('/get/<file_id>', methods=['GET'])
  # @login_required
  def get(file_id):
      """Handle ``GET /get/<file_id>``: return the stored bytes of a file.

      The body is whatever ``MINIO.get`` returns for the record's
      ``parent_id``/``location``.  When the file name has an extension, the
      ``Content-Type`` header is set to ``image/<ext>`` for
      ``FileType.VISUAL`` records and ``application/<ext>`` otherwise.

      NOTE(review): ``login_required`` is commented out above, so this route
      is served without the login check the other routes use -- confirm this
      is intentional.

      Returns:
          The Flask response, a data error when ``file_id`` is unknown, or
          ``server_error_response`` on exceptions.
      """
      try:
          found, file = FileService.get_by_id(file_id)
          if not found:
              return get_data_error_result(retmsg="Document not found!")

          response = flask.make_response(MINIO.get(file.parent_id, file.location))
          # Capture the text after the last dot, if any.
          ext = re.search(r"\.([^.]+)$", file.name)
          if ext:
              if file.type == FileType.VISUAL.value:
                  template = 'image/%s'
              else:
                  template = 'application/%s'
              response.headers.set('Content-Type', template % ext.group(1))
          return response
      except Exception as exc:
          return server_error_response(exc)