| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878 | 
							- #
 - #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
 - #
 - #  Licensed under the Apache License, Version 2.0 (the "License");
 - #  you may not use this file except in compliance with the License.
 - #  You may obtain a copy of the License at
 - #
 - #      http://www.apache.org/licenses/LICENSE-2.0
 - #
 - #  Unless required by applicable law or agreed to in writing, software
 - #  distributed under the License is distributed on an "AS IS" BASIS,
 - #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 - #  See the License for the specific language governing permissions and
 - #  limitations under the License.
 - import os
 - import pathlib
 - import re
 - import warnings
 - from functools import partial
 - from io import BytesIO
 - 
 - from elasticsearch_dsl import Q
 - from flask import request, send_file
 - from flask_login import login_required, current_user
 - from httpx import HTTPError
 - 
 - from api.contants import NAME_LENGTH_LIMIT
 - from api.db import FileType, ParserType, FileSource, TaskStatus
 - from api.db import StatusEnum
 - from api.db.db_models import File
 - from api.db.services import duplicate_name
 - from api.db.services.document_service import DocumentService
 - from api.db.services.file2document_service import File2DocumentService
 - from api.db.services.file_service import FileService
 - from api.db.services.knowledgebase_service import KnowledgebaseService
 - from api.db.services.user_service import TenantService
 - from api.settings import RetCode
 - from api.utils import get_uuid
 - from api.utils.api_utils import construct_json_result, construct_error_response
 - from api.utils.api_utils import construct_result, validate_request
 - from api.utils.file_utils import filename_type, thumbnail
 - from rag.app import book, laws, manual, naive, one, paper, presentation, qa, resume, table, picture, audio, email
 - from rag.nlp import search
 - from rag.utils.es_conn import ELASTICSEARCH
 - from rag.utils.minio_conn import MINIO
 - 
 - MAXIMUM_OF_UPLOADING_FILES = 256
 - 
 - 
 - # ------------------------------ create a dataset ---------------------------------------
 - 
 - @manager.route("/", methods=["POST"])
 - @login_required  # use login
 - @validate_request("name")  # check name key
 - def create_dataset():
 -     # Check if Authorization header is present
 -     authorization_token = request.headers.get("Authorization")
 -     if not authorization_token:
 -         return construct_json_result(code=RetCode.AUTHENTICATION_ERROR, message="Authorization header is missing.")
 - 
 -     # TODO: Login or API key
 -     # objs = APIToken.query(token=authorization_token)
 -     #
 -     # # Authorization error
 -     # if not objs:
 -     #     return construct_json_result(code=RetCode.AUTHENTICATION_ERROR, message="Token is invalid.")
 -     #
 -     # tenant_id = objs[0].tenant_id
 - 
 -     tenant_id = current_user.id
 -     request_body = request.json
 - 
 -     # In case that there's no name
 -     if "name" not in request_body:
 -         return construct_json_result(code=RetCode.DATA_ERROR, message="Expected 'name' field in request body")
 - 
 -     dataset_name = request_body["name"]
 - 
 -     # empty dataset_name
 -     if not dataset_name:
 -         return construct_json_result(code=RetCode.DATA_ERROR, message="Empty dataset name")
 - 
 -     # In case that there's space in the head or the tail
 -     dataset_name = dataset_name.strip()
 - 
 -     # In case that the length of the name exceeds the limit
 -     dataset_name_length = len(dataset_name)
 -     if dataset_name_length > NAME_LENGTH_LIMIT:
 -         return construct_json_result(
 -             code=RetCode.DATA_ERROR,
 -             message=f"Dataset name: {dataset_name} with length {dataset_name_length} exceeds {NAME_LENGTH_LIMIT}!")
 - 
 -     # In case that there are other fields in the data-binary
 -     if len(request_body.keys()) > 1:
 -         name_list = []
 -         for key_name in request_body.keys():
 -             if key_name != "name":
 -                 name_list.append(key_name)
 -         return construct_json_result(code=RetCode.DATA_ERROR,
 -                                      message=f"fields: {name_list}, are not allowed in request body.")
 - 
 -     # If there is a duplicate name, it will modify it to make it unique
 -     request_body["name"] = duplicate_name(
 -         KnowledgebaseService.query,
 -         name=dataset_name,
 -         tenant_id=tenant_id,
 -         status=StatusEnum.VALID.value)
 -     try:
 -         request_body["id"] = get_uuid()
 -         request_body["tenant_id"] = tenant_id
 -         request_body["created_by"] = tenant_id
 -         exist, t = TenantService.get_by_id(tenant_id)
 -         if not exist:
 -             return construct_result(code=RetCode.AUTHENTICATION_ERROR, message="Tenant not found.")
 -         request_body["embd_id"] = t.embd_id
 -         if not KnowledgebaseService.save(**request_body):
 -             # failed to create new dataset
 -             return construct_result()
 -         return construct_json_result(code=RetCode.SUCCESS,
 -                                      data={"dataset_name": request_body["name"], "dataset_id": request_body["id"]})
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - 
 - # -----------------------------list datasets-------------------------------------------------------
 - 
 - @manager.route("/", methods=["GET"])
 - @login_required
 - def list_datasets():
 -     offset = request.args.get("offset", 0)
 -     count = request.args.get("count", -1)
 -     orderby = request.args.get("orderby", "create_time")
 -     desc = request.args.get("desc", True)
 -     try:
 -         tenants = TenantService.get_joined_tenants_by_user_id(current_user.id)
 -         datasets = KnowledgebaseService.get_by_tenant_ids_by_offset(
 -             [m["tenant_id"] for m in tenants], current_user.id, int(offset), int(count), orderby, desc)
 -         return construct_json_result(data=datasets, code=RetCode.SUCCESS, message=f"List datasets successfully!")
 -     except Exception as e:
 -         return construct_error_response(e)
 -     except HTTPError as http_err:
 -         return construct_json_result(http_err)
 - 
 - 
 - # ---------------------------------delete a dataset ----------------------------
 - 
 - @manager.route("/<dataset_id>", methods=["DELETE"])
 - @login_required
 - def remove_dataset(dataset_id):
 -     try:
 -         datasets = KnowledgebaseService.query(created_by=current_user.id, id=dataset_id)
 - 
 -         # according to the id, searching for the dataset
 -         if not datasets:
 -             return construct_json_result(message=f"The dataset cannot be found for your current account.",
 -                                          code=RetCode.OPERATING_ERROR)
 - 
 -         # Iterating the documents inside the dataset
 -         for doc in DocumentService.query(kb_id=dataset_id):
 -             if not DocumentService.remove_document(doc, datasets[0].tenant_id):
 -                 # the process of deleting failed
 -                 return construct_json_result(code=RetCode.DATA_ERROR,
 -                                              message="There was an error during the document removal process. "
 -                                                      "Please check the status of the RAGFlow server and try the removal again.")
 -             # delete the other files
 -             f2d = File2DocumentService.get_by_document_id(doc.id)
 -             FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
 -             File2DocumentService.delete_by_document_id(doc.id)
 - 
 -         # delete the dataset
 -         if not KnowledgebaseService.delete_by_id(dataset_id):
 -             return construct_json_result(code=RetCode.DATA_ERROR,
 -                                          message="There was an error during the dataset removal process. "
 -                                                  "Please check the status of the RAGFlow server and try the removal again.")
 -         # success
 -         return construct_json_result(code=RetCode.SUCCESS, message=f"Remove dataset: {dataset_id} successfully")
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - 
 - # ------------------------------ get details of a dataset ----------------------------------------
 - 
 - @manager.route("/<dataset_id>", methods=["GET"])
 - @login_required
 - def get_dataset(dataset_id):
 -     try:
 -         dataset = KnowledgebaseService.get_detail(dataset_id)
 -         if not dataset:
 -             return construct_json_result(code=RetCode.DATA_ERROR, message="Can't find this dataset!")
 -         return construct_json_result(data=dataset, code=RetCode.SUCCESS)
 -     except Exception as e:
 -         return construct_json_result(e)
 - 
 - 
 - # ------------------------------ update a dataset --------------------------------------------
 - 
 - @manager.route("/<dataset_id>", methods=["PUT"])
 - @login_required
 - def update_dataset(dataset_id):
 -     req = request.json
 -     try:
 -         # the request cannot be empty
 -         if not req:
 -             return construct_json_result(code=RetCode.DATA_ERROR, message="Please input at least one parameter that "
 -                                                                           "you want to update!")
 -         # check whether the dataset can be found
 -         if not KnowledgebaseService.query(created_by=current_user.id, id=dataset_id):
 -             return construct_json_result(message=f"Only the owner of knowledgebase is authorized for this operation!",
 -                                          code=RetCode.OPERATING_ERROR)
 - 
 -         exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
 -         # check whether there is this dataset
 -         if not exist:
 -             return construct_json_result(code=RetCode.DATA_ERROR, message="This dataset cannot be found!")
 - 
 -         if "name" in req:
 -             name = req["name"].strip()
 -             # check whether there is duplicate name
 -             if name.lower() != dataset.name.lower() \
 -                     and len(KnowledgebaseService.query(name=name, tenant_id=current_user.id,
 -                                                        status=StatusEnum.VALID.value)) > 1:
 -                 return construct_json_result(code=RetCode.DATA_ERROR,
 -                                              message=f"The name: {name.lower()} is already used by other "
 -                                                      f"datasets. Please choose a different name.")
 - 
 -         dataset_updating_data = {}
 -         chunk_num = req.get("chunk_num")
 -         # modify the value of 11 parameters
 - 
 -         # 2 parameters: embedding id and chunk method
 -         # only if chunk_num is 0, the user can update the embedding id
 -         if req.get("embedding_model_id"):
 -             if chunk_num == 0:
 -                 dataset_updating_data["embd_id"] = req["embedding_model_id"]
 -             else:
 -                 return construct_json_result(code=RetCode.DATA_ERROR,
 -                                              message="You have already parsed the document in this "
 -                                                      "dataset, so you cannot change the embedding "
 -                                                      "model.")
 -         # only if chunk_num is 0, the user can update the chunk_method
 -         if "chunk_method" in req:
 -             type_value = req["chunk_method"]
 -             if is_illegal_value_for_enum(type_value, ParserType):
 -                 return construct_json_result(message=f"Illegal value {type_value} for 'chunk_method' field.",
 -                                              code=RetCode.DATA_ERROR)
 -             if chunk_num != 0:
 -                 construct_json_result(code=RetCode.DATA_ERROR, message="You have already parsed the document "
 -                                                                        "in this dataset, so you cannot "
 -                                                                        "change the chunk method.")
 -             dataset_updating_data["parser_id"] = req["template_type"]
 - 
 -         # convert the photo parameter to avatar
 -         if req.get("photo"):
 -             dataset_updating_data["avatar"] = req["photo"]
 - 
 -         # layout_recognize
 -         if "layout_recognize" in req:
 -             if "parser_config" not in dataset_updating_data:
 -                 dataset_updating_data['parser_config'] = {}
 -             dataset_updating_data['parser_config']['layout_recognize'] = req['layout_recognize']
 - 
 -         # TODO: updating use_raptor needs to construct a class
 - 
 -         # 6 parameters
 -         for key in ["name", "language", "description", "permission", "id", "token_num"]:
 -             if key in req:
 -                 dataset_updating_data[key] = req.get(key)
 - 
 -         # update
 -         if not KnowledgebaseService.update_by_id(dataset.id, dataset_updating_data):
 -             return construct_json_result(code=RetCode.OPERATING_ERROR, message="Failed to update! "
 -                                                                                "Please check the status of RAGFlow "
 -                                                                                "server and try again!")
 - 
 -         exist, dataset = KnowledgebaseService.get_by_id(dataset.id)
 -         if not exist:
 -             return construct_json_result(code=RetCode.DATA_ERROR, message="Failed to get the dataset "
 -                                                                           "using the dataset ID.")
 - 
 -         return construct_json_result(data=dataset.to_json(), code=RetCode.SUCCESS)
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - 
 - # --------------------------------content management ----------------------------------------------
 - 
 - # ----------------------------upload files-----------------------------------------------------
 - @manager.route("/<dataset_id>/documents/", methods=["POST"])
 - @login_required
 - def upload_documents(dataset_id):
 -     # no files
 -     if not request.files:
 -         return construct_json_result(
 -             message="There is no file!", code=RetCode.ARGUMENT_ERROR)
 - 
 -     # the number of uploading files exceeds the limit
 -     file_objs = request.files.getlist("file")
 -     num_file_objs = len(file_objs)
 - 
 -     if num_file_objs > MAXIMUM_OF_UPLOADING_FILES:
 -         return construct_json_result(code=RetCode.DATA_ERROR, message=f"You try to upload {num_file_objs} files, "
 -                                                                       f"which exceeds the maximum number of uploading files: {MAXIMUM_OF_UPLOADING_FILES}")
 - 
 -     # no dataset
 -     exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
 -     if not exist:
 -         return construct_json_result(message="Can't find this dataset", code=RetCode.DATA_ERROR)
 - 
 -     for file_obj in file_objs:
 -         file_name = file_obj.filename
 -         # no name
 -         if not file_name:
 -             return construct_json_result(
 -                 message="There is a file without name!", code=RetCode.ARGUMENT_ERROR)
 - 
 -         # TODO: support the remote files
 -         if 'http' in file_name:
 -             return construct_json_result(code=RetCode.ARGUMENT_ERROR, message="Remote files have not unsupported.")
 - 
 -     # get the root_folder
 -     root_folder = FileService.get_root_folder(current_user.id)
 -     # get the id of the root_folder
 -     parent_file_id = root_folder["id"]  # document id
 -     # this is for the new user, create '.knowledgebase' file
 -     FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
 -     # go inside this folder, get the kb_root_folder
 -     kb_root_folder = FileService.get_kb_folder(current_user.id)
 -     # link the file management to the kb_folder
 -     kb_folder = FileService.new_a_file_from_kb(dataset.tenant_id, dataset.name, kb_root_folder["id"])
 - 
 -     # grab all the errs
 -     err = []
 -     MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
 -     uploaded_docs_json = []
 -     for file in file_objs:
 -         try:
 -             # TODO: get this value from the database as some tenants have this limit while others don't
 -             if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(dataset.tenant_id) >= MAX_FILE_NUM_PER_USER:
 -                 return construct_json_result(code=RetCode.DATA_ERROR,
 -                                              message="Exceed the maximum file number of a free user!")
 -             # deal with the duplicate name
 -             filename = duplicate_name(
 -                 DocumentService.query,
 -                 name=file.filename,
 -                 kb_id=dataset.id)
 - 
 -             # deal with the unsupported type
 -             filetype = filename_type(filename)
 -             if filetype == FileType.OTHER.value:
 -                 return construct_json_result(code=RetCode.DATA_ERROR,
 -                                              message="This type of file has not been supported yet!")
 - 
 -             # upload to the minio
 -             location = filename
 -             while MINIO.obj_exist(dataset_id, location):
 -                 location += "_"
 - 
 -             blob = file.read()
 - 
 -             # the content is empty, raising a warning
 -             if blob == b'':
 -                 warnings.warn(f"[WARNING]: The content of the file {filename} is empty.")
 - 
 -             MINIO.put(dataset_id, location, blob)
 - 
 -             doc = {
 -                 "id": get_uuid(),
 -                 "kb_id": dataset.id,
 -                 "parser_id": dataset.parser_id,
 -                 "parser_config": dataset.parser_config,
 -                 "created_by": current_user.id,
 -                 "type": filetype,
 -                 "name": filename,
 -                 "location": location,
 -                 "size": len(blob),
 -                 "thumbnail": thumbnail(filename, blob)
 -             }
 -             if doc["type"] == FileType.VISUAL:
 -                 doc["parser_id"] = ParserType.PICTURE.value
 -             if doc["type"] == FileType.AURAL:
 -                 doc["parser_id"] = ParserType.AUDIO.value
 -             if re.search(r"\.(ppt|pptx|pages)$", filename):
 -                 doc["parser_id"] = ParserType.PRESENTATION.value
 -             DocumentService.insert(doc)
 - 
 -             FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
 -             uploaded_docs_json.append(doc)
 -         except Exception as e:
 -             err.append(file.filename + ": " + str(e))
 - 
 -     if err:
 -         # return all the errors
 -         return construct_json_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
 -     # success
 -     return construct_json_result(data=uploaded_docs_json, code=RetCode.SUCCESS)
 - 
 - 
 - # ----------------------------delete a file-----------------------------------------------------
 - @manager.route("/<dataset_id>/documents/<document_id>", methods=["DELETE"])
 - @login_required
 - def delete_document(document_id, dataset_id):  # string
 -     # get the root folder
 -     root_folder = FileService.get_root_folder(current_user.id)
 -     # parent file's id
 -     parent_file_id = root_folder["id"]
 -     # consider the new user
 -     FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
 -     # store all the errors that may have
 -     errors = ""
 -     try:
 -         # whether there is this document
 -         exist, doc = DocumentService.get_by_id(document_id)
 -         if not exist:
 -             return construct_json_result(message=f"Document {document_id} not found!", code=RetCode.DATA_ERROR)
 -         # whether this doc is authorized by this tenant
 -         tenant_id = DocumentService.get_tenant_id(document_id)
 -         if not tenant_id:
 -             return construct_json_result(
 -                 message=f"You cannot delete this document {document_id} due to the authorization"
 -                         f" reason!", code=RetCode.AUTHENTICATION_ERROR)
 - 
 -         # get the doc's id and location
 -         real_dataset_id, location = File2DocumentService.get_minio_address(doc_id=document_id)
 - 
 -         if real_dataset_id != dataset_id:
 -             return construct_json_result(message=f"The document {document_id} is not in the dataset: {dataset_id}, "
 -                                                  f"but in the dataset: {real_dataset_id}.", code=RetCode.ARGUMENT_ERROR)
 - 
 -         # there is an issue when removing
 -         if not DocumentService.remove_document(doc, tenant_id):
 -             return construct_json_result(
 -                 message="There was an error during the document removal process. Please check the status of the "
 -                         "RAGFlow server and try the removal again.", code=RetCode.OPERATING_ERROR)
 - 
 -         # fetch the File2Document record associated with the provided document ID.
 -         file_to_doc = File2DocumentService.get_by_document_id(document_id)
 -         # delete the associated File record.
 -         FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == file_to_doc[0].file_id])
 -         # delete the File2Document record itself using the document ID. This removes the
 -         # association between the document and the file after the File record has been deleted.
 -         File2DocumentService.delete_by_document_id(document_id)
 - 
 -         # delete it from minio
 -         MINIO.rm(dataset_id, location)
 -     except Exception as e:
 -         errors += str(e)
 -     if errors:
 -         return construct_json_result(data=False, message=errors, code=RetCode.SERVER_ERROR)
 - 
 -     return construct_json_result(data=True, code=RetCode.SUCCESS)
 - 
 - 
 - # ----------------------------list files-----------------------------------------------------
 - @manager.route('/<dataset_id>/documents/', methods=['GET'])
 - @login_required
 - def list_documents(dataset_id):
 -     if not dataset_id:
 -         return construct_json_result(
 -             data=False, message="Lack of 'dataset_id'", code=RetCode.ARGUMENT_ERROR)
 - 
 -     # searching keywords
 -     keywords = request.args.get("keywords", "")
 - 
 -     offset = request.args.get("offset", 0)
 -     count = request.args.get("count", -1)
 -     order_by = request.args.get("order_by", "create_time")
 -     descend = request.args.get("descend", True)
 -     try:
 -         docs, total = DocumentService.list_documents_in_dataset(dataset_id, int(offset), int(count), order_by,
 -                                                                 descend, keywords)
 - 
 -         return construct_json_result(data={"total": total, "docs": docs}, message=RetCode.SUCCESS)
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - 
 - # ----------------------------update: enable rename-----------------------------------------------------
 - @manager.route("/<dataset_id>/documents/<document_id>", methods=["PUT"])
 - @login_required
 - def update_document(dataset_id, document_id):
 -     req = request.json
 -     try:
 -         legal_parameters = set()
 -         legal_parameters.add("name")
 -         legal_parameters.add("enable")
 -         legal_parameters.add("template_type")
 - 
 -         for key in req.keys():
 -             if key not in legal_parameters:
 -                 return construct_json_result(code=RetCode.ARGUMENT_ERROR, message=f"{key} is an illegal parameter.")
 - 
 -         # The request body cannot be empty
 -         if not req:
 -             return construct_json_result(
 -                 code=RetCode.DATA_ERROR,
 -                 message="Please input at least one parameter that you want to update!")
 - 
 -         # Check whether there is this dataset
 -         exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
 -         if not exist:
 -             return construct_json_result(code=RetCode.DATA_ERROR, message=f"This dataset {dataset_id} cannot be found!")
 - 
 -         # The document does not exist
 -         exist, document = DocumentService.get_by_id(document_id)
 -         if not exist:
 -             return construct_json_result(message=f"This document {document_id} cannot be found!",
 -                                          code=RetCode.ARGUMENT_ERROR)
 - 
 -         # Deal with the different keys
 -         updating_data = {}
 -         if "name" in req:
 -             new_name = req["name"]
 -             updating_data["name"] = new_name
 -             # Check whether the new_name is suitable
 -             # 1. no name value
 -             if not new_name:
 -                 return construct_json_result(code=RetCode.DATA_ERROR, message="There is no new name.")
 - 
 -             # 2. In case that there's space in the head or the tail
 -             new_name = new_name.strip()
 - 
 -             # 3. Check whether the new_name has the same extension of file as before
 -             if pathlib.Path(new_name.lower()).suffix != pathlib.Path(
 -                     document.name.lower()).suffix:
 -                 return construct_json_result(
 -                     data=False,
 -                     message="The extension of file cannot be changed",
 -                     code=RetCode.ARGUMENT_ERROR)
 - 
 -             # 4. Check whether the new name has already been occupied by other file
 -             for d in DocumentService.query(name=new_name, kb_id=document.kb_id):
 -                 if d.name == new_name:
 -                     return construct_json_result(
 -                         message="Duplicated document name in the same dataset.",
 -                         code=RetCode.ARGUMENT_ERROR)
 - 
 -         if "enable" in req:
 -             enable_value = req["enable"]
 -             if is_illegal_value_for_enum(enable_value, StatusEnum):
 -                 return construct_json_result(message=f"Illegal value {enable_value} for 'enable' field.",
 -                                              code=RetCode.DATA_ERROR)
 -             updating_data["status"] = enable_value
 - 
 -         # TODO: Chunk-method - update parameters inside the json object parser_config
 -         if "template_type" in req:
 -             type_value = req["template_type"]
 -             if is_illegal_value_for_enum(type_value, ParserType):
 -                 return construct_json_result(message=f"Illegal value {type_value} for 'template_type' field.",
 -                                              code=RetCode.DATA_ERROR)
 -             updating_data["parser_id"] = req["template_type"]
 - 
 -         # The process of updating
 -         if not DocumentService.update_by_id(document_id, updating_data):
 -             return construct_json_result(
 -                 code=RetCode.OPERATING_ERROR,
 -                 message="Failed to update document in the database! "
 -                         "Please check the status of RAGFlow server and try again!")
 - 
 -         # name part: file service
 -         if "name" in req:
 -             # Get file by document id
 -             file_information = File2DocumentService.get_by_document_id(document_id)
 -             if file_information:
 -                 exist, file = FileService.get_by_id(file_information[0].file_id)
 -                 FileService.update_by_id(file.id, {"name": req["name"]})
 - 
 -         exist, document = DocumentService.get_by_id(document_id)
 - 
 -         # Success
 -         return construct_json_result(data=document.to_json(), message="Success", code=RetCode.SUCCESS)
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - 
 - # Helper method to judge whether it's an illegal value
 - def is_illegal_value_for_enum(value, enum_class):
 -     return value not in enum_class.__members__.values()
 - 
 - 
 - # ----------------------------download a file-----------------------------------------------------
 - @manager.route("/<dataset_id>/documents/<document_id>", methods=["GET"])
 - @login_required
 - def download_document(dataset_id, document_id):
 -     try:
 -         # Check whether there is this dataset
 -         exist, _ = KnowledgebaseService.get_by_id(dataset_id)
 -         if not exist:
 -             return construct_json_result(code=RetCode.DATA_ERROR,
 -                                          message=f"This dataset '{dataset_id}' cannot be found!")
 - 
 -         # Check whether there is this document
 -         exist, document = DocumentService.get_by_id(document_id)
 -         if not exist:
 -             return construct_json_result(message=f"This document '{document_id}' cannot be found!",
 -                                          code=RetCode.ARGUMENT_ERROR)
 - 
 -         # The process of downloading
 -         doc_id, doc_location = File2DocumentService.get_minio_address(doc_id=document_id)  # minio address
 -         file_stream = MINIO.get(doc_id, doc_location)
 -         if not file_stream:
 -             return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)
 - 
 -         file = BytesIO(file_stream)
 - 
 -         # Use send_file with a proper filename and MIME type
 -         return send_file(
 -             file,
 -             as_attachment=True,
 -             download_name=document.name,
 -             mimetype='application/octet-stream'  # Set a default MIME type
 -         )
 - 
 -     # Error
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - 
 - # ----------------------------start parsing a document-----------------------------------------------------
 - # helper method for parsing
 - # callback method
 - def doc_parse_callback(doc_id, prog=None, msg=""):
 -     cancel = DocumentService.do_cancel(doc_id)
 -     if cancel:
 -         raise Exception("The parsing process has been cancelled!")
 - 
 - """
 - def doc_parse(binary, doc_name, parser_name, tenant_id, doc_id):
 -     match parser_name:
 -         case "book":
 -             book.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
 -         case "laws":
 -             laws.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
 -         case "manual":
 -             manual.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
 -         case "naive":
 -             # It's the mode by default, which is general in the front-end
 -             naive.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
 -         case "one":
 -             one.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
 -         case "paper":
 -             paper.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
 -         case "picture":
 -             picture.chunk(doc_name, binary=binary, tenant_id=tenant_id, lang="Chinese",
 -                           callback=partial(doc_parse_callback, doc_id))
 -         case "presentation":
 -             presentation.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
 -         case "qa":
 -             qa.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
 -         case "resume":
 -             resume.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
 -         case "table":
 -             table.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
 -         case "audio":
 -             audio.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
 -         case "email":
 -             email.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
 -         case _:
 -             return False
 - 
 -     return True
 -     """
 - 
 - 
 - @manager.route("/<dataset_id>/documents/<document_id>/status", methods=["POST"])
 - @login_required
 - def parse_document(dataset_id, document_id):
 -     try:
 -         # valid dataset
 -         exist, _ = KnowledgebaseService.get_by_id(dataset_id)
 -         if not exist:
 -             return construct_json_result(code=RetCode.DATA_ERROR,
 -                                          message=f"This dataset '{dataset_id}' cannot be found!")
 - 
 -         return parsing_document_internal(document_id)
 - 
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - 
 - # ----------------------------start parsing documents-----------------------------------------------------
 - @manager.route("/<dataset_id>/documents/status", methods=["POST"])
 - @login_required
 - def parse_documents(dataset_id):
 -     doc_ids = request.json["doc_ids"]
 -     try:
 -         exist, _ = KnowledgebaseService.get_by_id(dataset_id)
 -         if not exist:
 -             return construct_json_result(code=RetCode.DATA_ERROR,
 -                                          message=f"This dataset '{dataset_id}' cannot be found!")
 -         # two conditions
 -         if not doc_ids:
 -             # documents inside the dataset
 -             docs, total = DocumentService.list_documents_in_dataset(dataset_id, 0, -1, "create_time",
 -                                                                     True, "")
 -             doc_ids = [doc["id"] for doc in docs]
 - 
 -         message = ""
 -         # for loop
 -         for id in doc_ids:
 -             res = parsing_document_internal(id)
 -             res_body = res.json
 -             if res_body["code"] == RetCode.SUCCESS:
 -                 message += res_body["message"]
 -             else:
 -                 return res
 -         return construct_json_result(data=True, code=RetCode.SUCCESS, message=message)
 - 
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - 
 - # helper method for parsing the document
 - def parsing_document_internal(id):
 -     message = ""
 -     try:
 -         # Check whether there is this document
 -         exist, document = DocumentService.get_by_id(id)
 -         if not exist:
 -             return construct_json_result(message=f"This document '{id}' cannot be found!",
 -                                          code=RetCode.ARGUMENT_ERROR)
 - 
 -         tenant_id = DocumentService.get_tenant_id(id)
 -         if not tenant_id:
 -             return construct_json_result(message="Tenant not found!", code=RetCode.AUTHENTICATION_ERROR)
 - 
 -         info = {"run": "1", "progress": 0}
 -         info["progress_msg"] = ""
 -         info["chunk_num"] = 0
 -         info["token_num"] = 0
 - 
 -         DocumentService.update_by_id(id, info)
 - 
 -         ELASTICSEARCH.deleteByQuery(Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
 - 
 -         _, doc_attributes = DocumentService.get_by_id(id)
 -         doc_attributes = doc_attributes.to_dict()
 -         doc_id = doc_attributes["id"]
 - 
 -         bucket, doc_name = File2DocumentService.get_minio_address(doc_id=doc_id)
 -         binary = MINIO.get(bucket, doc_name)
 -         parser_name = doc_attributes["parser_id"]
 -         if binary:
 -             res = doc_parse(binary, doc_name, parser_name, tenant_id, doc_id)
 -             if res is False:
 -                 message += f"The parser id: {parser_name} of the document {doc_id} is not supported; "
 -         else:
 -             message += f"Empty data in the document: {doc_name}; "
 -         # failed in parsing
 -         if doc_attributes["status"] == TaskStatus.FAIL.value:
 -             message += f"Failed in parsing the document: {doc_id}; "
 -         return construct_json_result(code=RetCode.SUCCESS, message=message)
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - 
 - # ----------------------------stop parsing a doc-----------------------------------------------------
 - @manager.route("<dataset_id>/documents/<document_id>/status", methods=["DELETE"])
 - @login_required
 - def stop_parsing_document(dataset_id, document_id):
 -     try:
 -         # valid dataset
 -         exist, _ = KnowledgebaseService.get_by_id(dataset_id)
 -         if not exist:
 -             return construct_json_result(code=RetCode.DATA_ERROR,
 -                                          message=f"This dataset '{dataset_id}' cannot be found!")
 - 
 -         return stop_parsing_document_internal(document_id)
 - 
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - 
 - # ----------------------------stop parsing docs-----------------------------------------------------
 - @manager.route("<dataset_id>/documents/status", methods=["DELETE"])
 - @login_required
 - def stop_parsing_documents(dataset_id):
 -     doc_ids = request.json["doc_ids"]
 -     try:
 -         # valid dataset?
 -         exist, _ = KnowledgebaseService.get_by_id(dataset_id)
 -         if not exist:
 -             return construct_json_result(code=RetCode.DATA_ERROR,
 -                                          message=f"This dataset '{dataset_id}' cannot be found!")
 -         if not doc_ids:
 -             # documents inside the dataset
 -             docs, total = DocumentService.list_documents_in_dataset(dataset_id, 0, -1, "create_time",
 -                                                                         True, "")
 -             doc_ids = [doc["id"] for doc in docs]
 - 
 -         message = ""
 -         # for loop
 -         for id in doc_ids:
 -             res = stop_parsing_document_internal(id)
 -             res_body = res.json
 -             if res_body["code"] == RetCode.SUCCESS:
 -                 message += res_body["message"]
 -             else:
 -                 return res
 -         return construct_json_result(data=True, code=RetCode.SUCCESS, message=message)
 - 
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - 
 - # Helper method
 - def stop_parsing_document_internal(document_id):
 -     try:
 -         # valid doc?
 -         exist, doc = DocumentService.get_by_id(document_id)
 -         if not exist:
 -             return construct_json_result(message=f"This document '{document_id}' cannot be found!",
 -                                          code=RetCode.ARGUMENT_ERROR)
 -         doc_attributes = doc.to_dict()
 - 
 -         # only when the status is parsing, we need to stop it
 -         if doc_attributes["status"] == TaskStatus.RUNNING.value:
 -             tenant_id = DocumentService.get_tenant_id(document_id)
 -             if not tenant_id:
 -                 return construct_json_result(message="Tenant not found!", code=RetCode.AUTHENTICATION_ERROR)
 - 
 -             # update successfully?
 -             if not DocumentService.update_by_id(document_id, {"status": "2"}):  # cancel
 -                 return construct_json_result(
 -                     code=RetCode.OPERATING_ERROR,
 -                     message="There was an error during the stopping parsing the document process. "
 -                             "Please check the status of the RAGFlow server and try the update again."
 -                 )
 - 
 -             _, doc_attributes = DocumentService.get_by_id(document_id)
 -             doc_attributes = doc_attributes.to_dict()
 - 
 -             # failed in stop parsing
 -             if doc_attributes["status"] == TaskStatus.RUNNING.value:
 -                 return construct_json_result(message=f"Failed in parsing the document: {document_id}; ", code=RetCode.SUCCESS)
 -         return construct_json_result(code=RetCode.SUCCESS, message="")
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - 
 - # ----------------------------show the status of the file-----------------------------------------------------
 - @manager.route("/<dataset_id>/documents/<document_id>/status", methods=["GET"])
 - @login_required
 - def show_parsing_status(dataset_id, document_id):
 -     try:
 -         # valid dataset
 -         exist, _ = KnowledgebaseService.get_by_id(dataset_id)
 -         if not exist:
 -             return construct_json_result(code=RetCode.DATA_ERROR,
 -                                          message=f"This dataset: '{dataset_id}' cannot be found!")
 -         # valid document
 -         exist, _ = DocumentService.get_by_id(document_id)
 -         if not exist:
 -             return construct_json_result(code=RetCode.DATA_ERROR,
 -                                          message=f"This document: '{document_id}' is not a valid document.")
 - 
 -         _, doc = DocumentService.get_by_id(document_id)  # get doc object
 -         doc_attributes = doc.to_dict()
 - 
 -         return construct_json_result(
 -             data={"progress": doc_attributes["progress"], "status": TaskStatus(doc_attributes["status"]).name},
 -             code=RetCode.SUCCESS
 -         )
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - # ----------------------------list the chunks of the file-----------------------------------------------------
 - 
 - # -- --------------------------delete the chunk-----------------------------------------------------
 - 
 - # ----------------------------edit the status of the chunk-----------------------------------------------------
 - 
 - # ----------------------------insert a new chunk-----------------------------------------------------
 - 
 - # ----------------------------upload a file-----------------------------------------------------
 - 
 - # ----------------------------get a specific chunk-----------------------------------------------------
 - 
 - # ----------------------------retrieval test-----------------------------------------------------
 
 
  |