| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180 | 
							- from io import BytesIO
 - 
 - from flask import request,send_file
 - from api.utils.api_utils import get_json_result, construct_json_result, server_error_response
 - from api.utils.api_utils import get_json_result, token_required, get_data_error_result
 - from api.db import FileType, ParserType, FileSource, TaskStatus
 - from api.db.db_models import File
 - from api.db.services.document_service import DocumentService
 - from api.db.services.file2document_service import File2DocumentService
 - from api.db.services.file_service import FileService
 - from api.db.services.knowledgebase_service import KnowledgebaseService
 - from api.db.services.user_service import TenantService, UserTenantService
 - from api.settings import RetCode
 - from api.utils.api_utils import construct_json_result, construct_error_response
 - from rag.utils.storage_factory import STORAGE_IMPL
 - 
 - 
 - @manager.route('/dataset/<dataset_id>/documents/upload', methods=['POST'])
 - @token_required
 - def upload(dataset_id, tenant_id):
 -     if 'file' not in request.files:
 -         return get_json_result(
 -             data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
 -     file_objs = request.files.getlist('file')
 -     for file_obj in file_objs:
 -         if file_obj.filename == '':
 -             return get_json_result(
 -                 data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
 -     e, kb = KnowledgebaseService.get_by_id(dataset_id)
 -     if not e:
 -         raise LookupError(f"Can't find the knowledgebase with ID {dataset_id}!")
 -     err, _ = FileService.upload_document(kb, file_objs, tenant_id)
 -     if err:
 -         return get_json_result(
 -             data=False, retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
 -     return get_json_result(data=True)
 - 
 - 
 - @manager.route('/infos', methods=['GET'])
 - @token_required
 - def docinfos(tenant_id):
 -     req = request.args
 -     if "id" in req:
 -         doc_id = req["id"]
 -         e, doc = DocumentService.get_by_id(doc_id)
 -         return get_json_result(data=doc.to_json())
 -     if "name" in req:
 -         doc_name = req["name"]
 -         doc_id = DocumentService.get_doc_id_by_doc_name(doc_name)
 -         e, doc = DocumentService.get_by_id(doc_id)
 -         return get_json_result(data=doc.to_json())
 - 
 - 
 - @manager.route('/save', methods=['POST'])
 - @token_required
 - def save_doc(tenant_id):
 -     req = request.json  # Expecting JSON input
 -     if "id" in req:
 -         doc_id = req["id"]
 -     if "name" in req:
 -         doc_name = req["name"]
 -         doc_id = DocumentService.get_doc_id_by_doc_name(doc_name)
 -     data = request.json
 -     # Call the update method with the provided id and data
 -     try:
 -         num = DocumentService.update_by_id(doc_id, data)
 -         if num > 0:
 -             return get_json_result(retmsg="success", data={"updated_count": num})
 -         else:
 -             return get_json_result(retcode=404, retmsg="Document not found")
 -     except Exception as e:
 -         return get_json_result(retmsg=f"Error occurred: {str(e)}")
 - 
 - 
 - @manager.route("/<dataset_id>/documents/<document_id>", methods=["GET"])
 - @token_required
 - def download_document(dataset_id, document_id):
 -     try:
 -         # Check whether there is this dataset
 -         exist, _ = KnowledgebaseService.get_by_id(dataset_id)
 -         if not exist:
 -             return construct_json_result(code=RetCode.DATA_ERROR,
 -                                          message=f"This dataset '{dataset_id}' cannot be found!")
 - 
 -         # Check whether there is this document
 -         exist, document = DocumentService.get_by_id(document_id)
 -         if not exist:
 -             return construct_json_result(message=f"This document '{document_id}' cannot be found!",
 -                                          code=RetCode.ARGUMENT_ERROR)
 - 
 -         # The process of downloading
 -         doc_id, doc_location = File2DocumentService.get_minio_address(doc_id=document_id)  # minio address
 -         file_stream = STORAGE_IMPL.get(doc_id, doc_location)
 -         if not file_stream:
 -             return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)
 - 
 -         file = BytesIO(file_stream)
 - 
 -         # Use send_file with a proper filename and MIME type
 -         return send_file(
 -             file,
 -             as_attachment=True,
 -             download_name=document.name,
 -             mimetype='application/octet-stream'  # Set a default MIME type
 -         )
 - 
 -     # Error
 -     except Exception as e:
 -         return construct_error_response(e)
 - 
 - @manager.route('/dataset/<dataset_id>/documents', methods=['GET'])
 - @token_required
 - def list_docs(dataset_id,tenant_id):
 -     kb_id = request.args.get("kb_id")
 -     if not kb_id:
 -         return get_json_result(
 -             data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR)
 -     tenants = UserTenantService.query(user_id=tenant_id)
 -     for tenant in tenants:
 -         if KnowledgebaseService.query(
 -                 tenant_id=tenant.tenant_id, id=kb_id):
 -             break
 -     else:
 -         return get_json_result(
 -             data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.',
 -             retcode=RetCode.OPERATING_ERROR)
 -     keywords = request.args.get("keywords", "")
 - 
 -     page_number = int(request.args.get("page", 1))
 -     items_per_page = int(request.args.get("page_size", 15))
 -     orderby = request.args.get("orderby", "create_time")
 -     desc = request.args.get("desc", True)
 -     try:
 -         docs, tol = DocumentService.get_by_kb_id(
 -             kb_id, page_number, items_per_page, orderby, desc, keywords)
 -         return get_json_result(data={"total": tol, "docs": docs})
 -     except Exception as e:
 -         return server_error_response(e)
 - 
 - 
 - @manager.route('/delete', methods=['DELETE'])
 - @token_required
 - def rm(tenant_id):
 -     req = request.args
 -     if "doc_id" not in req:
 -         return get_data_error_result(
 -             retmsg="doc_id is required")
 -     doc_ids = req["doc_id"]
 -     if isinstance(doc_ids, str): doc_ids = [doc_ids]
 -     root_folder = FileService.get_root_folder(tenant_id)
 -     pf_id = root_folder["id"]
 -     FileService.init_knowledgebase_docs(pf_id, tenant_id)
 -     errors = ""
 -     for doc_id in doc_ids:
 -         try:
 -             e, doc = DocumentService.get_by_id(doc_id)
 -             if not e:
 -                 return get_data_error_result(retmsg="Document not found!")
 -             tenant_id = DocumentService.get_tenant_id(doc_id)
 -             if not tenant_id:
 -                 return get_data_error_result(retmsg="Tenant not found!")
 - 
 -             b, n = File2DocumentService.get_minio_address(doc_id=doc_id)
 - 
 -             if not DocumentService.remove_document(doc, tenant_id):
 -                 return get_data_error_result(
 -                     retmsg="Database error (Document removal)!")
 - 
 -             f2d = File2DocumentService.get_by_document_id(doc_id)
 -             FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
 -             File2DocumentService.delete_by_document_id(doc_id)
 - 
 -             STORAGE_IMPL.rm(b, n)
 -         except Exception as e:
 -             errors += str(e)
 - 
 -     if errors:
 -         return get_json_result(data=False, retmsg=errors, retcode=RetCode.SERVER_ERROR)
 - 
 -     return get_json_result(data=True,retmsg="success")
 
 
  |