|  |  | @@ -20,79 +20,73 @@ import re | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | import flask | 
		
	
		
			
			|  |  |  | from flask import request | 
		
	
		
			
			|  |  |  | from flask_login import login_required, current_user | 
		
	
		
			
			|  |  |  | from flask_login import current_user, login_required | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | from deepdoc.parser.html_parser import RAGFlowHtmlParser | 
		
	
		
			
			|  |  |  | from rag.nlp import search | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileType, TaskStatus, ParserType, FileSource | 
		
	
		
			
			|  |  |  | from api import settings | 
		
	
		
			
			|  |  |  | from api.constants import IMG_BASE64_PREFIX | 
		
	
		
			
			|  |  |  | from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileSource, FileType, ParserType, TaskStatus | 
		
	
		
			
			|  |  |  | from api.db.db_models import File, Task | 
		
	
		
			
			|  |  |  | from api.db.services import duplicate_name | 
		
	
		
			
			|  |  |  | from api.db.services.document_service import DocumentService, doc_upload_and_parse | 
		
	
		
			
			|  |  |  | from api.db.services.file2document_service import File2DocumentService | 
		
	
		
			
			|  |  |  | from api.db.services.file_service import FileService | 
		
	
		
			
			|  |  |  | from api.db.services.task_service import queue_tasks | 
		
	
		
			
			|  |  |  | from api.db.services.user_service import UserTenantService | 
		
	
		
			
			|  |  |  | from api.db.services import duplicate_name | 
		
	
		
			
			|  |  |  | from api.db.services.knowledgebase_service import KnowledgebaseService | 
		
	
		
			
			|  |  |  | from api.db.services.task_service import TaskService | 
		
	
		
			
			|  |  |  | from api.db.services.document_service import DocumentService, doc_upload_and_parse | 
		
	
		
			
			|  |  |  | from api.db.services.task_service import TaskService, queue_tasks | 
		
	
		
			
			|  |  |  | from api.db.services.user_service import UserTenantService | 
		
	
		
			
			|  |  |  | from api.utils import get_uuid | 
		
	
		
			
			|  |  |  | from api.utils.api_utils import ( | 
		
	
		
			
			|  |  |  | server_error_response, | 
		
	
		
			
			|  |  |  | get_data_error_result, | 
		
	
		
			
			|  |  |  | get_json_result, | 
		
	
		
			
			|  |  |  | server_error_response, | 
		
	
		
			
			|  |  |  | validate_request, | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | from api.utils import get_uuid | 
		
	
		
			
			|  |  |  | from api import settings | 
		
	
		
			
			|  |  |  | from api.utils.api_utils import get_json_result | 
		
	
		
			
			|  |  |  | from rag.utils.storage_factory import STORAGE_IMPL | 
		
	
		
			
			|  |  |  | from api.utils.file_utils import filename_type, thumbnail, get_project_base_directory | 
		
	
		
			
			|  |  |  | from api.utils.file_utils import filename_type, get_project_base_directory, thumbnail | 
		
	
		
			
			|  |  |  | from api.utils.web_utils import html2pdf, is_valid_url | 
		
	
		
			
			|  |  |  | from api.constants import IMG_BASE64_PREFIX | 
		
	
		
			
			|  |  |  | from deepdoc.parser.html_parser import RAGFlowHtmlParser | 
		
	
		
			
			|  |  |  | from rag.nlp import search | 
		
	
		
			
			|  |  |  | from rag.utils.storage_factory import STORAGE_IMPL | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/upload', methods=['POST'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/upload", methods=["POST"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @login_required | 
		
	
		
			
			|  |  |  | @validate_request("kb_id") | 
		
	
		
			
			|  |  |  | def upload(): | 
		
	
		
			
			|  |  |  | kb_id = request.form.get("kb_id") | 
		
	
		
			
			|  |  |  | if not kb_id: | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | if 'file' not in request.files: | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='No file part!', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | if "file" not in request.files: | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="No file part!", code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | file_objs = request.files.getlist('file') | 
		
	
		
			
			|  |  |  | file_objs = request.files.getlist("file") | 
		
	
		
			
			|  |  |  | for file_obj in file_objs: | 
		
	
		
			
			|  |  |  | if file_obj.filename == '': | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='No file selected!', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | if file_obj.filename == "": | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | e, kb = KnowledgebaseService.get_by_id(kb_id) | 
		
	
		
			
			|  |  |  | if not e: | 
		
	
		
			
			|  |  |  | raise LookupError("Can't find this knowledgebase!") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | err, files = FileService.upload_document(kb, file_objs, current_user.id) | 
		
	
		
			
			|  |  |  | files = [f[0] for f in files] # remove the blob | 
		
	
		
			
			|  |  |  |  | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if not files: | 
		
	
		
			
			|  |  |  | return get_json_result(data=files, message="There seems to be an issue with your file format. Please verify it is correct and not corrupted.", code=settings.RetCode.DATA_ERROR) | 
		
	
		
			
			|  |  |  | files = [f[0] for f in files]  # remove the blob | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if err: | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=files, message="\n".join(err), code=settings.RetCode.SERVER_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=files, message="\n".join(err), code=settings.RetCode.SERVER_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=files) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/web_crawl', methods=['POST'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/web_crawl", methods=["POST"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @login_required | 
		
	
		
			
			|  |  |  | @validate_request("kb_id", "name", "url") | 
		
	
		
			
			|  |  |  | def web_crawl(): | 
		
	
		
			
			|  |  |  | kb_id = request.form.get("kb_id") | 
		
	
		
			
			|  |  |  | if not kb_id: | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | name = request.form.get("name") | 
		
	
		
			
			|  |  |  | url = request.form.get("url") | 
		
	
		
			
			|  |  |  | if not is_valid_url(url): | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='The URL format is invalid', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="The URL format is invalid", code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | e, kb = KnowledgebaseService.get_by_id(kb_id) | 
		
	
		
			
			|  |  |  | if not e: | 
		
	
		
			
			|  |  |  | raise LookupError("Can't find this knowledgebase!") | 
		
	
	
		
			
			|  |  | @@ -108,10 +102,7 @@ def web_crawl(): | 
		
	
		
			
			|  |  |  | kb_folder = FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"]) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | filename = duplicate_name( | 
		
	
		
			
			|  |  |  | DocumentService.query, | 
		
	
		
			
			|  |  |  | name=name + ".pdf", | 
		
	
		
			
			|  |  |  | kb_id=kb.id) | 
		
	
		
			
			|  |  |  | filename = duplicate_name(DocumentService.query, name=name + ".pdf", kb_id=kb.id) | 
		
	
		
			
			|  |  |  | filetype = filename_type(filename) | 
		
	
		
			
			|  |  |  | if filetype == FileType.OTHER.value: | 
		
	
		
			
			|  |  |  | raise RuntimeError("This type of file has not been supported yet!") | 
		
	
	
		
			
			|  |  | @@ -130,7 +121,7 @@ def web_crawl(): | 
		
	
		
			
			|  |  |  | "name": filename, | 
		
	
		
			
			|  |  |  | "location": location, | 
		
	
		
			
			|  |  |  | "size": len(blob), | 
		
	
		
			
			|  |  |  | "thumbnail": thumbnail(filename, blob) | 
		
	
		
			
			|  |  |  | "thumbnail": thumbnail(filename, blob), | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | if doc["type"] == FileType.VISUAL: | 
		
	
		
			
			|  |  |  | doc["parser_id"] = ParserType.PICTURE.value | 
		
	
	
		
			
			|  |  | @@ -147,58 +138,53 @@ def web_crawl(): | 
		
	
		
			
			|  |  |  | return get_json_result(data=True) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/create', methods=['POST'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/create", methods=["POST"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @login_required | 
		
	
		
			
			|  |  |  | @validate_request("name", "kb_id") | 
		
	
		
			
			|  |  |  | def create(): | 
		
	
		
			
			|  |  |  | req = request.json | 
		
	
		
			
			|  |  |  | kb_id = req["kb_id"] | 
		
	
		
			
			|  |  |  | if not kb_id: | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | e, kb = KnowledgebaseService.get_by_id(kb_id) | 
		
	
		
			
			|  |  |  | if not e: | 
		
	
		
			
			|  |  |  | return get_data_error_result( | 
		
	
		
			
			|  |  |  | message="Can't find this knowledgebase!") | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Can't find this knowledgebase!") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if DocumentService.query(name=req["name"], kb_id=kb_id): | 
		
	
		
			
			|  |  |  | return get_data_error_result( | 
		
	
		
			
			|  |  |  | message="Duplicated document name in the same knowledgebase.") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | doc = DocumentService.insert({ | 
		
	
		
			
			|  |  |  | "id": get_uuid(), | 
		
	
		
			
			|  |  |  | "kb_id": kb.id, | 
		
	
		
			
			|  |  |  | "parser_id": kb.parser_id, | 
		
	
		
			
			|  |  |  | "parser_config": kb.parser_config, | 
		
	
		
			
			|  |  |  | "created_by": current_user.id, | 
		
	
		
			
			|  |  |  | "type": FileType.VIRTUAL, | 
		
	
		
			
			|  |  |  | "name": req["name"], | 
		
	
		
			
			|  |  |  | "location": "", | 
		
	
		
			
			|  |  |  | "size": 0 | 
		
	
		
			
			|  |  |  | }) | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Duplicated document name in the same knowledgebase.") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | doc = DocumentService.insert( | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | "id": get_uuid(), | 
		
	
		
			
			|  |  |  | "kb_id": kb.id, | 
		
	
		
			
			|  |  |  | "parser_id": kb.parser_id, | 
		
	
		
			
			|  |  |  | "parser_config": kb.parser_config, | 
		
	
		
			
			|  |  |  | "created_by": current_user.id, | 
		
	
		
			
			|  |  |  | "type": FileType.VIRTUAL, | 
		
	
		
			
			|  |  |  | "name": req["name"], | 
		
	
		
			
			|  |  |  | "location": "", | 
		
	
		
			
			|  |  |  | "size": 0, | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | return get_json_result(data=doc.to_json()) | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
		
			
			|  |  |  | return server_error_response(e) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/list', methods=['POST'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/list", methods=["POST"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @login_required | 
		
	
		
			
			|  |  |  | def list_docs(): | 
		
	
		
			
			|  |  |  | kb_id = request.args.get("kb_id") | 
		
	
		
			
			|  |  |  | if not kb_id: | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | tenants = UserTenantService.query(user_id=current_user.id) | 
		
	
		
			
			|  |  |  | for tenant in tenants: | 
		
	
		
			
			|  |  |  | if KnowledgebaseService.query( | 
		
	
		
			
			|  |  |  | tenant_id=tenant.tenant_id, id=kb_id): | 
		
	
		
			
			|  |  |  | if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id): | 
		
	
		
			
			|  |  |  | break | 
		
	
		
			
			|  |  |  | else: | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='Only owner of knowledgebase authorized for this operation.', | 
		
	
		
			
			|  |  |  | code=settings.RetCode.OPERATING_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=settings.RetCode.OPERATING_ERROR) | 
		
	
		
			
			|  |  |  | keywords = request.args.get("keywords", "") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | page_number = int(request.args.get("page", 0)) | 
		
	
	
		
			
			|  |  | @@ -212,83 +198,67 @@ def list_docs(): | 
		
	
		
			
			|  |  |  | if run_status: | 
		
	
		
			
			|  |  |  | invalid_status = {s for s in run_status if s not in VALID_TASK_STATUS} | 
		
	
		
			
			|  |  |  | if invalid_status: | 
		
	
		
			
			|  |  |  | return get_data_error_result( | 
		
	
		
			
			|  |  |  | message=f"Invalid filter run status conditions: {', '.join(invalid_status)}" | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | return get_data_error_result(message=f"Invalid filter run status conditions: {', '.join(invalid_status)}") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | types = req.get("types", []) | 
		
	
		
			
			|  |  |  | if types: | 
		
	
		
			
			|  |  |  | invalid_types = {t for t in types if t not in VALID_FILE_TYPES} | 
		
	
		
			
			|  |  |  | if invalid_types: | 
		
	
		
			
			|  |  |  | return get_data_error_result( | 
		
	
		
			
			|  |  |  | message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}" | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | return get_data_error_result(message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | docs, tol = DocumentService.get_by_kb_id( | 
		
	
		
			
			|  |  |  | kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types) | 
		
	
		
			
			|  |  |  | docs, tol = DocumentService.get_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | for doc_item in docs: | 
		
	
		
			
			|  |  |  | if doc_item['thumbnail'] and not doc_item['thumbnail'].startswith(IMG_BASE64_PREFIX): | 
		
	
		
			
			|  |  |  | doc_item['thumbnail'] = f"/v1/document/image/{kb_id}-{doc_item['thumbnail']}" | 
		
	
		
			
			|  |  |  | if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX): | 
		
	
		
			
			|  |  |  | doc_item["thumbnail"] = f"/v1/document/image/{kb_id}-{doc_item['thumbnail']}" | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | return get_json_result(data={"total": tol, "docs": docs}) | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
		
			
			|  |  |  | return server_error_response(e) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/infos', methods=['POST'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/infos", methods=["POST"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @login_required | 
		
	
		
			
			|  |  |  | def docinfos(): | 
		
	
		
			
			|  |  |  | req = request.json | 
		
	
		
			
			|  |  |  | doc_ids = req["doc_ids"] | 
		
	
		
			
			|  |  |  | for doc_id in doc_ids: | 
		
	
		
			
			|  |  |  | if not DocumentService.accessible(doc_id, current_user.id): | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, | 
		
	
		
			
			|  |  |  | message='No authorization.', | 
		
	
		
			
			|  |  |  | code=settings.RetCode.AUTHENTICATION_ERROR | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR) | 
		
	
		
			
			|  |  |  | docs = DocumentService.get_by_ids(doc_ids) | 
		
	
		
			
			|  |  |  | return get_json_result(data=list(docs.dicts())) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/thumbnails', methods=['GET'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/thumbnails", methods=["GET"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | # @login_required | 
		
	
		
			
			|  |  |  | def thumbnails(): | 
		
	
		
			
			|  |  |  | doc_ids = request.args.get("doc_ids").split(",") | 
		
	
		
			
			|  |  |  | if not doc_ids: | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='Lack of "Document ID"', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message='Lack of "Document ID"', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | docs = DocumentService.get_thumbnails(doc_ids) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | for doc_item in docs: | 
		
	
		
			
			|  |  |  | if doc_item['thumbnail'] and not doc_item['thumbnail'].startswith(IMG_BASE64_PREFIX): | 
		
	
		
			
			|  |  |  | doc_item['thumbnail'] = f"/v1/document/image/{doc_item['kb_id']}-{doc_item['thumbnail']}" | 
		
	
		
			
			|  |  |  | if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX): | 
		
	
		
			
			|  |  |  | doc_item["thumbnail"] = f"/v1/document/image/{doc_item['kb_id']}-{doc_item['thumbnail']}" | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | return get_json_result(data={d["id"]: d["thumbnail"] for d in docs}) | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
		
			
			|  |  |  | return server_error_response(e) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/change_status', methods=['POST'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/change_status", methods=["POST"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @login_required | 
		
	
		
			
			|  |  |  | @validate_request("doc_id", "status") | 
		
	
		
			
			|  |  |  | def change_status(): | 
		
	
		
			
			|  |  |  | req = request.json | 
		
	
		
			
			|  |  |  | if str(req["status"]) not in ["0", "1"]: | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, | 
		
	
		
			
			|  |  |  | message='"Status" must be either 0 or 1!', | 
		
	
		
			
			|  |  |  | code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message='"Status" must be either 0 or 1!', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if not DocumentService.accessible(req["doc_id"], current_user.id): | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, | 
		
	
		
			
			|  |  |  | message='No authorization.', | 
		
	
		
			
			|  |  |  | code=settings.RetCode.AUTHENTICATION_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | e, doc = DocumentService.get_by_id(req["doc_id"]) | 
		
	
	
		
			
			|  |  | @@ -296,23 +266,19 @@ def change_status(): | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Document not found!") | 
		
	
		
			
			|  |  |  | e, kb = KnowledgebaseService.get_by_id(doc.kb_id) | 
		
	
		
			
			|  |  |  | if not e: | 
		
	
		
			
			|  |  |  | return get_data_error_result( | 
		
	
		
			
			|  |  |  | message="Can't find this knowledgebase!") | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Can't find this knowledgebase!") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if not DocumentService.update_by_id( | 
		
	
		
			
			|  |  |  | req["doc_id"], {"status": str(req["status"])}): | 
		
	
		
			
			|  |  |  | return get_data_error_result( | 
		
	
		
			
			|  |  |  | message="Database error (Document update)!") | 
		
	
		
			
			|  |  |  | if not DocumentService.update_by_id(req["doc_id"], {"status": str(req["status"])}): | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Database error (Document update)!") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | status = int(req["status"]) | 
		
	
		
			
			|  |  |  | settings.docStoreConn.update({"doc_id": req["doc_id"]}, {"available_int": status}, | 
		
	
		
			
			|  |  |  | search.index_name(kb.tenant_id), doc.kb_id) | 
		
	
		
			
			|  |  |  | settings.docStoreConn.update({"doc_id": req["doc_id"]}, {"available_int": status}, search.index_name(kb.tenant_id), doc.kb_id) | 
		
	
		
			
			|  |  |  | return get_json_result(data=True) | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
		
			
			|  |  |  | return server_error_response(e) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/rm', methods=['POST'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/rm", methods=["POST"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @login_required | 
		
	
		
			
			|  |  |  | @validate_request("doc_id") | 
		
	
		
			
			|  |  |  | def rm(): | 
		
	
	
		
			
			|  |  | @@ -323,11 +289,7 @@ def rm(): | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | for doc_id in doc_ids: | 
		
	
		
			
			|  |  |  | if not DocumentService.accessible4deletion(doc_id, current_user.id): | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, | 
		
	
		
			
			|  |  |  | message='No authorization.', | 
		
	
		
			
			|  |  |  | code=settings.RetCode.AUTHENTICATION_ERROR | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | root_folder = FileService.get_root_folder(current_user.id) | 
		
	
		
			
			|  |  |  | pf_id = root_folder["id"] | 
		
	
	
		
			
			|  |  | @@ -347,8 +309,7 @@ def rm(): | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | TaskService.filter_delete([Task.doc_id == doc_id]) | 
		
	
		
			
			|  |  |  | if not DocumentService.remove_document(doc, tenant_id): | 
		
	
		
			
			|  |  |  | return get_data_error_result( | 
		
	
		
			
			|  |  |  | message="Database error (Document removal)!") | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Database error (Document removal)!") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | f2d = File2DocumentService.get_by_document_id(doc_id) | 
		
	
		
			
			|  |  |  | deleted_file_count = 0 | 
		
	
	
		
			
			|  |  | @@ -376,18 +337,14 @@ def rm(): | 
		
	
		
			
			|  |  |  | return get_json_result(data=True) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/run', methods=['POST'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/run", methods=["POST"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @login_required | 
		
	
		
			
			|  |  |  | @validate_request("doc_ids", "run") | 
		
	
		
			
			|  |  |  | def run(): | 
		
	
		
			
			|  |  |  | def run(): | 
		
	
		
			
			|  |  |  | req = request.json | 
		
	
		
			
			|  |  |  | for doc_id in req["doc_ids"]: | 
		
	
		
			
			|  |  |  | if not DocumentService.accessible(doc_id, current_user.id): | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, | 
		
	
		
			
			|  |  |  | message='No authorization.', | 
		
	
		
			
			|  |  |  | code=settings.RetCode.AUTHENTICATION_ERROR | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR) | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | kb_table_num_map = {} | 
		
	
		
			
			|  |  |  | for id in req["doc_ids"]: | 
		
	
	
		
			
			|  |  | @@ -421,7 +378,7 @@ def run(): | 
		
	
		
			
			|  |  |  | if kb_id not in kb_table_num_map: | 
		
	
		
			
			|  |  |  | count = DocumentService.count_by_kb_id(kb_id=kb_id, keywords="", run_status=[TaskStatus.DONE], types=[]) | 
		
	
		
			
			|  |  |  | kb_table_num_map[kb_id] = count | 
		
	
		
			
			|  |  |  | if kb_table_num_map[kb_id] <=0: | 
		
	
		
			
			|  |  |  | if kb_table_num_map[kb_id] <= 0: | 
		
	
		
			
			|  |  |  | KnowledgebaseService.delete_field_map(kb_id) | 
		
	
		
			
			|  |  |  | bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"]) | 
		
	
		
			
			|  |  |  | queue_tasks(doc, bucket, name, 0) | 
		
	
	
		
			
			|  |  | @@ -431,36 +388,25 @@ def run(): | 
		
	
		
			
			|  |  |  | return server_error_response(e) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/rename', methods=['POST'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/rename", methods=["POST"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @login_required | 
		
	
		
			
			|  |  |  | @validate_request("doc_id", "name") | 
		
	
		
			
			|  |  |  | def rename(): | 
		
	
		
			
			|  |  |  | req = request.json | 
		
	
		
			
			|  |  |  | if not DocumentService.accessible(req["doc_id"], current_user.id): | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, | 
		
	
		
			
			|  |  |  | message='No authorization.', | 
		
	
		
			
			|  |  |  | code=settings.RetCode.AUTHENTICATION_ERROR | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR) | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | e, doc = DocumentService.get_by_id(req["doc_id"]) | 
		
	
		
			
			|  |  |  | if not e: | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Document not found!") | 
		
	
		
			
			|  |  |  | if pathlib.Path(req["name"].lower()).suffix != pathlib.Path( | 
		
	
		
			
			|  |  |  | doc.name.lower()).suffix: | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, | 
		
	
		
			
			|  |  |  | message="The extension of file can't be changed", | 
		
	
		
			
			|  |  |  | code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id): | 
		
	
		
			
			|  |  |  | if d.name == req["name"]: | 
		
	
		
			
			|  |  |  | return get_data_error_result( | 
		
	
		
			
			|  |  |  | message="Duplicated document name in the same knowledgebase.") | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Duplicated document name in the same knowledgebase.") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if not DocumentService.update_by_id( | 
		
	
		
			
			|  |  |  | req["doc_id"], {"name": req["name"]}): | 
		
	
		
			
			|  |  |  | return get_data_error_result( | 
		
	
		
			
			|  |  |  | message="Database error (Document rename)!") | 
		
	
		
			
			|  |  |  | if not DocumentService.update_by_id(req["doc_id"], {"name": req["name"]}): | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Database error (Document rename)!") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | informs = File2DocumentService.get_by_document_id(req["doc_id"]) | 
		
	
		
			
			|  |  |  | if informs: | 
		
	
	
		
			
			|  |  | @@ -472,7 +418,7 @@ def rename(): | 
		
	
		
			
			|  |  |  | return server_error_response(e) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/get/<doc_id>', methods=['GET'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/get/<doc_id>", methods=["GET"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | # @login_required | 
		
	
		
			
			|  |  |  | def get(doc_id): | 
		
	
		
			
			|  |  |  | try: | 
		
	
	
		
			
			|  |  | @@ -486,29 +432,22 @@ def get(doc_id): | 
		
	
		
			
			|  |  |  | ext = re.search(r"\.([^.]+)$", doc.name) | 
		
	
		
			
			|  |  |  | if ext: | 
		
	
		
			
			|  |  |  | if doc.type == FileType.VISUAL.value: | 
		
	
		
			
			|  |  |  | response.headers.set('Content-Type', 'image/%s' % ext.group(1)) | 
		
	
		
			
			|  |  |  | response.headers.set("Content-Type", "image/%s" % ext.group(1)) | 
		
	
		
			
			|  |  |  | else: | 
		
	
		
			
			|  |  |  | response.headers.set( | 
		
	
		
			
			|  |  |  | 'Content-Type', | 
		
	
		
			
			|  |  |  | 'application/%s' % | 
		
	
		
			
			|  |  |  | ext.group(1)) | 
		
	
		
			
			|  |  |  | response.headers.set("Content-Type", "application/%s" % ext.group(1)) | 
		
	
		
			
			|  |  |  | return response | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
		
			
			|  |  |  | return server_error_response(e) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/change_parser', methods=['POST'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/change_parser", methods=["POST"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @login_required | 
		
	
		
			
			|  |  |  | @validate_request("doc_id", "parser_id") | 
		
	
		
			
			|  |  |  | def change_parser(): | 
		
	
		
			
			|  |  |  | req = request.json | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if not DocumentService.accessible(req["doc_id"], current_user.id): | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, | 
		
	
		
			
			|  |  |  | message='No authorization.', | 
		
	
		
			
			|  |  |  | code=settings.RetCode.AUTHENTICATION_ERROR | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR) | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | e, doc = DocumentService.get_by_id(req["doc_id"]) | 
		
	
		
			
			|  |  |  | if not e: | 
		
	
	
		
			
			|  |  | @@ -520,21 +459,16 @@ def change_parser(): | 
		
	
		
			
			|  |  |  | else: | 
		
	
		
			
			|  |  |  | return get_json_result(data=True) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if ((doc.type == FileType.VISUAL and req["parser_id"] != "picture") | 
		
	
		
			
			|  |  |  | or (re.search( | 
		
	
		
			
			|  |  |  | r"\.(ppt|pptx|pages)$", doc.name) and req["parser_id"] != "presentation")): | 
		
	
		
			
			|  |  |  | if (doc.type == FileType.VISUAL and req["parser_id"] != "picture") or (re.search(r"\.(ppt|pptx|pages)$", doc.name) and req["parser_id"] != "presentation"): | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Not supported yet!") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | e = DocumentService.update_by_id(doc.id, | 
		
	
		
			
			|  |  |  | {"parser_id": req["parser_id"], "progress": 0, "progress_msg": "", | 
		
	
		
			
			|  |  |  | "run": TaskStatus.UNSTART.value}) | 
		
	
		
			
			|  |  |  | e = DocumentService.update_by_id(doc.id, {"parser_id": req["parser_id"], "progress": 0, "progress_msg": "", "run": TaskStatus.UNSTART.value}) | 
		
	
		
			
			|  |  |  | if not e: | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Document not found!") | 
		
	
		
			
			|  |  |  | if "parser_config" in req: | 
		
	
		
			
			|  |  |  | DocumentService.update_parser_config(doc.id, req["parser_config"]) | 
		
	
		
			
			|  |  |  | if doc.token_num > 0: | 
		
	
		
			
			|  |  |  | e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, | 
		
	
		
			
			|  |  |  | doc.process_duation * -1) | 
		
	
		
			
			|  |  |  | e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, doc.process_duation * -1) | 
		
	
		
			
			|  |  |  | if not e: | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Document not found!") | 
		
	
		
			
			|  |  |  | tenant_id = DocumentService.get_tenant_id(req["doc_id"]) | 
		
	
	
		
			
			|  |  | @@ -548,7 +482,7 @@ def change_parser(): | 
		
	
		
			
			|  |  |  | return server_error_response(e) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/image/<image_id>', methods=['GET'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/image/<image_id>", methods=["GET"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | # @login_required | 
		
	
		
			
			|  |  |  | def get_image(image_id): | 
		
	
		
			
			|  |  |  | try: | 
		
	
	
		
			
			|  |  | @@ -557,53 +491,46 @@ def get_image(image_id): | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Image not found.") | 
		
	
		
			
			|  |  |  | bkt, nm = image_id.split("-") | 
		
	
		
			
			|  |  |  | response = flask.make_response(STORAGE_IMPL.get(bkt, nm)) | 
		
	
		
			
			|  |  |  | response.headers.set('Content-Type', 'image/JPEG') | 
		
	
		
			
			|  |  |  | response.headers.set("Content-Type", "image/JPEG") | 
		
	
		
			
			|  |  |  | return response | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
		
			
			|  |  |  | return server_error_response(e) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/upload_and_parse', methods=['POST'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/upload_and_parse", methods=["POST"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @login_required | 
		
	
		
			
			|  |  |  | @validate_request("conversation_id") | 
		
	
		
			
			|  |  |  | def upload_and_parse(): | 
		
	
		
			
			|  |  |  | if 'file' not in request.files: | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='No file part!', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | if "file" not in request.files: | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="No file part!", code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | file_objs = request.files.getlist('file') | 
		
	
		
			
			|  |  |  | file_objs = request.files.getlist("file") | 
		
	
		
			
			|  |  |  | for file_obj in file_objs: | 
		
	
		
			
			|  |  |  | if file_obj.filename == '': | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='No file selected!', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | if file_obj.filename == "": | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | doc_ids = doc_upload_and_parse(request.form.get("conversation_id"), file_objs, current_user.id) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | return get_json_result(data=doc_ids) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/parse', methods=['POST'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/parse", methods=["POST"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @login_required | 
		
	
		
			
			|  |  |  | def parse(): | 
		
	
		
			
			|  |  |  | url = request.json.get("url") if request.json else "" | 
		
	
		
			
			|  |  |  | if url: | 
		
	
		
			
			|  |  |  | if not is_valid_url(url): | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='The URL format is invalid', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="The URL format is invalid", code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | download_path = os.path.join(get_project_base_directory(), "logs/downloads") | 
		
	
		
			
			|  |  |  | os.makedirs(download_path, exist_ok=True) | 
		
	
		
			
			|  |  |  | from seleniumwire.webdriver import Chrome, ChromeOptions | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | options = ChromeOptions() | 
		
	
		
			
			|  |  |  | options.add_argument('--headless') | 
		
	
		
			
			|  |  |  | options.add_argument('--disable-gpu') | 
		
	
		
			
			|  |  |  | options.add_argument('--no-sandbox') | 
		
	
		
			
			|  |  |  | options.add_argument('--disable-dev-shm-usage') | 
		
	
		
			
			|  |  |  | options.add_experimental_option('prefs', { | 
		
	
		
			
			|  |  |  | 'download.default_directory': download_path, | 
		
	
		
			
			|  |  |  | 'download.prompt_for_download': False, | 
		
	
		
			
			|  |  |  | 'download.directory_upgrade': True, | 
		
	
		
			
			|  |  |  | 'safebrowsing.enabled': True | 
		
	
		
			
			|  |  |  | }) | 
		
	
		
			
			|  |  |  | options.add_argument("--headless") | 
		
	
		
			
			|  |  |  | options.add_argument("--disable-gpu") | 
		
	
		
			
			|  |  |  | options.add_argument("--no-sandbox") | 
		
	
		
			
			|  |  |  | options.add_argument("--disable-dev-shm-usage") | 
		
	
		
			
			|  |  |  | options.add_experimental_option("prefs", {"download.default_directory": download_path, "download.prompt_for_download": False, "download.directory_upgrade": True, "safebrowsing.enabled": True}) | 
		
	
		
			
			|  |  |  | driver = Chrome(options=options) | 
		
	
		
			
			|  |  |  | driver.get(url) | 
		
	
		
			
			|  |  |  | res_headers = [r.response.headers for r in driver.requests if r and r.response] | 
		
	
	
		
			
			|  |  | @@ -626,51 +553,41 @@ def parse(): | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | r = re.search(r"filename=\"([^\"]+)\"", str(res_headers)) | 
		
	
		
			
			|  |  |  | if not r or not r.group(1): | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message="Can't not identify downloaded file", code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="Can't not identify downloaded file", code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | f = File(r.group(1), os.path.join(download_path, r.group(1))) | 
		
	
		
			
			|  |  |  | txt = FileService.parse_docs([f], current_user.id) | 
		
	
		
			
			|  |  |  | return get_json_result(data=txt) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if 'file' not in request.files: | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='No file part!', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | if "file" not in request.files: | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="No file part!", code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | file_objs = request.files.getlist('file') | 
		
	
		
			
			|  |  |  | file_objs = request.files.getlist("file") | 
		
	
		
			
			|  |  |  | txt = FileService.parse_docs(file_objs, current_user.id) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | return get_json_result(data=txt) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @manager.route('/set_meta', methods=['POST'])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @manager.route("/set_meta", methods=["POST"])  # noqa: F821 | 
		
	
		
			
			|  |  |  | @login_required | 
		
	
		
			
			|  |  |  | @validate_request("doc_id", "meta") | 
		
	
		
			
			|  |  |  | def set_meta(): | 
		
	
		
			
			|  |  |  | req = request.json | 
		
	
		
			
			|  |  |  | if not DocumentService.accessible(req["doc_id"], current_user.id): | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, | 
		
	
		
			
			|  |  |  | message='No authorization.', | 
		
	
		
			
			|  |  |  | code=settings.RetCode.AUTHENTICATION_ERROR | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR) | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | meta = json.loads(req["meta"]) | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message=f'Json syntax error: {e}', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message=f"Json syntax error: {e}", code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | if not isinstance(meta, dict): | 
		
	
		
			
			|  |  |  | return get_json_result( | 
		
	
		
			
			|  |  |  | data=False, message='Meta data should be in Json map format, like {"key": "value"}', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | return get_json_result(data=False, message='Meta data should be in Json map format, like {"key": "value"}', code=settings.RetCode.ARGUMENT_ERROR) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | e, doc = DocumentService.get_by_id(req["doc_id"]) | 
		
	
		
			
			|  |  |  | if not e: | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Document not found!") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if not DocumentService.update_by_id( | 
		
	
		
			
			|  |  |  | req["doc_id"], {"meta_fields": meta}): | 
		
	
		
			
			|  |  |  | return get_data_error_result( | 
		
	
		
			
			|  |  |  | message="Database error (meta updates)!") | 
		
	
		
			
			|  |  |  | if not DocumentService.update_by_id(req["doc_id"], {"meta_fields": meta}): | 
		
	
		
			
			|  |  |  | return get_data_error_result(message="Database error (meta updates)!") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | return get_json_result(data=True) | 
		
	
		
			
			|  |  |  | except Exception as e: |