### What problem does this PR solve?

Enhance the capability of `list_docs`: the document list can now be filtered by run status and by file type. Breaking change: the endpoint's HTTP method changes from `GET` to `POST`.

### Type of change

- [x] Refactoring
- [x] Enhancement with breaking change

tags/v0.19.0
| @@ -21,7 +21,7 @@ from flask import request, Response | |||
| from api.db.services.llm_service import TenantLLMService | |||
| from flask_login import login_required, current_user | |||
| from api.db import FileType, LLMType, ParserType, FileSource | |||
| from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileType, LLMType, ParserType, FileSource | |||
| from api.db.db_models import APIToken, Task, File | |||
| from api.db.services import duplicate_name | |||
| from api.db.services.api_service import APITokenService, API4ConversationService | |||
| @@ -577,10 +577,23 @@ def list_kb_docs(): | |||
| orderby = req.get("orderby", "create_time") | |||
| desc = req.get("desc", True) | |||
| keywords = req.get("keywords", "") | |||
| status = req.get("status", []) | |||
| if status: | |||
| invalid_status = {s for s in status if s not in VALID_TASK_STATUS} | |||
| if invalid_status: | |||
| return get_data_error_result( | |||
| message=f"Invalid filter status conditions: {', '.join(invalid_status)}" | |||
| ) | |||
| types = req.get("types", []) | |||
| if types: | |||
| invalid_types = {t for t in types if t not in VALID_FILE_TYPES} | |||
| if invalid_types: | |||
| return get_data_error_result( | |||
| message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}" | |||
| ) | |||
| try: | |||
| docs, tol = DocumentService.get_by_kb_id( | |||
| kb_id, page_number, items_per_page, orderby, desc, keywords) | |||
| kb_id, page_number, items_per_page, orderby, desc, keywords, status, types) | |||
| docs = [{"doc_id": doc['id'], "doc_name": doc['name']} for doc in docs] | |||
| return get_json_result(data={"total": tol, "docs": docs}) | |||
| @@ -25,7 +25,7 @@ from flask_login import login_required, current_user | |||
| from deepdoc.parser.html_parser import RAGFlowHtmlParser | |||
| from rag.nlp import search | |||
| from api.db import FileType, TaskStatus, ParserType, FileSource | |||
| from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileType, TaskStatus, ParserType, FileSource | |||
| from api.db.db_models import File, Task | |||
| from api.db.services.file2document_service import File2DocumentService | |||
| from api.db.services.file_service import FileService | |||
| @@ -183,7 +183,7 @@ def create(): | |||
| return server_error_response(e) | |||
| @manager.route('/list', methods=['GET']) # noqa: F821 | |||
| @manager.route('/list', methods=['POST']) # noqa: F821 | |||
| @login_required | |||
| def list_docs(): | |||
| kb_id = request.args.get("kb_id") | |||
| @@ -201,13 +201,32 @@ def list_docs(): | |||
| code=settings.RetCode.OPERATING_ERROR) | |||
| keywords = request.args.get("keywords", "") | |||
| page_number = int(request.args.get("page", 1)) | |||
| items_per_page = int(request.args.get("page_size", 15)) | |||
| page_number = int(request.args.get("page", 0)) | |||
| items_per_page = int(request.args.get("page_size", 0)) | |||
| orderby = request.args.get("orderby", "create_time") | |||
| desc = request.args.get("desc", True) | |||
| req = request.get_json() | |||
| run_status = req.get("run_status", []) | |||
| if run_status: | |||
| invalid_status = {s for s in run_status if s not in VALID_TASK_STATUS} | |||
| if invalid_status: | |||
| return get_data_error_result( | |||
| message=f"Invalid filter run status conditions: {', '.join(invalid_status)}" | |||
| ) | |||
| types = req.get("types", []) | |||
| if types: | |||
| invalid_types = {t for t in types if t not in VALID_FILE_TYPES} | |||
| if invalid_types: | |||
| return get_data_error_result( | |||
| message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}" | |||
| ) | |||
| try: | |||
| docs, tol = DocumentService.get_by_kb_id( | |||
| kb_id, page_number, items_per_page, orderby, desc, keywords) | |||
| kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types) | |||
| for doc_item in docs: | |||
| if doc_item['thumbnail'] and not doc_item['thumbnail'].startswith(IMG_BASE64_PREFIX): | |||
| @@ -331,7 +350,9 @@ def rm(): | |||
| message="Database error (Document removal)!") | |||
| f2d = File2DocumentService.get_by_document_id(doc_id) | |||
| deleted_file_count = FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id]) | |||
| deleted_file_count = 0 | |||
| if f2d: | |||
| deleted_file_count = FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id]) | |||
| File2DocumentService.delete_by_document_id(doc_id) | |||
| if deleted_file_count > 0: | |||
| STORAGE_IMPL.rm(b, n) | |||
| @@ -49,6 +49,7 @@ class FileType(StrEnum): | |||
| FOLDER = 'folder' | |||
| OTHER = "other" | |||
| VALID_FILE_TYPES = {FileType.PDF, FileType.DOC, FileType.VISUAL, FileType.AURAL, FileType.VIRTUAL, FileType.FOLDER, FileType.OTHER} | |||
| class LLMType(StrEnum): | |||
| CHAT = 'chat' | |||
| @@ -73,6 +74,7 @@ class TaskStatus(StrEnum): | |||
| DONE = "3" | |||
| FAIL = "4" | |||
| VALID_TASK_STATUS = {TaskStatus.UNSTART, TaskStatus.RUNNING, TaskStatus.CANCEL, TaskStatus.DONE, TaskStatus.FAIL} | |||
| class ParserType(StrEnum): | |||
| PRESENTATION = "presentation" | |||
| @@ -70,7 +70,7 @@ class DocumentService(CommonService): | |||
| @classmethod | |||
| @DB.connection_context() | |||
| def get_by_kb_id(cls, kb_id, page_number, items_per_page, | |||
| orderby, desc, keywords): | |||
| orderby, desc, keywords, run_status, types): | |||
| if keywords: | |||
| docs = cls.model.select().where( | |||
| (cls.model.kb_id == kb_id), | |||
| @@ -78,13 +78,21 @@ class DocumentService(CommonService): | |||
| ) | |||
| else: | |||
| docs = cls.model.select().where(cls.model.kb_id == kb_id) | |||
| if run_status: | |||
| docs = docs.where(cls.model.run.in_(run_status)) | |||
| if types: | |||
| docs = docs.where(cls.model.type.in_(types)) | |||
| count = docs.count() | |||
| if desc: | |||
| docs = docs.order_by(cls.model.getter_by(orderby).desc()) | |||
| else: | |||
| docs = docs.order_by(cls.model.getter_by(orderby).asc()) | |||
| docs = docs.paginate(page_number, items_per_page) | |||
| if page_number and items_per_page: | |||
| docs = docs.paginate(page_number, items_per_page) | |||
| return list(docs.dicts()), count | |||
| @@ -97,7 +97,7 @@ class KnowledgebaseService(CommonService): | |||
| kb = kbs[0] | |||
| # Get all documents in the knowledge base | |||
| docs, _ = DocumentService.get_by_kb_id(kb_id, 1, 1000, "create_time", True, "") | |||
| docs, _ = DocumentService.get_by_kb_id(kb_id, 1, 1000, "create_time", True, "", [], []) | |||
| # Check parsing status of each document | |||
| for doc in docs: | |||
| @@ -226,7 +226,10 @@ class KnowledgebaseService(CommonService): | |||
| cls.model.chunk_num, | |||
| cls.model.parser_id, | |||
| cls.model.parser_config, | |||
| cls.model.pagerank] | |||
| cls.model.pagerank, | |||
| cls.model.create_time, | |||
| cls.model.update_time | |||
| ] | |||
| kbs = cls.model.select(*fields).join(Tenant, on=( | |||
| (Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where( | |||
| (cls.model.id == kb_id), | |||
| @@ -70,7 +70,8 @@ def upload_file(auth, dataset_id, path): | |||
| def list_document(auth, dataset_id): | |||
| authorization = {"Authorization": auth} | |||
| url = f"{HOST_ADDRESS}/v1/document/list?kb_id={dataset_id}" | |||
| res = requests.get(url=url, headers=authorization) | |||
| json = {} | |||
| res = requests.post(url=url, headers=authorization, json=json) | |||
| return res.json() | |||