### What problem does this PR solve?

Adds the API for listing documents in a dataset.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
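For context, a minimal sketch of exercising the new listing API through the Python SDK changed in this PR (the host, API key, and import path are illustrative placeholders, not part of the change):

```python
# Hypothetical usage sketch; the host, API key, and import path are placeholders.
from ragflow import RAGFlow  # assumes the SDK package in this repo is importable

ragflow = RAGFlow("<your-api-key>", "http://127.0.0.1:9380")
dataset_id = ragflow.create_dataset("demo_dataset")["data"]["dataset_id"]
ragflow.upload_local_file(dataset_id, ["test_data/test.txt"])

# offset/count paginate, order_by/descend sort, keywords filters on document names.
res = ragflow.list_files(dataset_id, offset=0, count=-1, order_by="create_time", descend=True)
print(res["data"]["total"], [doc["name"] for doc in res["data"]["docs"]])
```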
@@ -13,13 +13,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import warnings

from flask import request
from flask_login import login_required, current_user
from httpx import HTTPError

from api.contants import NAME_LENGTH_LIMIT
from api.db import FileSource, StatusEnum
from api.db import FileType, ParserType, FileSource
from api.db import StatusEnum
from api.db.db_models import File
from api.db.services import duplicate_name
from api.db.services.document_service import DocumentService
@@ -29,8 +33,12 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.user_service import TenantService
from api.settings import RetCode
from api.utils import get_uuid
from api.utils.api_utils import construct_json_result, construct_result, construct_error_response, validate_request
from api.utils.api_utils import construct_json_result, construct_error_response
from api.utils.api_utils import construct_result, validate_request
from api.utils.file_utils import filename_type, thumbnail
from rag.utils.minio_conn import MINIO

MAXIMUM_OF_UPLOADING_FILES = 256

# ------------------------------ create a dataset ---------------------------------------

@@ -253,3 +261,216 @@ def update_dataset(dataset_id):
        return construct_json_result(data=dataset.to_json(), code=RetCode.SUCCESS)
    except Exception as e:
        return construct_error_response(e)
# --------------------------------content management ----------------------------------------------------

# ----------------------------upload files-----------------------------------------------------
@manager.route('/<dataset_id>/documents/', methods=['POST'])
@login_required
def upload_documents(dataset_id):
    # no files
    if not request.files:
        return construct_json_result(
            message='There is no file!', code=RetCode.ARGUMENT_ERROR)

    # the number of uploading files exceeds the limit
    file_objs = request.files.getlist('file')
    num_file_objs = len(file_objs)
    if num_file_objs > MAXIMUM_OF_UPLOADING_FILES:
        return construct_json_result(code=RetCode.DATA_ERROR, message=f"You try to upload {num_file_objs} files, "
                                     f"which exceeds the maximum number of uploading files: {MAXIMUM_OF_UPLOADING_FILES}")

    for file_obj in file_objs:
        # the content of the file
        file_content = file_obj.read()
        file_name = file_obj.filename
        # no name
        if not file_name:
            return construct_json_result(
                message='There is a file without name!', code=RetCode.ARGUMENT_ERROR)
        # TODO: support the remote files
        if 'http' in file_name:
            return construct_json_result(code=RetCode.ARGUMENT_ERROR, message="Remote files are not supported yet.")
        # if the content is empty, raise a warning
        if file_content == b'':
            warnings.warn(f"[WARNING]: The file {file_name} is empty.")

    # no dataset
    exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
    if not exist:
        return construct_json_result(message="Can't find this dataset", code=RetCode.DATA_ERROR)

    # get the root_folder
    root_folder = FileService.get_root_folder(current_user.id)
    # get the id of the root_folder
    parent_file_id = root_folder["id"]  # document id
    # this is for the new user, to create the '.knowledgebase' file
    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
    # go inside this folder and get the kb_root_folder
    kb_root_folder = FileService.get_kb_folder(current_user.id)
    # link the file management to the kb_folder
    kb_folder = FileService.new_a_file_from_kb(dataset.tenant_id, dataset.name, kb_root_folder["id"])

    # grab all the errors
    err = []
    MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
    uploaded_docs_json = []
    for file in file_objs:
        try:
            # TODO: get this value from the database, as some tenants have this limit while others don't
            if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(dataset.tenant_id) >= MAX_FILE_NUM_PER_USER:
                return construct_json_result(code=RetCode.DATA_ERROR,
                                             message="Exceed the maximum file number of a free user!")
            # deal with duplicate names
            filename = duplicate_name(
                DocumentService.query,
                name=file.filename,
                kb_id=dataset.id)
            # deal with unsupported types
            filetype = filename_type(filename)
            if filetype == FileType.OTHER.value:
                return construct_json_result(code=RetCode.DATA_ERROR,
                                             message="This type of file has not been supported yet!")
            # upload to minio
            location = filename
            while MINIO.obj_exist(dataset_id, location):
                location += "_"
            blob = file.read()
            MINIO.put(dataset_id, location, blob)
            doc = {
                "id": get_uuid(),
                "kb_id": dataset.id,
                "parser_id": dataset.parser_id,
                "parser_config": dataset.parser_config,
                "created_by": current_user.id,
                "type": filetype,
                "name": filename,
                "location": location,
                "size": len(blob),
                "thumbnail": thumbnail(filename, blob)
            }
            if doc["type"] == FileType.VISUAL:
                doc["parser_id"] = ParserType.PICTURE.value
            if re.search(r"\.(ppt|pptx|pages)$", filename):
                doc["parser_id"] = ParserType.PRESENTATION.value
            DocumentService.insert(doc)
            FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
            uploaded_docs_json.append(doc)
        except Exception as e:
            err.append(file.filename + ": " + str(e))

    if err:
        # return all the errors
        return construct_json_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
    # success
    return construct_json_result(data=uploaded_docs_json, code=RetCode.SUCCESS)
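For reviewers, a sketch of exercising this upload route directly with `requests` (the host, dataset id, and key are placeholders; the `file` form field matches `request.files.getlist('file')` above):

```python
import requests

# Placeholders: adjust the host, dataset id, and API key to your deployment.
url = "http://127.0.0.1:9380/api/v1/dataset/<dataset_id>/documents/"
headers = {"Authorization": "<your-api-key>"}

# 'file' is the multipart field name the route reads.
with open("test_data/test.txt", "rb") as f:
    res = requests.post(url, files=[("file", f)], headers=headers)
print(res.json())
```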
# ----------------------------delete a file-----------------------------------------------------
@manager.route('/<dataset_id>/documents/<document_id>', methods=['DELETE'])
@login_required
def delete_document(document_id, dataset_id):  # string
    # get the root folder
    root_folder = FileService.get_root_folder(current_user.id)
    # parent file's id
    parent_file_id = root_folder["id"]
    # consider the new user
    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
    # store any errors that occur
    errors = ""
    try:
        # check whether this document exists
        exist, doc = DocumentService.get_by_id(document_id)
        if not exist:
            return construct_json_result(message=f"Document {document_id} not found!", code=RetCode.DATA_ERROR)
        # check whether this tenant is authorized for this document
        tenant_id = DocumentService.get_tenant_id(document_id)
        if not tenant_id:
            return construct_json_result(
                message=f"You cannot delete this document {document_id} due to an authorization issue!",
                code=RetCode.AUTHENTICATION_ERROR)
        # get the document's dataset id and location
        real_dataset_id, location = File2DocumentService.get_minio_address(doc_id=document_id)
        if real_dataset_id != dataset_id:
            return construct_json_result(message=f"The document {document_id} is not in the dataset: {dataset_id}, "
                                                 f"but in the dataset: {real_dataset_id}.", code=RetCode.ARGUMENT_ERROR)
        # report an error if the removal fails
        if not DocumentService.remove_document(doc, tenant_id):
            return construct_json_result(
                message="There was an error during the document removal process. Please check the status of the "
                        "RAGFlow server and try the removal again.", code=RetCode.OPERATING_ERROR)
        # fetch the File2Document record associated with the provided document ID.
        file_to_doc = File2DocumentService.get_by_document_id(document_id)
        # delete the associated File record.
        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == file_to_doc[0].file_id])
        # delete the File2Document record itself using the document ID. This removes the
        # association between the document and the file after the File record has been deleted.
        File2DocumentService.delete_by_document_id(document_id)
        # delete it from minio
        MINIO.rm(dataset_id, location)
    except Exception as e:
        errors += str(e)

    if errors:
        return construct_json_result(data=False, message=errors, code=RetCode.SERVER_ERROR)
    return construct_json_result(data=True, code=RetCode.SUCCESS)
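The SDK wrapper added further down (`delete_files`) drives this route; a sketch of a successful call, assuming `ragflow`, `document_id`, and `dataset_id` were set up as in the upload sketch above:

```python
# Assumes ragflow, document_id, and dataset_id already exist; RetCode as in the tests.
res = ragflow.delete_files(document_id, dataset_id)
assert res["code"] == RetCode.SUCCESS and res["data"] is True
```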
# ----------------------------list files-----------------------------------------------------
@manager.route('/<dataset_id>/documents/', methods=['GET'])
@login_required
def list_documents(dataset_id):
    if not dataset_id:
        return construct_json_result(
            data=False, message='Lack of "dataset_id"', code=RetCode.ARGUMENT_ERROR)

    # searching keywords
    keywords = request.args.get("keywords", "")

    offset = request.args.get("offset", 0)
    count = request.args.get("count", -1)
    order_by = request.args.get("order_by", "create_time")
    descend = request.args.get("descend", "True")  # kept as a string; the service layer compares against 'True'/'False'
    try:
        docs, total = DocumentService.list_documents_in_dataset(dataset_id, int(offset), int(count), order_by,
                                                                descend, keywords)
        return construct_json_result(data={"total": total, "docs": docs}, code=RetCode.SUCCESS)
    except Exception as e:
        return construct_error_response(e)
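A sketch of calling the new route directly, showing the query parameters it reads; note that `descend` travels as the string "True"/"False", which is what the service layer compares against (host and key are placeholders):

```python
import requests

url = "http://127.0.0.1:9380/api/v1/dataset/<dataset_id>/documents/"
params = {"offset": 0, "count": 10, "order_by": "create_time", "descend": "True", "keywords": ""}
res = requests.get(url, params=params, headers={"Authorization": "<your-api-key>"})
body = res.json()
print(body["data"]["total"], len(body["data"]["docs"]))
```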
# ----------------------------download a file-----------------------------------------------------
# ----------------------------enable rename-----------------------------------------------------
# ----------------------------start parsing-----------------------------------------------------
# ----------------------------stop parsing-----------------------------------------------------
# ----------------------------show the status of the file-----------------------------------------------------
# ----------------------------list the chunks of the file-----------------------------------------------------
# ----------------------------delete the chunk-----------------------------------------------------
# ----------------------------edit the status of the chunk-----------------------------------------------------
# ----------------------------insert a new chunk-----------------------------------------------------
# ----------------------------upload a file-----------------------------------------------------
# ----------------------------get a specific chunk-----------------------------------------------------
# ----------------------------retrieval test-----------------------------------------------------
@@ -1,228 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
#
import os
import re
import warnings

from flask import request
from flask_login import login_required, current_user

from api.db import FileType, ParserType
from api.db.services import duplicate_name
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.settings import RetCode
from api.utils import get_uuid
from api.utils.api_utils import construct_json_result
from api.utils.file_utils import filename_type, thumbnail
from rag.utils.minio_conn import MINIO
from api.db.db_models import Task, File
from api.db import FileType, TaskStatus, ParserType, FileSource

MAXIMUM_OF_UPLOADING_FILES = 256

# ----------------------------upload local files-----------------------------------------------------
@manager.route('/<dataset_id>', methods=['POST'])
@login_required
def upload(dataset_id):
    # no files
    if not request.files:
        return construct_json_result(
            message='There is no file!', code=RetCode.ARGUMENT_ERROR)

    # the number of uploading files exceeds the limit
    file_objs = request.files.getlist('file')
    num_file_objs = len(file_objs)
    if num_file_objs > MAXIMUM_OF_UPLOADING_FILES:
        return construct_json_result(code=RetCode.DATA_ERROR, message=f"You try to upload {num_file_objs} files, "
                                     f"which exceeds the maximum number of uploading files: {MAXIMUM_OF_UPLOADING_FILES}")

    for file_obj in file_objs:
        # the content of the file
        file_content = file_obj.read()
        file_name = file_obj.filename
        # no name
        if not file_name:
            return construct_json_result(
                message='There is a file without name!', code=RetCode.ARGUMENT_ERROR)
        # TODO: support the remote files
        if 'http' in file_name:
            return construct_json_result(code=RetCode.ARGUMENT_ERROR, message="Remote files have not unsupported.")
        # the content is empty, raising a warning
        if file_content == b'':
            warnings.warn(f"[WARNING]: The file {file_name} is empty.")

    # no dataset
    exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
    if not exist:
        return construct_json_result(message="Can't find this dataset", code=RetCode.DATA_ERROR)

    # get the root_folder
    root_folder = FileService.get_root_folder(current_user.id)
    # get the id of the root_folder
    parent_file_id = root_folder["id"]  # document id
    # this is for the new user, create '.knowledgebase' file
    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
    # go inside this folder, get the kb_root_folder
    kb_root_folder = FileService.get_kb_folder(current_user.id)
    # link the file management to the kb_folder
    kb_folder = FileService.new_a_file_from_kb(dataset.tenant_id, dataset.name, kb_root_folder["id"])

    # grab all the errs
    err = []
    MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
    uploaded_docs_json = []
    for file in file_objs:
        try:
            # TODO: get this value from the database as some tenants have this limit while others don't
            if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(dataset.tenant_id) >= MAX_FILE_NUM_PER_USER:
                return construct_json_result(code=RetCode.DATA_ERROR,
                                             message="Exceed the maximum file number of a free user!")
            # deal with the duplicate name
            filename = duplicate_name(
                DocumentService.query,
                name=file.filename,
                kb_id=dataset.id)
            # deal with the unsupported type
            filetype = filename_type(filename)
            if filetype == FileType.OTHER.value:
                return construct_json_result(code=RetCode.DATA_ERROR,
                                             message="This type of file has not been supported yet!")
            # upload to the minio
            location = filename
            while MINIO.obj_exist(dataset_id, location):
                location += "_"
            blob = file.read()
            MINIO.put(dataset_id, location, blob)
            doc = {
                "id": get_uuid(),
                "kb_id": dataset.id,
                "parser_id": dataset.parser_id,
                "parser_config": dataset.parser_config,
                "created_by": current_user.id,
                "type": filetype,
                "name": filename,
                "location": location,
                "size": len(blob),
                "thumbnail": thumbnail(filename, blob)
            }
            if doc["type"] == FileType.VISUAL:
                doc["parser_id"] = ParserType.PICTURE.value
            if re.search(r"\.(ppt|pptx|pages)$", filename):
                doc["parser_id"] = ParserType.PRESENTATION.value
            DocumentService.insert(doc)
            FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
            uploaded_docs_json.append(doc)
        except Exception as e:
            err.append(file.filename + ": " + str(e))

    if err:
        # return all the errors
        return construct_json_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
    # success
    return construct_json_result(data=uploaded_docs_json, code=RetCode.SUCCESS)

# ----------------------------delete a file-----------------------------------------------------
@manager.route('/<dataset_id>/<document_id>', methods=['DELETE'])
@login_required
def delete(document_id, dataset_id):  # string
    # get the root folder
    root_folder = FileService.get_root_folder(current_user.id)
    # parent file's id
    parent_file_id = root_folder["id"]
    # consider the new user
    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
    # store all the errors that may have
    errors = ""
    try:
        # whether there is this document
        exist, doc = DocumentService.get_by_id(document_id)
        if not exist:
            return construct_json_result(message=f"Document {document_id} not found!", code=RetCode.DATA_ERROR)
        # whether this doc is authorized by this tenant
        tenant_id = DocumentService.get_tenant_id(document_id)
        if not tenant_id:
            return construct_json_result(message=f"You cannot delete this document {document_id} due to the authorization"
                                                 f" reason!", code=RetCode.AUTHENTICATION_ERROR)
        # get the doc's id and location
        real_dataset_id, location = File2DocumentService.get_minio_address(doc_id=document_id)
        if real_dataset_id != dataset_id:
            return construct_json_result(message=f"The document {document_id} is not in the dataset: {dataset_id}, "
                                                 f"but in the dataset: {real_dataset_id}.", code=RetCode.ARGUMENT_ERROR)
        # there is an issue when removing
        if not DocumentService.remove_document(doc, tenant_id):
            return construct_json_result(
                message="There was an error during the document removal process. Please check the status of the "
                        "RAGFlow server and try the removal again.", code=RetCode.OPERATING_ERROR)
        # fetch the File2Document record associated with the provided document ID.
        file_to_doc = File2DocumentService.get_by_document_id(document_id)
        # delete the associated File record.
        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == file_to_doc[0].file_id])
        # delete the File2Document record itself using the document ID. This removes the
        # association between the document and the file after the File record has been deleted.
        File2DocumentService.delete_by_document_id(document_id)
        # delete it from minio
        MINIO.rm(dataset_id, location)
    except Exception as e:
        errors += str(e)
    if errors:
        return construct_json_result(data=False, message=errors, code=RetCode.SERVER_ERROR)
    return construct_json_result(data=True, code=RetCode.SUCCESS)

# ----------------------------upload online files------------------------------------------------
# ----------------------------download a file-----------------------------------------------------
# ----------------------------enable rename-----------------------------------------------------
# ----------------------------list files-----------------------------------------------------
# ----------------------------start parsing-----------------------------------------------------
# ----------------------------stop parsing-----------------------------------------------------
# ----------------------------show the status of the file-----------------------------------------------------
# ----------------------------list the chunks of the file-----------------------------------------------------
# ----------------------------delete the chunk-----------------------------------------------------
# ----------------------------edit the status of the chunk-----------------------------------------------------
# ----------------------------insert a new chunk-----------------------------------------------------
# ----------------------------upload a file-----------------------------------------------------
# ----------------------------get a specific chunk-----------------------------------------------------
# ----------------------------retrieval test-----------------------------------------------------
@@ -59,6 +59,35 @@ class DocumentService(CommonService):
        return list(docs.dicts()), count

    @classmethod
    @DB.connection_context()
    def list_documents_in_dataset(cls, dataset_id, offset, count, order_by, descend, keywords):
        if keywords:
            docs = cls.model.select().where(
                (cls.model.kb_id == dataset_id),
                (fn.LOWER(cls.model.name).contains(keywords.lower()))
            )
        else:
            docs = cls.model.select().where(cls.model.kb_id == dataset_id)

        total = docs.count()

        if descend == 'True':
            docs = docs.order_by(cls.model.getter_by(order_by).desc())
        if descend == 'False':
            docs = docs.order_by(cls.model.getter_by(order_by).asc())

        docs = list(docs.dicts())
        docs_length = len(docs)

        if offset < 0 or offset > docs_length:
            raise IndexError("Offset is out of the valid range.")
        if count == -1:
            return docs[offset:], total
        return docs[offset:offset + count], total
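The pagination above reduces to a plain slice: sort the matches, then take `docs[offset:offset + count]`, with `count == -1` meaning "to the end". A standalone illustration of those semantics (not RAGFlow code):

```python
# Standalone illustration of the offset/count slicing used above; not RAGFlow code.
def paginate(items, offset, count):
    if offset < 0 or offset > len(items):
        raise IndexError("Offset is out of the valid range.")
    return items[offset:] if count == -1 else items[offset:offset + count]

assert paginate([0, 1, 2, 3, 4], offset=2, count=-1) == [2, 3, 4]
assert paginate([0, 1, 2, 3, 4], offset=1, count=2) == [1, 2]
```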
    @classmethod
    @DB.connection_context()
    def insert(cls, doc):
@@ -60,6 +60,9 @@ class KnowledgebaseService(CommonService):
        if offset < 0 or offset > kbs_length:
            raise IndexError("Offset is out of the valid range.")
        if count == -1:
            return kbs[offset:]
        return kbs[offset:offset + count]

    @classmethod
@@ -274,4 +274,6 @@ You are required to input at least one parameter.
    "code": 102,
    "message": "Please input at least one parameter that you want to update!"
}
```
@@ -26,12 +26,11 @@ class RAGFlow:
        '''
        api_url: http://<host_address>/api/v1
        dataset_url: http://<host_address>/api/v1/dataset
        document_url: http://<host_address>/api/v1/documents
        document_url: http://<host_address>/api/v1/dataset/{dataset_id}/documents
        '''
        self.user_key = user_key
        self.api_url = f"{base_url}/api/{version}"
        self.dataset_url = f"{self.api_url}/dataset"
        self.document_url = f"{self.api_url}/documents"
        self.authorization_header = {"Authorization": "{}".format(self.user_key)}

    def create_dataset(self, dataset_name):
@@ -79,7 +78,7 @@ class RAGFlow:
        response = requests.put(endpoint, json=params, headers=self.authorization_header)
        return response.json()

    # -------------------- content management -----------------------------------------------------
    # -------------------- content management -----------------------------------------------------

    # ----------------------------upload local files-----------------------------------------------------
    def upload_local_file(self, dataset_id, file_paths):
@@ -95,7 +94,7 @@ class RAGFlow:
            else:
                return {'code': RetCode.DATA_ERROR, 'message': f"The file {file_path} does not exist"}

        res = requests.request('POST', url=f"{self.document_url}/{dataset_id}", files=files,
        res = requests.request('POST', url=f"{self.dataset_url}/{dataset_id}/documents", files=files,
                               headers=self.authorization_header)

        result_dict = json.loads(res.text)
@@ -103,16 +102,27 @@ class RAGFlow:
    # ----------------------------delete a file-----------------------------------------------------
    def delete_files(self, document_id, dataset_id):
        endpoint = f"{self.document_url}/{dataset_id}/{document_id}"
        endpoint = f"{self.dataset_url}/{dataset_id}/documents/{document_id}"
        res = requests.delete(endpoint, headers=self.authorization_header)
        return res.json()

    # ----------------------------list files-----------------------------------------------------
    def list_files(self, dataset_id, offset=0, count=-1, order_by="create_time", descend=True, keywords=""):
        params = {
            "offset": offset,
            "count": count,
            "order_by": order_by,
            "descend": descend,
            "keywords": keywords
        }
        endpoint = f"{self.dataset_url}/{dataset_id}/documents/"
        res = requests.get(endpoint, params=params, headers=self.authorization_header)
        return res.json()
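Example use of `list_files`, mirroring the tests below (the dataset name and files are illustrative):

```python
# Illustrative only; API_KEY/HOST_ADDRESS and the test_data files are as in the tests below.
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
dataset_id = ragflow.create_dataset("list_files_demo")["data"]["dataset_id"]
ragflow.upload_local_file(dataset_id, ["test_data/test.txt", "test_data/empty.txt"])
res = ragflow.list_files(dataset_id, keywords="empty")
assert res["code"] == RetCode.SUCCESS and len(res["data"]["docs"]) == 1
```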
    # ----------------------------download a file-----------------------------------------------------
    # ----------------------------enable rename-----------------------------------------------------
    # ----------------------------list files-----------------------------------------------------
    # ----------------------------start parsing-----------------------------------------------------
    # ----------------------------stop parsing-----------------------------------------------------
@@ -37,7 +37,7 @@ class TestFile(TestSdk):
        dataset_id = created_res['data']['dataset_id']
        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
        res = ragflow.upload_local_file(dataset_id, file_paths)
        assert res['code'] == RetCode.SUCCESS and res['data'] is True and res['message'] == 'success'
        assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'

    def test_upload_one_file(self):
        """
@@ -48,7 +48,7 @@ class TestFile(TestSdk):
        dataset_id = created_res['data']['dataset_id']
        file_paths = ["test_data/test.txt"]
        res = ragflow.upload_local_file(dataset_id, file_paths)
        assert res['code'] == RetCode.SUCCESS and res['data'] is True and res['message'] == 'success'
        assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'

    def test_upload_nonexistent_files(self):
        """
@@ -237,12 +237,143 @@ class TestFile(TestSdk):
        assert (deleted_res['code'] == RetCode.ARGUMENT_ERROR and deleted_res['message'] ==
                f'The document {doc_id} is not in the dataset: {other_dataset_id}, but in the dataset: {created_res_id}.')

    # ----------------------------list files-----------------------------------------------------
    def test_list_documents_with_success(self):
        """
        Test listing documents with a successful outcome.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        # upload a document
        created_res = ragflow.create_dataset("test_list_documents_with_success")
        created_res_id = created_res['data']['dataset_id']
        file_paths = ["test_data/test.txt"]
        ragflow.upload_local_file(created_res_id, file_paths)

        # Call the list_document method
        response = ragflow.list_files(created_res_id)
        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 1

    def test_list_documents_with_checking_size(self):
        """
        Test listing documents and verify the size and names of the documents.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        # upload 10 documents
        created_res = ragflow.create_dataset("test_list_documents_with_checking_size")
        created_res_id = created_res['data']['dataset_id']
        file_paths = ["test_data/test.txt"] * 10
        ragflow.upload_local_file(created_res_id, file_paths)

        # Call the list_document method
        response = ragflow.list_files(created_res_id)
        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 10

    def test_list_documents_with_getting_empty_result(self):
        """
        Test listing documents that should be empty.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        # upload 0 documents
        created_res = ragflow.create_dataset("test_list_documents_with_getting_empty_result")
        created_res_id = created_res['data']['dataset_id']

        # Call the list_document method
        response = ragflow.list_files(created_res_id)
        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 0

    def test_list_documents_with_creating_100_documents(self):
        """
        Test listing 100 documents and verify the size of these documents.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        # upload 100 documents
        created_res = ragflow.create_dataset("test_list_documents_with_creating_100_documents")
        created_res_id = created_res['data']['dataset_id']
        file_paths = ["test_data/test.txt"] * 100
        ragflow.upload_local_file(created_res_id, file_paths)

        # Call the list_document method
        response = ragflow.list_files(created_res_id)
        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 100

    def test_list_document_with_failure(self):
        """
        Test listing documents with IndexError.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        created_res = ragflow.create_dataset("test_list_document_with_failure")
        created_res_id = created_res['data']['dataset_id']
        response = ragflow.list_files(created_res_id, offset=-1, count=-1)
        assert "IndexError" in response['message'] and response['code'] == RetCode.EXCEPTION_ERROR
    def test_list_document_with_verifying_offset_and_count(self):
        """
        Test listing documents with verifying the functionalities of offset and count.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        created_res = ragflow.create_dataset("test_list_document_with_verifying_offset_and_count")
        created_res_id = created_res['data']['dataset_id']
        file_paths = ["test_data/test.txt", "test_data/empty.txt"] * 10
        ragflow.upload_local_file(created_res_id, file_paths)

        # Call the list_document method
        response = ragflow.list_files(created_res_id, offset=2, count=10)
        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 10

    def test_list_document_with_verifying_keywords(self):
        """
        Test listing documents with verifying the functionality of searching keywords.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        created_res = ragflow.create_dataset("test_list_document_with_verifying_keywords")
        created_res_id = created_res['data']['dataset_id']
        file_paths = ["test_data/test.txt", "test_data/empty.txt"]
        ragflow.upload_local_file(created_res_id, file_paths)

        # Call the list_document method
        response = ragflow.list_files(created_res_id, keywords="empty")
        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 1

    def test_list_document_with_verifying_order_by_and_descend(self):
        """
        Test listing documents with verifying the functionality of order_by and descend.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_descend")
        created_res_id = created_res['data']['dataset_id']
        file_paths = ["test_data/test.txt", "test_data/empty.txt"]
        ragflow.upload_local_file(created_res_id, file_paths)

        # Call the list_document method
        response = ragflow.list_files(created_res_id)
        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 2
        docs = response['data']['docs']
        # reverse order
        i = 1
        for doc in docs:
            assert doc['name'] in file_paths[i]
            i -= 1
    def test_list_document_with_verifying_order_by_and_ascend(self):
        """
        Test listing documents with verifying the functionality of order_by and ascend.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_ascend")
        created_res_id = created_res['data']['dataset_id']
        file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
        ragflow.upload_local_file(created_res_id, file_paths)

        # Call the list_document method
        response = ragflow.list_files(created_res_id, descend=False)
        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 3

        docs = response['data']['docs']

        i = 0
        for doc in docs:
            assert doc['name'] in file_paths[i]
            i += 1

    # TODO: have to set the limitation of the number of documents

    # ----------------------------download a file-----------------------------------------------------
    # ----------------------------enable rename-----------------------------------------------------
    # ----------------------------list files-----------------------------------------------------
    # ----------------------------start parsing-----------------------------------------------------
    # ----------------------------stop parsing-----------------------------------------------------

@@ -257,8 +388,6 @@ class TestFile(TestSdk):
    # ----------------------------insert a new chunk-----------------------------------------------------
    # ----------------------------upload a file-----------------------------------------------------
    # ----------------------------get a specific chunk-----------------------------------------------------
    # ----------------------------retrieval test-----------------------------------------------------