### What problem does this PR solve?

Refactor Dataset API

### Type of change

- [x] Refactoring

---------

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
sys.modules[module_name] = page
spec.loader.exec_module(page)
page_name = getattr(page, 'page_name', page_name)
url_prefix = f'/api/{API_VERSION}' if "/sdk/" in path else f'/{API_VERSION}/{page_name}'
app.register_blueprint(page.manager, url_prefix=url_prefix)
return url_prefix
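For illustration, a tiny sketch of what the prefix logic above produces, assuming `API_VERSION = "v1"` (the file paths are hypothetical examples, not paths from this repository):

```python
API_VERSION = "v1"

def prefix_for(path: str, page_name: str) -> str:
    # SDK pages are mounted under the shared /api/v1 prefix;
    # web pages keep their per-page /v1/<page_name> prefix.
    return f'/api/{API_VERSION}' if "/sdk/" in path else f'/{API_VERSION}/{page_name}'

assert prefix_for("/sdk/dataset.py", "dataset") == "/api/v1"
assert prefix_for("/apps/kb_app.py", "kb_app") == "/v1/kb_app"
```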
from api.db.services.user_service import TenantService
from api.settings import RetCode
from api.utils import get_uuid
from api.utils.api_utils import get_result, token_required, get_error_data_result
@manager.route('/dataset', methods=['POST'])
@token_required
def create(tenant_id):
    req = request.json
    e, t = TenantService.get_by_id(tenant_id)
| if "id" not in req: | |||||
| if "tenant_id" in req or "embedding_model" in req: | |||||
| return get_data_error_result( | |||||
| retmsg="Tenant_id or embedding_model must not be provided") | |||||
| if "name" not in req: | |||||
| return get_data_error_result( | |||||
| retmsg="Name is not empty!") | |||||
| req['id'] = get_uuid() | |||||
| req["name"] = req["name"].strip() | |||||
| if req["name"] == "": | |||||
| return get_data_error_result( | |||||
| retmsg="Name is not empty string!") | |||||
| if KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value): | |||||
| return get_data_error_result( | |||||
| retmsg="Duplicated knowledgebase name in creating dataset.") | |||||
| req["tenant_id"] = req['created_by'] = tenant_id | |||||
| req['embedding_model'] = t.embd_id | |||||
| key_mapping = { | |||||
| "chunk_num": "chunk_count", | |||||
| "doc_num": "document_count", | |||||
| "parser_id": "parse_method", | |||||
| "embd_id": "embedding_model" | |||||
| } | |||||
| mapped_keys = {new_key: req[old_key] for new_key, old_key in key_mapping.items() if old_key in req} | |||||
| req.update(mapped_keys) | |||||
| if not KnowledgebaseService.save(**req): | |||||
| return get_data_error_result(retmsg="Create dataset error.(Database error)") | |||||
| renamed_data = {} | |||||
| e, k = KnowledgebaseService.get_by_id(req["id"]) | |||||
| for key, value in k.to_dict().items(): | |||||
| new_key = key_mapping.get(key, key) | |||||
| renamed_data[new_key] = value | |||||
| return get_json_result(data=renamed_data) | |||||
| else: | |||||
| invalid_keys = {"embd_id", "chunk_num", "doc_num", "parser_id"} | |||||
| if any(key in req for key in invalid_keys): | |||||
| return get_data_error_result(retmsg="The input parameters are invalid.") | |||||
| if "tenant_id" in req: | |||||
| if req["tenant_id"] != tenant_id: | |||||
| return get_data_error_result( | |||||
| retmsg="Can't change tenant_id.") | |||||
| if "embedding_model" in req: | |||||
| if req["embedding_model"] != t.embd_id: | |||||
| return get_data_error_result( | |||||
| retmsg="Can't change embedding_model.") | |||||
| req.pop("embedding_model") | |||||
| if not KnowledgebaseService.query( | |||||
| created_by=tenant_id, id=req["id"]): | |||||
| return get_json_result( | |||||
| data=False, retmsg='You do not own the dataset.', | |||||
| retcode=RetCode.OPERATING_ERROR) | |||||
| if not req["id"]: | |||||
| return get_data_error_result( | |||||
| retmsg="id can not be empty.") | |||||
| e, kb = KnowledgebaseService.get_by_id(req["id"]) | |||||
| if "chunk_count" in req: | |||||
| if req["chunk_count"] != kb.chunk_num: | |||||
| return get_data_error_result( | |||||
| retmsg="Can't change chunk_count.") | |||||
| req.pop("chunk_count") | |||||
| if "document_count" in req: | |||||
| if req['document_count'] != kb.doc_num: | |||||
| return get_data_error_result( | |||||
| retmsg="Can't change document_count.") | |||||
| req.pop("document_count") | |||||
| if "parse_method" in req: | |||||
| if kb.chunk_num != 0 and req['parse_method'] != kb.parser_id: | |||||
| return get_data_error_result( | |||||
| retmsg="If chunk count is not 0, parse method is not changable.") | |||||
| req['parser_id'] = req.pop('parse_method') | |||||
| if "name" in req: | |||||
| req["name"] = req["name"].strip() | |||||
| if req["name"].lower() != kb.name.lower() \ | |||||
| and len(KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id, | |||||
| status=StatusEnum.VALID.value)) > 0: | |||||
| return get_data_error_result( | |||||
| retmsg="Duplicated knowledgebase name in updating dataset.") | |||||
| del req["id"] | |||||
| if not KnowledgebaseService.update_by_id(kb.id, req): | |||||
| return get_data_error_result(retmsg="Update dataset error.(Database error)") | |||||
| return get_json_result(data=True) | |||||
| if "tenant_id" in req or "embedding_model" in req: | |||||
| return get_error_data_result( | |||||
| retmsg="Tenant_id or embedding_model must not be provided") | |||||
| chunk_count=req.get("chunk_count") | |||||
| document_count=req.get("document_count") | |||||
| if chunk_count or document_count: | |||||
| return get_error_data_result(retmsg="chunk_count or document_count must be 0 or not be provided") | |||||
| if "name" not in req: | |||||
| return get_error_data_result( | |||||
| retmsg="Name is not empty!") | |||||
| req['id'] = get_uuid() | |||||
| req["name"] = req["name"].strip() | |||||
| if req["name"] == "": | |||||
| return get_error_data_result( | |||||
| retmsg="Name is not empty string!") | |||||
| if KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value): | |||||
| return get_error_data_result( | |||||
| retmsg="Duplicated knowledgebase name in creating dataset.") | |||||
| req["tenant_id"] = req['created_by'] = tenant_id | |||||
| req['embedding_model'] = t.embd_id | |||||
| key_mapping = { | |||||
| "chunk_num": "chunk_count", | |||||
| "doc_num": "document_count", | |||||
| "parser_id": "parse_method", | |||||
| "embd_id": "embedding_model" | |||||
| } | |||||
| mapped_keys = {new_key: req[old_key] for new_key, old_key in key_mapping.items() if old_key in req} | |||||
| req.update(mapped_keys) | |||||
| if not KnowledgebaseService.save(**req): | |||||
| return get_error_data_result(retmsg="Create dataset error.(Database error)") | |||||
| renamed_data = {} | |||||
| e, k = KnowledgebaseService.get_by_id(req["id"]) | |||||
| for key, value in k.to_dict().items(): | |||||
| new_key = key_mapping.get(key, key) | |||||
| renamed_data[new_key] = value | |||||
| return get_result(data=renamed_data) | |||||


@manager.route('/dataset', methods=['DELETE'])
@token_required
def delete(tenant_id):
    req = request.json
    names = req.get("names")
    ids = req.get("ids")
    if not ids and not names:
        return get_error_data_result(
            retmsg="`ids` or `names` is required.")
    id_list = []
    if names:
        for name in names:
            kbs = KnowledgebaseService.query(name=name, tenant_id=tenant_id)
            if not kbs:
                return get_error_data_result(retmsg=f"You don't own the dataset {name}")
            id_list.append(kbs[0].id)
    if ids:
        for id in ids:
            kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
            if not kbs:
                return get_error_data_result(retmsg=f"You don't own the dataset {id}")
        id_list.extend(ids)
    for id in id_list:
        for doc in DocumentService.query(kb_id=id):
            if not DocumentService.remove_document(doc, tenant_id):
                return get_error_data_result(
                    retmsg="Remove document error.(Database error)")
            f2d = File2DocumentService.get_by_document_id(doc.id)
            FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
            File2DocumentService.delete_by_document_id(doc.id)
        if not KnowledgebaseService.delete_by_id(id):
            return get_error_data_result(
                retmsg="Delete dataset error.(Database error)")
    return get_result(retcode=RetCode.SUCCESS)


@manager.route('/dataset/<dataset_id>', methods=['PUT'])
@token_required
def update(tenant_id, dataset_id):
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(retmsg="You don't own the dataset.")
    req = request.json
    e, t = TenantService.get_by_id(tenant_id)
    invalid_keys = {"id", "embd_id", "chunk_num", "doc_num", "parser_id"}
    if any(key in req for key in invalid_keys):
        return get_error_data_result(retmsg="The input parameters are invalid.")
    if "tenant_id" in req:
        if req["tenant_id"] != tenant_id:
            return get_error_data_result(
                retmsg="Can't change tenant_id.")
    if "embedding_model" in req:
        if req["embedding_model"] != t.embd_id:
            return get_error_data_result(
                retmsg="Can't change embedding_model.")
        req.pop("embedding_model")
    e, kb = KnowledgebaseService.get_by_id(dataset_id)
    if "chunk_count" in req:
        if req["chunk_count"] != kb.chunk_num:
            return get_error_data_result(
                retmsg="Can't change chunk_count.")
        req.pop("chunk_count")
    if "document_count" in req:
        if req['document_count'] != kb.doc_num:
            return get_error_data_result(
                retmsg="Can't change document_count.")
        req.pop("document_count")
    if "parse_method" in req:
        if kb.chunk_num != 0 and req['parse_method'] != kb.parser_id:
            return get_error_data_result(
                retmsg="If chunk count is not 0, parse method is not changeable.")
        req['parser_id'] = req.pop('parse_method')
    if "name" in req:
        req["name"] = req["name"].strip()
        if req["name"].lower() != kb.name.lower() \
                and len(KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id,
                                                   status=StatusEnum.VALID.value)) > 0:
            return get_error_data_result(
                retmsg="Duplicated knowledgebase name in updating dataset.")
    if not KnowledgebaseService.update_by_id(kb.id, req):
        return get_error_data_result(retmsg="Update dataset error.(Database error)")
    return get_result(retcode=RetCode.SUCCESS)


@manager.route('/dataset', methods=['GET'])
@token_required
def list(tenant_id):
    id = request.args.get("id")
    name = request.args.get("name")
    kbs = KnowledgebaseService.query(id=id, name=name, status=1)
    if not kbs:
        return get_error_data_result(retmsg="The dataset doesn't exist")
    page_number = int(request.args.get("page", 1))
    items_per_page = int(request.args.get("page_size", 1024))
    orderby = request.args.get("orderby", "create_time")
    desc = bool(request.args.get("desc", True))
    tenants = TenantService.get_joined_tenants_by_user_id(tenant_id)
    kbs = KnowledgebaseService.get_list(
        [m["tenant_id"] for m in tenants], tenant_id, page_number, items_per_page, orderby, desc, id, name)
    renamed_list = []
    for kb in kbs:
        key_mapping = {
            "chunk_num": "chunk_count",
            "doc_num": "document_count",
            "parser_id": "parse_method",
            "embd_id": "embedding_model"
        }
        renamed_data = {}
        for key, value in kb.items():
            new_key = key_mapping.get(key, key)
            renamed_data[new_key] = value
        renamed_list.append(renamed_data)
    return get_result(data=renamed_list)
    @DB.connection_context()
    def get_all_ids(cls):
        return [m["id"] for m in cls.model.select(cls.model.id).dicts()]

    @classmethod
    @DB.connection_context()
    def get_list(cls, joined_tenant_ids, user_id,
                 page_number, items_per_page, orderby, desc, id, name):
        # Optional exact-match filters on dataset id and name.
        kbs = cls.model.select()
        if id:
            kbs = kbs.where(cls.model.id == id)
        if name:
            kbs = kbs.where(cls.model.name == name)
        # Visible datasets: team-shared ones from joined tenants, plus the user's own.
        kbs = kbs.where(
            ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission ==
                                                            TenantPermission.TEAM.value)) | (
                     cls.model.tenant_id == user_id))
            & (cls.model.status == StatusEnum.VALID.value)
        )
        if desc:
            kbs = kbs.order_by(cls.model.getter_by(orderby).desc())
        else:
            kbs = kbs.order_by(cls.model.getter_by(orderby).asc())
        kbs = kbs.paginate(page_number, items_per_page)
        return list(kbs.dicts())
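For context on how `get_list` composes its filters: in peewee, successive `where()` calls are ANDed together, so the optional id/name filters stack cleanly on top of the permission filter. A minimal self-contained sketch of that pattern, using a hypothetical `KB` model rather than the real RAGFlow schema:

```python
from peewee import BooleanField, CharField, Model, SqliteDatabase

db = SqliteDatabase(":memory:")

class KB(Model):
    name = CharField()
    valid = BooleanField(default=True)

    class Meta:
        database = db

db.create_tables([KB])
KB.create(name="a")
KB.create(name="b", valid=False)

query = KB.select()
query = query.where(KB.name == "a")    # successive where() calls AND together
query = query.where(KB.valid == True)
print([kb.name for kb in query.paginate(1, 10)])  # ['a']
```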
**POST** `/api/v1/dataset`

Creates a dataset.

### Request

- Method: POST
- URL: `http://{address}/api/v1/dataset`
- Headers:
  - `Content-Type: application/json`
  - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
- Body:
  - `"id"`: `string`
  - `"name"`: `string`
  - `"avatar"`: `string`
  - `"tenant_id"`: `string`
  - `"description"`: `string`
  - `"language"`: `string`
  - `"embedding_model"`: `string`
  - `"permission"`: `string`
  - `"document_count"`: `integer`
  - `"chunk_count"`: `integer`
  - `"parse_method"`: `string`
  - `"parser_config"`: `Dataset.ParserConfig`
#### Request example

```bash
# "id": id must not be provided.
# "name": name is required and can't be duplicated.
# "tenant_id": tenant_id must not be provided.
# "embedding_model": embedding_model must not be provided.
# "naive" means general.
curl --request POST \
     --url http://{address}/api/v1/dataset \
     --header 'Content-Type: application/json' \
     --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
     --data '{
     "name": "test",
     "chunk_count": 0,
     "document_count": 0,
     "parse_method": "naive"
     }'
```
#### Request parameters

- `"id"`: (*Body parameter*)
  The ID of the created dataset, used to uniquely identify different datasets.
  - If creating a dataset, `id` must not be provided.
- `"name"`: (*Body parameter*)
  The name of the dataset, which must adhere to the following requirements:
  - Maximum 65,535 characters.
  - Required when creating a dataset and must be unique.
  - If updating a dataset, `name` must still be unique.
- `"avatar"`: (*Body parameter*)
  Base64 encoding of the avatar.
- `"tenant_id"`: (*Body parameter*)
  The ID of the tenant associated with the dataset, used to link it with specific users.
  - If creating a dataset, `tenant_id` must not be provided.
  - If updating a dataset, `tenant_id` cannot be changed.
- `"description"`: (*Body parameter*)
  The description of the dataset.
- `"language"`: (*Body parameter*)
  The language setting for the dataset.
- `"embedding_model"`: (*Body parameter*)
  Embedding model used in the dataset to generate vector embeddings.
  - If creating a dataset, `embedding_model` must not be provided.
  - If updating a dataset, `embedding_model` cannot be changed.
- `"permission"`: (*Body parameter*)
  Specifies who can manipulate the dataset.
- `"document_count"`: (*Body parameter*)
  Document count of the dataset.
  - If updating a dataset, `document_count` cannot be changed.
- `"chunk_count"`: (*Body parameter*)
  Chunk count of the dataset.
  - If updating a dataset, `chunk_count` cannot be changed.
- `"parse_method"`: (*Body parameter*)
  Parsing method of the dataset.
  - If updating `parse_method`, `chunk_count` must be 0.
- `"parser_config"`: (*Body parameter*)
  The configuration settings for the dataset parser.
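For readers scripting against the raw HTTP API, here is a minimal Python sketch of the same create call. It assumes the `requests` library; the address and token values are placeholders, not real credentials:

```python
import requests

ADDRESS = "127.0.0.1:9380"     # placeholder server address
API_KEY = "YOUR_ACCESS_TOKEN"  # placeholder API key

# Only "name" is required; "tenant_id" and "embedding_model"
# must be omitted because the server fills them in itself.
res = requests.post(
    f"http://{ADDRESS}/api/v1/dataset",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={"name": "test", "parse_method": "naive"},
)
body = res.json()
if body.get("code") != 0:
    raise RuntimeError(body.get("message"))
print(body["data"]["id"])  # id of the newly created dataset
```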
### Response

The successful response includes a JSON object like the following:

```json
{
    "code": 0,
    "data": {
        "avatar": null,
        "chunk_count": 0,
        "create_date": "Thu, 10 Oct 2024 05:57:37 GMT",
        "create_time": 1728539857641,
        "created_by": "69736c5e723611efb51b0242ac120007",
        "description": null,
        "document_count": 0,
        "embedding_model": "BAAI/bge-large-zh-v1.5",
        "id": "8d73076886cc11ef8c270242ac120006",
        "language": "English",
        "name": "test_1",
        "parse_method": "naive",
        "parser_config": {
            "pages": [
                [
                    1,
                    1000000
                ]
            ]
        },
        "permission": "me",
        "similarity_threshold": 0.2,
        "status": "1",
        "tenant_id": "69736c5e723611efb51b0242ac120007",
        "token_num": 0,
        "update_date": "Thu, 10 Oct 2024 05:57:37 GMT",
        "update_time": 1728539857641,
        "vector_similarity_weight": 0.3
    }
}
```

The error response includes a JSON object like the following:

```json
{
    "code": 102,
    "message": "Duplicated knowledgebase name in creating dataset."
}
```
**DELETE** `/api/v1/dataset`

Deletes datasets by ids or names.

### Request

- Method: DELETE
- URL: `http://{address}/api/v1/dataset`
- Headers:
  - `Content-Type: application/json`
  - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
- Body:
  - `"names"`: `List[string]`
  - `"ids"`: `List[string]`

#### Request example

```bash
# Either "ids" or "names" must be provided, but not both.
curl --request DELETE \
     --url http://{address}/api/v1/dataset \
     --header 'Content-Type: application/json' \
     --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
     --data '{
     "names": ["test_1", "test_2"]
     }'
```

#### Request parameters

- `"names"`: (*Body parameter*)
  The names of the datasets to be deleted.
- `"ids"`: (*Body parameter*)
  The ids of the datasets to be deleted.

### Response

The successful response includes a JSON object like the following:

```json
{
    "code": 0
}
```

The error response includes a JSON object like the following:

```json
{
    "code": 102,
    "message": "You don't own the dataset."
}
```
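A matching Python sketch for deletion, under the same placeholder `ADDRESS` and `API_KEY` assumptions as the create example:

```python
import requests

ADDRESS = "127.0.0.1:9380"     # placeholder server address
API_KEY = "YOUR_ACCESS_TOKEN"  # placeholder API key

# Delete by names; an {"ids": [...]} body works the same way.
res = requests.delete(
    f"http://{ADDRESS}/api/v1/dataset",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={"names": ["test_1", "test_2"]},
)
assert res.json().get("code") == 0, res.json().get("message")
```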
### Request

- Method: PUT
- URL: `http://{address}/api/v1/dataset/{dataset_id}`
- Headers:
  - `Content-Type: application/json`
  - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`
- Body: (Refer to the "Create Dataset" for the complete structure of the request body.)

#### Request example

```bash
# "id": id is required.
# "name": If you update name, it can't be duplicated.
# "tenant_id": If you update tenant_id, it can't be changed.
# "embedding_model": If you update embedding_model, it can't be changed.
# "chunk_count": If you update chunk_count, it can't be changed.
# "document_count": If you update document_count, it can't be changed.
# "parse_method": If you update parse_method, chunk_count must be 0.
# "naive" means general.
curl --request PUT \
     --url http://{address}/api/v1/dataset/{dataset_id} \
     --header 'Content-Type: application/json' \
     --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
     --data '{
     "name": "test",
     "tenant_id": "4fb0cd625f9311efba4a0242ac120006",
     "embedding_model": "BAAI/bge-zh-v1.5",
     "chunk_count": 0,
     "document_count": 0,
     "parse_method": "naive"
     }'
```

#### Request parameters

(Refer to the "Create Dataset" for the complete structure of the request parameters.)

### Response

The successful response includes a JSON object like the following:

```json
{
    "code": 0
}
```

The error response includes a JSON object like the following:

```json
{
    "code": 102,
    "message": "Can't change tenant_id."
}
```
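An equivalent Python sketch for the update call (the dataset id is a placeholder; immutable fields such as `chunk_count` are simply left out of the body):

```python
import requests

ADDRESS = "127.0.0.1:9380"      # placeholder server address
API_KEY = "YOUR_ACCESS_TOKEN"   # placeholder API key
DATASET_ID = "YOUR_DATASET_ID"  # placeholder dataset id

# Rename the dataset; only mutable fields go in the body.
res = requests.put(
    f"http://{ADDRESS}/api/v1/dataset/{DATASET_ID}",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={"name": "test_renamed"},
)
assert res.json().get("code") == 0, res.json().get("message")
```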
## List datasets

**GET** `/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`

Lists all datasets.

### Request

- Method: GET
- URL: `http://{address}/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
- Headers:
  - `Authorization: Bearer {YOUR_ACCESS_TOKEN}`

#### Request example

```bash
# If no page parameter is passed, the default is 1.
# If no page_size parameter is passed, the default is 1024.
# If no orderby parameter is passed, the default is "create_time".
# If no desc parameter is passed, the default is True.
curl --request GET \
     --url http://{address}/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id} \
     --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
```
#### Request parameters

- `desc`: (*Path parameter*)
  A boolean flag indicating whether the sorting should be in descending order. Defaults to `True`.
- `id`: (*Path parameter*)
  The ID of the dataset to be retrieved.
- `name`: (*Path parameter*)
  The name of the dataset to be retrieved.
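A Python sketch of the list call with pagination (same placeholder address and token assumptions as above; the filter arguments are optional):

```python
import requests

ADDRESS = "127.0.0.1:9380"     # placeholder server address
API_KEY = "YOUR_ACCESS_TOKEN"  # placeholder API key

res = requests.get(
    f"http://{ADDRESS}/api/v1/dataset",
    headers={"Authorization": f"Bearer {API_KEY}"},
    params={"page": 1, "page_size": 50, "orderby": "create_time"},
)
body = res.json()
if body.get("code") != 0:
    raise RuntimeError(body.get("message"))
for ds in body["data"]:
    print(ds["id"], ds["name"])
```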
### Response

The successful response includes a JSON object like the following:

```json
{
    "code": 0,
    "data": [
        {
            "avatar": "",
            "chunk_count": 59,
            "create_date": "Sat, 14 Sep 2024 01:12:37 GMT",
            "create_time": 1726276357324,
            "created_by": "69736c5e723611efb51b0242ac120007",
            "description": null,
            "document_count": 1,
            "embedding_model": "BAAI/bge-large-zh-v1.5",
            "id": "6e211ee0723611efa10a0242ac120007",
            "language": "English",
            "name": "mysql",
            "parse_method": "knowledge_graph",
            "parser_config": {
                "chunk_token_num": 8192,
                "delimiter": "\\n!?;。;!?",
                "entity_types": [
                    "organization",
                    "person",
                    "location",
                    "event",
                    "time"
                ]
            },
            "permission": "me",
            "similarity_threshold": 0.2,
            "status": "1",
            "tenant_id": "69736c5e723611efb51b0242ac120007",
            "token_num": 12744,
            "update_date": "Thu, 10 Oct 2024 04:07:23 GMT",
            "update_time": 1728533243536,
            "vector_similarity_weight": 0.3
        }
    ]
}
```

The error response includes a JSON object like the following:

```json
{
    "code": 102,
    "message": "The dataset doesn't exist"
}
```
#### avatar: `str`

Base64 encoding of the avatar. Defaults to `""`.

#### tenant_id: `str`

The id of the tenant associated with the created dataset, used to identify different users. Defaults to `None`.

The language setting of the created dataset. Defaults to `"English"`.

#### embedding_model: `str`

The specific model used by the dataset to generate vector embeddings. Defaults to `""`.

- If creating a dataset, `embedding_model` must not be provided.
- If updating a dataset, `embedding_model` can't be changed.

The configuration settings for the parser used by the dataset.

### Returns

```python
DataSet
description: dataset object
```
### Examples

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
ds = rag.create_dataset(name="kb_1")
```

---
## Delete knowledge bases

```python
RAGFlow.delete_dataset(ids: List[str] = None, names: List[str] = None)
```

Deletes knowledge bases.

### Parameters

#### ids: `List[str]`

The ids of the datasets to be deleted.

#### names: `List[str]`

The names of the datasets to be deleted.

Either `ids` or `names` must be provided, but not both.

### Returns

```python
no return
```
### Examples

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
rag.delete_dataset(names=["name_1","name_2"])
rag.delete_dataset(ids=["id_1","id_2"])
```

---
    page: int = 1,
    page_size: int = 1024,
    orderby: str = "create_time",
    desc: bool = True,
    id: str = None,
    name: str = None
) -> List[DataSet]
```

Whether the sorting should be in descending order. Defaults to `True`.
#### id: `str`

The id of the dataset to be got. Defaults to `None`.

#### name: `str`

The name of the dataset to be got. Defaults to `None`.

### Returns

```python
List[DataSet]
description: the list of datasets.
```

### Examples

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
for ds in rag.list_datasets():
    print(ds)
```
---

## Update knowledge base

```python
DataSet.update(update_message: dict)
```

### Returns

```python
no return
```

### Examples

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
ds = rag.list_datasets(name="kb_1")[0]
ds.update({"parse_method": "manual", ...})
```

---
        return func(*args, **kwargs)
    return decorated_function


def get_result(retcode=RetCode.SUCCESS, retmsg='error', data=None):
    if retcode == 0:
        if data is not None:
            response = {"code": retcode, "data": data}
        else:
            response = {"code": retcode}
    else:
        response = {"code": retcode, "message": retmsg}
    return jsonify(response)


def get_error_data_result(retcode=RetCode.DATA_ERROR,
                          retmsg='Sorry! Data missing!'):
    import re
    result_dict = {
        "code": retcode,
        "message": re.sub(
            r"rag",
            "seceum",
            retmsg,
            flags=re.IGNORECASE)}
    response = {}
    for key, value in result_dict.items():
        if value is None and key != "code":
            continue
        else:
            response[key] = value
    return jsonify(response)
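Taken together, these helpers mean every new-style endpoint returns one of two envelopes: `{"code": 0, "data": ...}` on success or `{"code": <non-zero>, "message": ...}` on failure. A minimal sketch of that contract, assuming it runs in the same module as the helpers above and that `RetCode.DATA_ERROR` is the 102 seen in the doc examples:

```python
from flask import Flask

app = Flask(__name__)

# jsonify() needs an application context, which the test context provides.
with app.test_request_context():
    ok = get_result(data={"id": "123"})
    err = get_error_data_result(retmsg="Duplicated knowledgebase name in creating dataset.")
    print(ok.get_json())   # {'code': 0, 'data': {'id': '123'}}
    print(err.get_json())  # {'code': 102, 'message': 'Duplicated knowledgebase name ...'}
```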
        res = self.rag.delete(path, params)
        return res

    def put(self, path, json):
        res = self.rag.put(path, json)
        return res

    def __str__(self):
        return str(self.to_json())
                res_dict.pop(k)
        super().__init__(rag, res_dict)

    def update(self, update_message: dict):
        res = self.put(f'/dataset/{self.id}', update_message)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res["message"])

    def list_docs(self, keywords: Optional[str] = None, offset: int = 0, limit: int = -1) -> List[Document]:
        """
import requests

from .modules.assistant import Assistant
from .modules.chunk import Chunk
from .modules.dataset import DataSet
from .modules.document import Document


class RAGFlow:
        return res

    def delete(self, path, params):
        res = requests.delete(url=self.api_url + path, json=params, headers=self.authorization_header)
        return res

    def put(self, path, json):
        res = requests.put(url=self.api_url + path, json=json, headers=self.authorization_header)
        return res
    def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
                       permission: str = "me", document_count: int = 0, chunk_count: int = 0,
                       parse_method: str = "naive", parser_config: DataSet.ParserConfig = None) -> DataSet:
        if parser_config is None:
            parser_config = DataSet.ParserConfig(self, {"chunk_token_count": 128, "layout_recognize": True,
                                                        "delimiter": "\n!?。;!?", "task_page_size": 12})
        parser_config = parser_config.to_json()
        res = self.post("/dataset",
                        {"name": name, "avatar": avatar, "description": description, "language": language,
                         "permission": permission,
                         "document_count": document_count, "chunk_count": chunk_count, "parse_method": parse_method,
                         "parser_config": parser_config
                         }
                        )
        res = res.json()
        if res.get("code") == 0:
            return DataSet(self, res["data"])
        raise Exception(res["message"])
    def delete_dataset(self, ids: List[str] = None, names: List[str] = None):
        res = self.delete("/dataset", {"ids": ids, "names": names})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res["message"])
    def list_datasets(self, page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True,
                      id: str = None, name: str = None) -> \
            List[DataSet]:
        res = self.get("/dataset",
                       {"page": page, "page_size": page_size, "orderby": orderby, "desc": desc, "id": id, "name": name})
        res = res.json()
        result_list = []
        if res.get("code") == 0:
            for data in res['data']:
                result_list.append(DataSet(self, data))
            return result_list
        raise Exception(res["message"])
    def create_assistant(self, name: str = "assistant", avatar: str = "path", knowledgebases: List[DataSet] = [],
                         llm: Assistant.LLM = None, prompt: Assistant.Prompt = None) -> Assistant:
        except Exception as e:
            print(f"An error occurred during retrieval: {e}")
            raise


API_KEY = 'ragflow-NiYmZjNTVjODYwNzExZWZiODEwMDI0Mm'
HOST_ADDRESS = 'http://127.0.0.1:9380'
| ds = rag.create_dataset("ABC") | ds = rag.create_dataset("ABC") | ||||
| if isinstance(ds, DataSet): | if isinstance(ds, DataSet): | ||||
| assert ds.name == "ABC", "Name does not match." | assert ds.name == "ABC", "Name does not match." | ||||
| ds.name = 'DEF' | |||||
| res = ds.save() | |||||
| assert res is True, f"Failed to update dataset, error: {res}" | |||||
| res = ds.update({"name":"DEF"}) | |||||
| assert res is None, f"Failed to update dataset, error: {res}" | |||||
| else: | else: | ||||
| assert False, f"Failed to create dataset, error: {ds}" | assert False, f"Failed to create dataset, error: {ds}" | ||||
| ds = rag.create_dataset("MA") | ds = rag.create_dataset("MA") | ||||
| if isinstance(ds, DataSet): | if isinstance(ds, DataSet): | ||||
| assert ds.name == "MA", "Name does not match." | assert ds.name == "MA", "Name does not match." | ||||
| res = ds.delete() | |||||
| assert res is True, f"Failed to delete dataset, error: {res}" | |||||
| res = rag.delete_dataset(names=["MA"]) | |||||
| assert res is None, f"Failed to delete dataset, error: {res}" | |||||
| else: | else: | ||||
| assert False, f"Failed to create dataset, error: {ds}" | assert False, f"Failed to create dataset, error: {ds}" | ||||
| assert len(list_datasets) > 0, "Do not exist any dataset" | assert len(list_datasets) > 0, "Do not exist any dataset" | ||||
| for ds in list_datasets: | for ds in list_datasets: | ||||
| assert isinstance(ds, DataSet), "Existence type is not dataset." | assert isinstance(ds, DataSet), "Existence type is not dataset." | ||||