浏览代码

Refactor: remove dataset language and validate dataset name length. (#5707)

### What problem does this PR solve?

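Addresses #5686 and #5702: removes the `language` parameter from dataset create/update validation and from the API reference docs, and rejects overly long dataset names on create and update.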

### Type of change

- [x] Refactoring
tags/v0.17.1
Kevin Hu 8 个月前
父节点
当前提交
ff35c140dc
没有帐户链接到提交者的电子邮件

+ 8
- 16
api/apps/sdk/dataset.py 查看文件

type: string type: string
enum: ['me', 'team'] enum: ['me', 'team']
description: Dataset permission. description: Dataset permission.
language:
type: string
enum: ['Chinese', 'English']
description: Language of the dataset.
chunk_method: chunk_method:
type: string type: string
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws", enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
req = request.json req = request.json
e, t = TenantService.get_by_id(tenant_id) e, t = TenantService.get_by_id(tenant_id)
permission = req.get("permission") permission = req.get("permission")
language = req.get("language")
chunk_method = req.get("chunk_method") chunk_method = req.get("chunk_method")
parser_config = req.get("parser_config") parser_config = req.get("parser_config")
valid_permission = ["me", "team"] valid_permission = ["me", "team"]
valid_language = ["Chinese", "English"]
valid_chunk_method = [ valid_chunk_method = [
"naive", "naive",
"manual", "manual",
check_validation = valid( check_validation = valid(
permission, permission,
valid_permission, valid_permission,
language,
valid_language,
chunk_method, chunk_method,
valid_chunk_method, valid_chunk_method,
) )
req["name"] = req["name"].strip() req["name"] = req["name"].strip()
if req["name"] == "": if req["name"] == "":
return get_error_data_result(message="`name` is not empty string!") return get_error_data_result(message="`name` is not empty string!")
if len(req["name"]) >= 128:
return get_error_data_result(
message="Dataset name should not be longer than 128 characters."
)
if KnowledgebaseService.query( if KnowledgebaseService.query(
name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value
): ):
type: string type: string
enum: ['me', 'team'] enum: ['me', 'team']
description: Updated permission. description: Updated permission.
language:
type: string
enum: ['Chinese', 'English']
description: Updated language.
chunk_method: chunk_method:
type: string type: string
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws", enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
if any(key in req for key in invalid_keys): if any(key in req for key in invalid_keys):
return get_error_data_result(message="The input parameters are invalid.") return get_error_data_result(message="The input parameters are invalid.")
permission = req.get("permission") permission = req.get("permission")
language = req.get("language")
chunk_method = req.get("chunk_method") chunk_method = req.get("chunk_method")
parser_config = req.get("parser_config") parser_config = req.get("parser_config")
valid_permission = ["me", "team"] valid_permission = ["me", "team"]
valid_language = ["Chinese", "English"]
valid_chunk_method = [ valid_chunk_method = [
"naive", "naive",
"manual", "manual",
check_validation = valid( check_validation = valid(
permission, permission,
valid_permission, valid_permission,
language,
valid_language,
chunk_method, chunk_method,
valid_chunk_method, valid_chunk_method,
) )
req["embd_id"] = req.pop("embedding_model") req["embd_id"] = req.pop("embedding_model")
if "name" in req: if "name" in req:
req["name"] = req["name"].strip() req["name"] = req["name"].strip()
if len(req["name"]) >= 128:
return get_error_data_result(
message="Dataset name should not be longer than 128 characters."
)
if ( if (
req["name"].lower() != kb.name.lower() req["name"].lower() != kb.name.lower()
and len( and len(

+ 1
- 3
api/utils/api_utils.py 查看文件

return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34] return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]




def valid(permission, valid_permission, language, valid_language, chunk_method, valid_chunk_method):
def valid(permission, valid_permission, chunk_method, valid_chunk_method):
if valid_parameter(permission, valid_permission): if valid_parameter(permission, valid_permission):
return valid_parameter(permission, valid_permission) return valid_parameter(permission, valid_permission)
if valid_parameter(language, valid_language):
return valid_parameter(language, valid_language)
if valid_parameter(chunk_method, valid_chunk_method): if valid_parameter(chunk_method, valid_chunk_method):
return valid_parameter(chunk_method, valid_chunk_method) return valid_parameter(chunk_method, valid_chunk_method)



+ 0
- 6
docs/references/http_api_reference.md 查看文件

- `"name"`: `string` - `"name"`: `string`
- `"avatar"`: `string` - `"avatar"`: `string`
- `"description"`: `string` - `"description"`: `string`
- `"language"`: `string`
- `"embedding_model"`: `string` - `"embedding_model"`: `string`
- `"permission"`: `string` - `"permission"`: `string`
- `"chunk_method"`: `string` - `"chunk_method"`: `string`
- `"description"`: (*Body parameter*), `string` - `"description"`: (*Body parameter*), `string`
A brief description of the dataset to create. A brief description of the dataset to create.


- `"language"`: (*Body parameter*), `string`
The language setting of the dataset to create. Available options:
- `"English"` (default)
- `"Chinese"`

- `"embedding_model"`: (*Body parameter*), `string` - `"embedding_model"`: (*Body parameter*), `string`
The name of the embedding model to use. For example: `"BAAI/bge-zh-v1.5"` The name of the embedding model to use. For example: `"BAAI/bge-zh-v1.5"`



+ 0
- 7
docs/references/python_api_reference.md 查看文件

avatar: str = "", avatar: str = "",
description: str = "", description: str = "",
embedding_model: str = "BAAI/bge-large-zh-v1.5", embedding_model: str = "BAAI/bge-large-zh-v1.5",
language: str = "English",
permission: str = "me", permission: str = "me",
chunk_method: str = "naive", chunk_method: str = "naive",
parser_config: DataSet.ParserConfig = None parser_config: DataSet.ParserConfig = None


A brief description of the dataset to create. Defaults to `""`. A brief description of the dataset to create. Defaults to `""`.


##### language: `str`

The language setting of the dataset to create. Available options:

- `"English"` (default)
- `"Chinese"`


##### permission ##### permission



+ 0
- 1
sdk/python/ragflow_sdk/modules/dataset.py 查看文件

self.avatar = "" self.avatar = ""
self.tenant_id = None self.tenant_id = None
self.description = "" self.description = ""
self.language = "English"
self.embedding_model = "" self.embedding_model = ""
self.permission = "me" self.permission = "me"
self.document_count = 0 self.document_count = 0

正在加载...
取消
保存