Browse Source

Refa: knowledge_graph chunk method is deprecated (#7220)

### What problem does this PR solve?

The `knowledge_graph` chunk method is deprecated and should no longer be
used (see #7184).

### Type of change

- [x] Refactoring
tags/v0.18.0
Yongteng Lei 6 months ago
parent
commit
94181a990b
No account linked to committer's email address

+ 2
- 4
api/apps/sdk/dataset.py View File

@@ -69,7 +69,7 @@ def create(tenant_id):
chunk_method:
type: string
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
"presentation", "picture", "one", "knowledge_graph", "email", "tag"
"presentation", "picture", "one", "email", "tag"
]
description: Chunking method.
parser_config:
@@ -105,7 +105,6 @@ def create(tenant_id):
"presentation",
"picture",
"one",
"knowledge_graph",
"email",
"tag"
]
@@ -315,7 +314,7 @@ def update(tenant_id, dataset_id):
chunk_method:
type: string
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
"presentation", "picture", "one", "knowledge_graph", "email", "tag"
"presentation", "picture", "one", "email", "tag"
]
description: Updated chunking method.
parser_config:
@@ -353,7 +352,6 @@ def update(tenant_id, dataset_id):
"presentation",
"picture",
"one",
"knowledge_graph",
"email",
"tag"
]

+ 1
- 1
api/settings.py View File

@@ -109,7 +109,7 @@ def init_settings():
API_KEY = LLM.get("api_key", "")
PARSERS = LLM.get(
"parsers",
"naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email,tag:Tag")
"naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag")

HOST_IP = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1")
HOST_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port")

+ 1
- 12
docs/references/http_api_reference.md View File

@@ -393,7 +393,6 @@ curl --request POST \
- `"presentation"`: Presentation
- `"picture"`: Picture
- `"one"`: One
- `"knowledge_graph"`: Knowledge Graph
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
- `"email"`: Email

@@ -409,10 +408,6 @@ curl --request POST \
- If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:
- `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
- If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
- If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:
- `"chunk_token_count"`: Defaults to `128`.
- `"delimiter"`: Defaults to `"\n"`.
- `"entity_types"`: Defaults to `["organization","person","location","event","time"]`

#### Response

@@ -573,8 +568,6 @@ curl --request PUT \
- `"picture"`: Picture
- `"one"`: One
- `"email"`: Email
- `"knowledge_graph"`: Knowledge Graph
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!

#### Response

@@ -655,7 +648,7 @@ Success:
"id": "6e211ee0723611efa10a0242ac120007",
"language": "English",
"name": "mysql",
"chunk_method": "knowledge_graph",
"chunk_method": "naive",
"parser_config": {
"chunk_token_num": 8192,
"delimiter": "\\n",
@@ -841,10 +834,6 @@ curl --request PUT \
- If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:
- `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
- If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
- If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:
- `"chunk_token_count"`: Defaults to `128`.
- `"delimiter"`: Defaults to `"\n"`.
- `"entity_types"`: Defaults to `["organization","person","location","event","time"]`

#### Response


+ 0
- 4
docs/references/python_api_reference.md View File

@@ -145,8 +145,6 @@ The chunking method of the dataset to create. Available options:
- `"presentation"`: Presentation
- `"picture"`: Picture
- `"one"`: One
- `"knowledge_graph"`: Knowledge Graph
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
- `"email"`: Email

##### parser_config
@@ -398,8 +396,6 @@ A dictionary representing the attributes to update, with the following keys:
- `"presentation"`: Presentation
- `"picture"`: Picture
- `"one"`: One
- `"knowledge_graph"`: Knowledge Graph
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
- `"email"`: Email
- `"parser_config"`: `dict[str, Any]` The parsing configuration for the document. Its attributes vary based on the selected `"chunk_method"`:
- `"chunk_method"`=`"naive"`:

+ 0
- 1
sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py View File

@@ -122,7 +122,6 @@ class TestDatasetCreation:
("presentation", "presentation", 0),
("picture", "picture", 0),
("one", "one", 0),
("picknowledge_graphture", "knowledge_graph", 0),
("email", "email", 0),
("tag", "tag", 0),
("empty_chunk_method", "", 0),

+ 1
- 2
sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py View File

@@ -114,14 +114,13 @@ class TestDatasetUpdate:
("presentation", 0, ""),
("picture", 0, ""),
("one", 0, ""),
("knowledge_graph", 0, ""),
("email", 0, ""),
("tag", 0, ""),
("", 0, ""),
(
"other_chunk_method",
102,
"'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'knowledge_graph', 'email', 'tag']",
"'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'email', 'tag']",
),
],
)

+ 2
- 4
sdk/python/test/test_sdk_api/t_dataset.py View File

@@ -38,8 +38,7 @@ def test_create_dataset_with_duplicated_name(get_api_key_fixture):
def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
API_KEY = get_api_key_fixture
rag = RAGFlow(API_KEY, HOST_ADDRESS)
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
"knowledge_graph", "email"]
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email"]
random_chunk_method = random.choice(valid_chunk_methods)
rag.create_dataset("test_create_dataset_with_random_chunk_method", chunk_method=random_chunk_method)

@@ -47,8 +46,7 @@ def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
def test_create_dataset_with_invalid_parameter(get_api_key_fixture):
API_KEY = get_api_key_fixture
rag = RAGFlow(API_KEY, HOST_ADDRESS)
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
"knowledge_graph", "email", "tag"]
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email", "tag"]
chunk_method = "invalid_chunk_method"
with pytest.raises(Exception) as exc_info:
rag.create_dataset("test_create_dataset_with_invalid_chunk_method", chunk_method=chunk_method)

Loading…
Cancel
Save