### What problem does this PR solve?

The `knowledge_graph` chunk method is deprecated and should no longer be used (#7184).

### Type of change

- [x] Refactoring

tags/v0.18.0
| @@ -69,7 +69,7 @@ def create(tenant_id): | |||
| chunk_method: | |||
| type: string | |||
| enum: ["naive", "manual", "qa", "table", "paper", "book", "laws", | |||
| "presentation", "picture", "one", "knowledge_graph", "email", "tag" | |||
| "presentation", "picture", "one", "email", "tag" | |||
| ] | |||
| description: Chunking method. | |||
| parser_config: | |||
| @@ -105,7 +105,6 @@ def create(tenant_id): | |||
| "presentation", | |||
| "picture", | |||
| "one", | |||
| "knowledge_graph", | |||
| "email", | |||
| "tag" | |||
| ] | |||
| @@ -315,7 +314,7 @@ def update(tenant_id, dataset_id): | |||
| chunk_method: | |||
| type: string | |||
| enum: ["naive", "manual", "qa", "table", "paper", "book", "laws", | |||
| "presentation", "picture", "one", "knowledge_graph", "email", "tag" | |||
| "presentation", "picture", "one", "email", "tag" | |||
| ] | |||
| description: Updated chunking method. | |||
| parser_config: | |||
| @@ -353,7 +352,6 @@ def update(tenant_id, dataset_id): | |||
| "presentation", | |||
| "picture", | |||
| "one", | |||
| "knowledge_graph", | |||
| "email", | |||
| "tag" | |||
| ] | |||
| @@ -109,7 +109,7 @@ def init_settings(): | |||
| API_KEY = LLM.get("api_key", "") | |||
| PARSERS = LLM.get( | |||
| "parsers", | |||
| "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email,tag:Tag") | |||
| "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag") | |||
| HOST_IP = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1") | |||
| HOST_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port") | |||
| @@ -393,7 +393,6 @@ curl --request POST \ | |||
| - `"presentation"`: Presentation | |||
| - `"picture"`: Picture | |||
| - `"one"`: One | |||
| - `"knowledge_graph"`: Knowledge Graph | |||
| Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens! | |||
| - `"email"`: Email | |||
| @@ -409,10 +408,6 @@ curl --request POST \ | |||
| - If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute: | |||
| - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`. | |||
| - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object. | |||
| - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes: | |||
| - `"chunk_token_count"`: Defaults to `128`. | |||
| - `"delimiter"`: Defaults to `"\n"`. | |||
| - `"entity_types"`: Defaults to `["organization","person","location","event","time"]` | |||
| #### Response | |||
| @@ -573,8 +568,6 @@ curl --request PUT \ | |||
| - `"picture"`: Picture | |||
| - `"one"`: One | |||
| - `"email"`: Email | |||
| - `"knowledge_graph"`: Knowledge Graph | |||
| Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens! | |||
| #### Response | |||
| @@ -655,7 +648,7 @@ Success: | |||
| "id": "6e211ee0723611efa10a0242ac120007", | |||
| "language": "English", | |||
| "name": "mysql", | |||
| "chunk_method": "knowledge_graph", | |||
| "chunk_method": "naive", | |||
| "parser_config": { | |||
| "chunk_token_num": 8192, | |||
| "delimiter": "\\n", | |||
| @@ -841,10 +834,6 @@ curl --request PUT \ | |||
| - If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute: | |||
| - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`. | |||
| - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object. | |||
| - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes: | |||
| - `"chunk_token_count"`: Defaults to `128`. | |||
| - `"delimiter"`: Defaults to `"\n"`. | |||
| - `"entity_types"`: Defaults to `["organization","person","location","event","time"]` | |||
| #### Response | |||
| @@ -145,8 +145,6 @@ The chunking method of the dataset to create. Available options: | |||
| - `"presentation"`: Presentation | |||
| - `"picture"`: Picture | |||
| - `"one"`: One | |||
| - `"knowledge_graph"`: Knowledge Graph | |||
| Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens! | |||
| - `"email"`: Email | |||
| ##### parser_config | |||
| @@ -398,8 +396,6 @@ A dictionary representing the attributes to update, with the following keys: | |||
| - `"presentation"`: Presentation | |||
| - `"picture"`: Picture | |||
| - `"one"`: One | |||
| - `"knowledge_graph"`: Knowledge Graph | |||
| Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens! | |||
| - `"email"`: Email | |||
| - `"parser_config"`: `dict[str, Any]` The parsing configuration for the document. Its attributes vary based on the selected `"chunk_method"`: | |||
| - `"chunk_method"`=`"naive"`: | |||
| @@ -122,7 +122,6 @@ class TestDatasetCreation: | |||
| ("presentation", "presentation", 0), | |||
| ("picture", "picture", 0), | |||
| ("one", "one", 0), | |||
| ("picknowledge_graphture", "knowledge_graph", 0), | |||
| ("email", "email", 0), | |||
| ("tag", "tag", 0), | |||
| ("empty_chunk_method", "", 0), | |||
| @@ -114,14 +114,13 @@ class TestDatasetUpdate: | |||
| ("presentation", 0, ""), | |||
| ("picture", 0, ""), | |||
| ("one", 0, ""), | |||
| ("knowledge_graph", 0, ""), | |||
| ("email", 0, ""), | |||
| ("tag", 0, ""), | |||
| ("", 0, ""), | |||
| ( | |||
| "other_chunk_method", | |||
| 102, | |||
| "'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'knowledge_graph', 'email', 'tag']", | |||
| "'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'email', 'tag']", | |||
| ), | |||
| ], | |||
| ) | |||
| @@ -38,8 +38,7 @@ def test_create_dataset_with_duplicated_name(get_api_key_fixture): | |||
| def test_create_dataset_with_random_chunk_method(get_api_key_fixture): | |||
| API_KEY = get_api_key_fixture | |||
| rag = RAGFlow(API_KEY, HOST_ADDRESS) | |||
| valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", | |||
| "knowledge_graph", "email"] | |||
| valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email"] | |||
| random_chunk_method = random.choice(valid_chunk_methods) | |||
| rag.create_dataset("test_create_dataset_with_random_chunk_method", chunk_method=random_chunk_method) | |||
| @@ -47,8 +46,7 @@ def test_create_dataset_with_random_chunk_method(get_api_key_fixture): | |||
| def test_create_dataset_with_invalid_parameter(get_api_key_fixture): | |||
| API_KEY = get_api_key_fixture | |||
| rag = RAGFlow(API_KEY, HOST_ADDRESS) | |||
| valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", | |||
| "knowledge_graph", "email", "tag"] | |||
| valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email", "tag"] | |||
| chunk_method = "invalid_chunk_method" | |||
| with pytest.raises(Exception) as exc_info: | |||
| rag.create_dataset("test_create_dataset_with_invalid_chunk_method", chunk_method=chunk_method) | |||