### What problem does this PR solve? The knowledge_graph chunk method is deprecated and should no longer be used. #7184. ### Type of change - [x] Refactoring tags/v0.18.0
| chunk_method: | chunk_method: | ||||
| type: string | type: string | ||||
| enum: ["naive", "manual", "qa", "table", "paper", "book", "laws", | enum: ["naive", "manual", "qa", "table", "paper", "book", "laws", | ||||
| "presentation", "picture", "one", "knowledge_graph", "email", "tag" | |||||
| "presentation", "picture", "one", "email", "tag" | |||||
| ] | ] | ||||
| description: Chunking method. | description: Chunking method. | ||||
| parser_config: | parser_config: | ||||
| "presentation", | "presentation", | ||||
| "picture", | "picture", | ||||
| "one", | "one", | ||||
| "knowledge_graph", | |||||
| "email", | "email", | ||||
| "tag" | "tag" | ||||
| ] | ] | ||||
| chunk_method: | chunk_method: | ||||
| type: string | type: string | ||||
| enum: ["naive", "manual", "qa", "table", "paper", "book", "laws", | enum: ["naive", "manual", "qa", "table", "paper", "book", "laws", | ||||
| "presentation", "picture", "one", "knowledge_graph", "email", "tag" | |||||
| "presentation", "picture", "one", "email", "tag" | |||||
| ] | ] | ||||
| description: Updated chunking method. | description: Updated chunking method. | ||||
| parser_config: | parser_config: | ||||
| "presentation", | "presentation", | ||||
| "picture", | "picture", | ||||
| "one", | "one", | ||||
| "knowledge_graph", | |||||
| "email", | "email", | ||||
| "tag" | "tag" | ||||
| ] | ] |
| API_KEY = LLM.get("api_key", "") | API_KEY = LLM.get("api_key", "") | ||||
| PARSERS = LLM.get( | PARSERS = LLM.get( | ||||
| "parsers", | "parsers", | ||||
| "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email,tag:Tag") | |||||
| "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag") | |||||
| HOST_IP = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1") | HOST_IP = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1") | ||||
| HOST_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port") | HOST_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port") |
| - `"presentation"`: Presentation | - `"presentation"`: Presentation | ||||
| - `"picture"`: Picture | - `"picture"`: Picture | ||||
| - `"one"`: One | - `"one"`: One | ||||
| - `"knowledge_graph"`: Knowledge Graph | |||||
| Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens! | Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens! | ||||
| - `"email"`: Email | - `"email"`: Email | ||||
| - If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute: | - If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute: | ||||
| - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`. | - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`. | ||||
| - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object. | - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object. | ||||
| - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes: | |||||
| - `"chunk_token_count"`: Defaults to `128`. | |||||
| - `"delimiter"`: Defaults to `"\n"`. | |||||
| - `"entity_types"`: Defaults to `["organization","person","location","event","time"]` | |||||
| #### Response | #### Response | ||||
| - `"picture"`: Picture | - `"picture"`: Picture | ||||
| - `"one"`: One | - `"one"`: One | ||||
| - `"email"`: Email | - `"email"`: Email | ||||
| - `"knowledge_graph"`: Knowledge Graph | |||||
| Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens! | |||||
| #### Response | #### Response | ||||
| "id": "6e211ee0723611efa10a0242ac120007", | "id": "6e211ee0723611efa10a0242ac120007", | ||||
| "language": "English", | "language": "English", | ||||
| "name": "mysql", | "name": "mysql", | ||||
| "chunk_method": "knowledge_graph", | |||||
| "chunk_method": "naive", | |||||
| "parser_config": { | "parser_config": { | ||||
| "chunk_token_num": 8192, | "chunk_token_num": 8192, | ||||
| "delimiter": "\\n", | "delimiter": "\\n", | ||||
| - If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute: | - If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute: | ||||
| - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`. | - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`. | ||||
| - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object. | - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object. | ||||
| - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes: | |||||
| - `"chunk_token_count"`: Defaults to `128`. | |||||
| - `"delimiter"`: Defaults to `"\n"`. | |||||
| - `"entity_types"`: Defaults to `["organization","person","location","event","time"]` | |||||
| #### Response | #### Response | ||||
| - `"presentation"`: Presentation | - `"presentation"`: Presentation | ||||
| - `"picture"`: Picture | - `"picture"`: Picture | ||||
| - `"one"`: One | - `"one"`: One | ||||
| - `"knowledge_graph"`: Knowledge Graph | |||||
| Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens! | |||||
| - `"email"`: Email | - `"email"`: Email | ||||
| ##### parser_config | ##### parser_config | ||||
| - `"presentation"`: Presentation | - `"presentation"`: Presentation | ||||
| - `"picture"`: Picture | - `"picture"`: Picture | ||||
| - `"one"`: One | - `"one"`: One | ||||
| - `"knowledge_graph"`: Knowledge Graph | |||||
| Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens! | |||||
| - `"email"`: Email | - `"email"`: Email | ||||
| - `"parser_config"`: `dict[str, Any]` The parsing configuration for the document. Its attributes vary based on the selected `"chunk_method"`: | - `"parser_config"`: `dict[str, Any]` The parsing configuration for the document. Its attributes vary based on the selected `"chunk_method"`: | ||||
| - `"chunk_method"`=`"naive"`: | - `"chunk_method"`=`"naive"`: |
| ("presentation", "presentation", 0), | ("presentation", "presentation", 0), | ||||
| ("picture", "picture", 0), | ("picture", "picture", 0), | ||||
| ("one", "one", 0), | ("one", "one", 0), | ||||
| ("picknowledge_graphture", "knowledge_graph", 0), | |||||
| ("email", "email", 0), | ("email", "email", 0), | ||||
| ("tag", "tag", 0), | ("tag", "tag", 0), | ||||
| ("empty_chunk_method", "", 0), | ("empty_chunk_method", "", 0), |
| ("presentation", 0, ""), | ("presentation", 0, ""), | ||||
| ("picture", 0, ""), | ("picture", 0, ""), | ||||
| ("one", 0, ""), | ("one", 0, ""), | ||||
| ("knowledge_graph", 0, ""), | |||||
| ("email", 0, ""), | ("email", 0, ""), | ||||
| ("tag", 0, ""), | ("tag", 0, ""), | ||||
| ("", 0, ""), | ("", 0, ""), | ||||
| ( | ( | ||||
| "other_chunk_method", | "other_chunk_method", | ||||
| 102, | 102, | ||||
| "'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'knowledge_graph', 'email', 'tag']", | |||||
| "'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'email', 'tag']", | |||||
| ), | ), | ||||
| ], | ], | ||||
| ) | ) |
| def test_create_dataset_with_random_chunk_method(get_api_key_fixture): | def test_create_dataset_with_random_chunk_method(get_api_key_fixture): | ||||
| API_KEY = get_api_key_fixture | API_KEY = get_api_key_fixture | ||||
| rag = RAGFlow(API_KEY, HOST_ADDRESS) | rag = RAGFlow(API_KEY, HOST_ADDRESS) | ||||
| valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", | |||||
| "knowledge_graph", "email"] | |||||
| valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email"] | |||||
| random_chunk_method = random.choice(valid_chunk_methods) | random_chunk_method = random.choice(valid_chunk_methods) | ||||
| rag.create_dataset("test_create_dataset_with_random_chunk_method", chunk_method=random_chunk_method) | rag.create_dataset("test_create_dataset_with_random_chunk_method", chunk_method=random_chunk_method) | ||||
| def test_create_dataset_with_invalid_parameter(get_api_key_fixture): | def test_create_dataset_with_invalid_parameter(get_api_key_fixture): | ||||
| API_KEY = get_api_key_fixture | API_KEY = get_api_key_fixture | ||||
| rag = RAGFlow(API_KEY, HOST_ADDRESS) | rag = RAGFlow(API_KEY, HOST_ADDRESS) | ||||
| valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", | |||||
| "knowledge_graph", "email", "tag"] | |||||
| valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email", "tag"] | |||||
| chunk_method = "invalid_chunk_method" | chunk_method = "invalid_chunk_method" | ||||
| with pytest.raises(Exception) as exc_info: | with pytest.raises(Exception) as exc_info: | ||||
| rag.create_dataset("test_create_dataset_with_invalid_chunk_method", chunk_method=chunk_method) | rag.create_dataset("test_create_dataset_with_invalid_chunk_method", chunk_method=chunk_method) |