### What problem does this PR solve?

The `knowledge_graph` chunk method is deprecated and should no longer be used. #7184.

### Type of change

- [x] Refactoring

6 月之前 · 94181a990b
--- a/api/apps/sdk/dataset.py
+++ b/api/apps/sdk/dataset.py
            chunk_method:
              type: string
              enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
                     "presentation", "picture", "one", "knowledge_graph", "email", "tag"
                     "presentation", "picture", "one", "email", "tag"
                     ]
              description: Chunking method.
            parser_config:
        "presentation",
        "picture",
        "one",
        "knowledge_graph",
        "email",
        "tag"
    ]
            chunk_method:
              type: string
              enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
                     "presentation", "picture", "one", "knowledge_graph", "email", "tag"
                     "presentation", "picture", "one", "email", "tag"
                     ]
              description: Updated chunking method.
            parser_config:
        "presentation",
        "picture",
        "one",
        "knowledge_graph",
        "email",
        "tag"
    ]
--- a/api/settings.py
+++ b/api/settings.py
    API_KEY = LLM.get("api_key", "")
    PARSERS = LLM.get(
        "parsers",
        "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email,tag:Tag")
        "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag")
    HOST_IP = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1")
    HOST_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port")
--- a/docs/references/http_api_reference.md
+++ b/docs/references/http_api_reference.md
  - `"presentation"`: Presentation
  - `"picture"`: Picture
  - `"one"`: One
  - `"knowledge_graph"`: Knowledge Graph  
    Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
  - `"email"`: Email
  - If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:  
    - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
  - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
  - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:  
    - `"chunk_token_count"`: Defaults to `128`.
    - `"delimiter"`: Defaults to `"\n"`.
    - `"entity_types"`: Defaults to `["organization","person","location","event","time"]`
 #### Response
  - `"picture"`: Picture
  - `"one"`: One
  - `"email"`: Email
  - `"knowledge_graph"`: Knowledge Graph  
    Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
 #### Response
            "id": "6e211ee0723611efa10a0242ac120007",
            "language": "English",
            "name": "mysql",
            "chunk_method": "knowledge_graph",
            "chunk_method": "naive",
            "parser_config": {
                "chunk_token_num": 8192,
                "delimiter": "\\n",
  - If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:
    - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
  - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
  - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:
    - `"chunk_token_count"`: Defaults to `128`.
    - `"delimiter"`: Defaults to `"\n"`.
    - `"entity_types"`: Defaults to `["organization","person","location","event","time"]`
 #### Response
--- a/docs/references/python_api_reference.md
+++ b/docs/references/python_api_reference.md
 - `"presentation"`: Presentation
 - `"picture"`: Picture
 - `"one"`: One
 - `"knowledge_graph"`: Knowledge Graph  
  Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
 - `"email"`: Email
 ##### parser_config
  - `"presentation"`: Presentation
  - `"picture"`: Picture
  - `"one"`: One
  - `"knowledge_graph"`: Knowledge Graph  
    Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
  - `"email"`: Email
 - `"parser_config"`: `dict[str, Any]` The parsing configuration for the document. Its attributes vary based on the selected `"chunk_method"`:
  - `"chunk_method"`=`"naive"`:  
--- a/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py
+++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py
            ("presentation", "presentation", 0),
            ("picture", "picture", 0),
            ("one", "one", 0),
            ("picknowledge_graphture", "knowledge_graph", 0),
            ("email", "email", 0),
            ("tag", "tag", 0),
            ("empty_chunk_method", "", 0),
--- a/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py
+++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py
            ("presentation", 0, ""),
            ("picture", 0, ""),
            ("one", 0, ""),
            ("knowledge_graph", 0, ""),
            ("email", 0, ""),
            ("tag", 0, ""),
            ("", 0, ""),
            (
                "other_chunk_method",
                102,
                "'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'knowledge_graph', 'email', 'tag']",
                "'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'email', 'tag']",
            ),
        ],
    )
--- a/sdk/python/test/test_sdk_api/t_dataset.py
+++ b/sdk/python/test/test_sdk_api/t_dataset.py
 def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
                           "knowledge_graph", "email"]
    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email"]
    random_chunk_method = random.choice(valid_chunk_methods)
    rag.create_dataset("test_create_dataset_with_random_chunk_method", chunk_method=random_chunk_method)
 def test_create_dataset_with_invalid_parameter(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
                           "knowledge_graph", "email", "tag"]
    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email", "tag"]
    chunk_method = "invalid_chunk_method"
    with pytest.raises(Exception) as exc_info:
        rag.create_dataset("test_create_dataset_with_invalid_chunk_method", chunk_method=chunk_method)