|
|
|
@@ -311,8 +311,7 @@ class TestDatasetCreation: |
|
|
|
("filename_embd_weight_mid", {"filename_embd_weight": 0.5}), |
|
|
|
("filename_embd_weight_max", {"filename_embd_weight": 1.0}), |
|
|
|
("task_page_size_min", {"task_page_size": 1}), |
|
|
|
("task_page_size_mid", {"task_page_size": 5_000}), |
|
|
|
("task_page_size_max", {"task_page_size": 10_000}), |
|
|
|
("task_page_size_None", {"task_page_size": None}), |
|
|
|
("pages", {"pages": [[1, 100]]}), |
|
|
|
("pages_none", None), |
|
|
|
("graphrag_true", {"graphrag": {"use_graphrag": True}}), |
|
|
|
@@ -337,8 +336,6 @@ class TestDatasetCreation: |
|
|
|
("raptor_max_cluster_mid", {"raptor": {"max_cluster": 512}}), |
|
|
|
("raptor_max_cluster_max", {"raptor": {"max_cluster": 1024}}), |
|
|
|
("raptor_random_seed_min", {"raptor": {"random_seed": 0}}), |
|
|
|
("raptor_random_seed_mid", {"raptor": {"random_seed": 5_000}}), |
|
|
|
("raptor_random_seed_max", {"raptor": {"random_seed": 10_000}}), |
|
|
|
], |
|
|
|
ids=[ |
|
|
|
"default_none", |
|
|
|
@@ -366,8 +363,7 @@ class TestDatasetCreation: |
|
|
|
"filename_embd_weight_mid", |
|
|
|
"filename_embd_weight_max", |
|
|
|
"task_page_size_min", |
|
|
|
"task_page_size_mid", |
|
|
|
"task_page_size_max", |
|
|
|
"task_page_size_None", |
|
|
|
"pages", |
|
|
|
"pages_none", |
|
|
|
"graphrag_true", |
|
|
|
@@ -392,8 +388,6 @@ class TestDatasetCreation: |
|
|
|
"raptor_max_cluster_mid", |
|
|
|
"raptor_max_cluster_max", |
|
|
|
"raptor_random_seed_min", |
|
|
|
"raptor_random_seed_mid", |
|
|
|
"raptor_random_seed_max", |
|
|
|
], |
|
|
|
) |
|
|
|
def test_valid_parser_config(self, get_http_api_auth, name, parser_config): |
|
|
|
@@ -462,7 +456,6 @@ class TestDatasetCreation: |
|
|
|
("filename_embd_weight_max_limit", {"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"), |
|
|
|
("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"), |
|
|
|
("task_page_size_min_limit", {"task_page_size": 0}, "Input should be greater than or equal to 1"), |
|
|
|
("task_page_size_max_limit", {"task_page_size": 10_001}, "Input should be less than or equal to 10000"), |
|
|
|
("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"), |
|
|
|
("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"), |
|
|
|
("pages_not_list", {"pages": "1,2"}, "Input should be a valid list"), |
|
|
|
@@ -490,7 +483,6 @@ class TestDatasetCreation: |
|
|
|
("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"), |
|
|
|
("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), |
|
|
|
("raptor_random_seed_min_limit", {"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"), |
|
|
|
("raptor_random_seed_max_limit", {"raptor": {"random_seed": 10_001}}, "Input should be less than or equal to 10000"), |
|
|
|
("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"), |
|
|
|
("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"), |
|
|
|
("parser_config_type_invalid", {"delimiter": "a" * 65536}, "Parser config have at most 65535 characters"), |
|
|
|
@@ -520,7 +512,6 @@ class TestDatasetCreation: |
|
|
|
"filename_embd_weight_max_limit", |
|
|
|
"filename_embd_weight_type_invalid", |
|
|
|
"task_page_size_min_limit", |
|
|
|
"task_page_size_max_limit", |
|
|
|
"task_page_size_float_not_allowed", |
|
|
|
"task_page_size_type_invalid", |
|
|
|
"pages_not_list", |
|
|
|
@@ -548,7 +539,6 @@ class TestDatasetCreation: |
|
|
|
"raptor_max_cluster_float_not_allowed", |
|
|
|
"raptor_max_cluster_type_invalid", |
|
|
|
"raptor_random_seed_min_limit", |
|
|
|
"raptor_random_seed_max_limit", |
|
|
|
"raptor_random_seed_float_not_allowed", |
|
|
|
"raptor_random_seed_type_invalid", |
|
|
|
"parser_config_type_invalid", |