Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>tags/1.7.2
| @marshal_with(dataset_metadata_fields) | @marshal_with(dataset_metadata_fields) | ||||
| def post(self, dataset_id): | def post(self, dataset_id): | ||||
| parser = reqparse.RequestParser() | parser = reqparse.RequestParser() | ||||
| parser.add_argument("type", type=str, required=True, nullable=True, location="json") | |||||
| parser.add_argument("name", type=str, required=True, nullable=True, location="json") | |||||
| parser.add_argument("type", type=str, required=True, nullable=False, location="json") | |||||
| parser.add_argument("name", type=str, required=True, nullable=False, location="json") | |||||
| args = parser.parse_args() | args = parser.parse_args() | ||||
| metadata_args = MetadataArgs(**args) | metadata_args = MetadataArgs(**args) | ||||
| @marshal_with(dataset_metadata_fields) | @marshal_with(dataset_metadata_fields) | ||||
| def patch(self, dataset_id, metadata_id): | def patch(self, dataset_id, metadata_id): | ||||
| parser = reqparse.RequestParser() | parser = reqparse.RequestParser() | ||||
| parser.add_argument("name", type=str, required=True, nullable=True, location="json") | |||||
| parser.add_argument("name", type=str, required=True, nullable=False, location="json") | |||||
| args = parser.parse_args() | args = parser.parse_args() | ||||
| dataset_id_str = str(dataset_id) | dataset_id_str = str(dataset_id) | ||||
| DatasetService.check_dataset_permission(dataset, current_user) | DatasetService.check_dataset_permission(dataset, current_user) | ||||
| parser = reqparse.RequestParser() | parser = reqparse.RequestParser() | ||||
| parser.add_argument("operation_data", type=list, required=True, nullable=True, location="json") | |||||
| parser.add_argument("operation_data", type=list, required=True, nullable=False, location="json") | |||||
| args = parser.parse_args() | args = parser.parse_args() | ||||
| metadata_args = MetadataOperationData(**args) | metadata_args = MetadataOperationData(**args) | ||||
| @cloud_edition_billing_rate_limit_check("knowledge", "dataset") | @cloud_edition_billing_rate_limit_check("knowledge", "dataset") | ||||
| def post(self, tenant_id, dataset_id): | def post(self, tenant_id, dataset_id): | ||||
| parser = reqparse.RequestParser() | parser = reqparse.RequestParser() | ||||
| parser.add_argument("type", type=str, required=True, nullable=True, location="json") | |||||
| parser.add_argument("name", type=str, required=True, nullable=True, location="json") | |||||
| parser.add_argument("type", type=str, required=True, nullable=False, location="json") | |||||
| parser.add_argument("name", type=str, required=True, nullable=False, location="json") | |||||
| args = parser.parse_args() | args = parser.parse_args() | ||||
| metadata_args = MetadataArgs(**args) | metadata_args = MetadataArgs(**args) | ||||
| @cloud_edition_billing_rate_limit_check("knowledge", "dataset") | @cloud_edition_billing_rate_limit_check("knowledge", "dataset") | ||||
| def patch(self, tenant_id, dataset_id, metadata_id): | def patch(self, tenant_id, dataset_id, metadata_id): | ||||
| parser = reqparse.RequestParser() | parser = reqparse.RequestParser() | ||||
| parser.add_argument("name", type=str, required=True, nullable=True, location="json") | |||||
| parser.add_argument("name", type=str, required=True, nullable=False, location="json") | |||||
| args = parser.parse_args() | args = parser.parse_args() | ||||
| dataset_id_str = str(dataset_id) | dataset_id_str = str(dataset_id) | ||||
| DatasetService.check_dataset_permission(dataset, current_user) | DatasetService.check_dataset_permission(dataset, current_user) | ||||
| parser = reqparse.RequestParser() | parser = reqparse.RequestParser() | ||||
| parser.add_argument("operation_data", type=list, required=True, nullable=True, location="json") | |||||
| parser.add_argument("operation_data", type=list, required=True, nullable=False, location="json") | |||||
| args = parser.parse_args() | args = parser.parse_args() | ||||
| metadata_args = MetadataOperationData(**args) | metadata_args = MetadataOperationData(**args) | ||||
| from unittest.mock import Mock, patch | |||||
| import pytest | |||||
| from flask_restful import reqparse | |||||
| from werkzeug.exceptions import BadRequest | |||||
| from services.entities.knowledge_entities.knowledge_entities import MetadataArgs | |||||
| from services.metadata_service import MetadataService | |||||
| class TestMetadataBugCompleteValidation: | |||||
| """Complete test suite to verify the metadata nullable bug and its fix.""" | |||||
| def test_1_pydantic_layer_validation(self): | |||||
| """Test Layer 1: Pydantic model validation correctly rejects None values.""" | |||||
| # Pydantic should reject None values for required fields | |||||
| with pytest.raises((ValueError, TypeError)): | |||||
| MetadataArgs(type=None, name=None) | |||||
| with pytest.raises((ValueError, TypeError)): | |||||
| MetadataArgs(type="string", name=None) | |||||
| with pytest.raises((ValueError, TypeError)): | |||||
| MetadataArgs(type=None, name="test") | |||||
| # Valid values should work | |||||
| valid_args = MetadataArgs(type="string", name="test_name") | |||||
| assert valid_args.type == "string" | |||||
| assert valid_args.name == "test_name" | |||||
| def test_2_business_logic_layer_crashes_on_none(self): | |||||
| """Test Layer 2: Business logic crashes when None values slip through.""" | |||||
| # Create mock that bypasses Pydantic validation | |||||
| mock_metadata_args = Mock() | |||||
| mock_metadata_args.name = None | |||||
| mock_metadata_args.type = "string" | |||||
| with patch("services.metadata_service.current_user") as mock_user: | |||||
| mock_user.current_tenant_id = "tenant-123" | |||||
| mock_user.id = "user-456" | |||||
| # Should crash with TypeError | |||||
| with pytest.raises(TypeError, match="object of type 'NoneType' has no len"): | |||||
| MetadataService.create_metadata("dataset-123", mock_metadata_args) | |||||
| # Test update method as well | |||||
| with patch("services.metadata_service.current_user") as mock_user: | |||||
| mock_user.current_tenant_id = "tenant-123" | |||||
| mock_user.id = "user-456" | |||||
| with pytest.raises(TypeError, match="object of type 'NoneType' has no len"): | |||||
| MetadataService.update_metadata_name("dataset-123", "metadata-456", None) | |||||
| def test_3_database_constraints_verification(self): | |||||
| """Test Layer 3: Verify database model has nullable=False constraints.""" | |||||
| from sqlalchemy import inspect | |||||
| from models.dataset import DatasetMetadata | |||||
| # Get table info | |||||
| mapper = inspect(DatasetMetadata) | |||||
| # Check that type and name columns are not nullable | |||||
| type_column = mapper.columns["type"] | |||||
| name_column = mapper.columns["name"] | |||||
| assert type_column.nullable is False, "type column should be nullable=False" | |||||
| assert name_column.nullable is False, "name column should be nullable=False" | |||||
| def test_4_fixed_api_layer_rejects_null(self, app): | |||||
| """Test Layer 4: Fixed API configuration properly rejects null values.""" | |||||
| # Test Console API create endpoint (fixed) | |||||
| parser = reqparse.RequestParser() | |||||
| parser.add_argument("type", type=str, required=True, nullable=False, location="json") | |||||
| parser.add_argument("name", type=str, required=True, nullable=False, location="json") | |||||
| with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"): | |||||
| with pytest.raises(BadRequest): | |||||
| parser.parse_args() | |||||
| # Test with just name being null | |||||
| with app.test_request_context(json={"type": "string", "name": None}, content_type="application/json"): | |||||
| with pytest.raises(BadRequest): | |||||
| parser.parse_args() | |||||
| # Test with just type being null | |||||
| with app.test_request_context(json={"type": None, "name": "test"}, content_type="application/json"): | |||||
| with pytest.raises(BadRequest): | |||||
| parser.parse_args() | |||||
| def test_5_fixed_api_accepts_valid_values(self, app): | |||||
| """Test that fixed API still accepts valid non-null values.""" | |||||
| parser = reqparse.RequestParser() | |||||
| parser.add_argument("type", type=str, required=True, nullable=False, location="json") | |||||
| parser.add_argument("name", type=str, required=True, nullable=False, location="json") | |||||
| with app.test_request_context(json={"type": "string", "name": "valid_name"}, content_type="application/json"): | |||||
| args = parser.parse_args() | |||||
| assert args["type"] == "string" | |||||
| assert args["name"] == "valid_name" | |||||
| def test_6_simulated_buggy_behavior(self, app): | |||||
| """Test simulating the original buggy behavior with nullable=True.""" | |||||
| # Simulate the old buggy configuration | |||||
| buggy_parser = reqparse.RequestParser() | |||||
| buggy_parser.add_argument("type", type=str, required=True, nullable=True, location="json") | |||||
| buggy_parser.add_argument("name", type=str, required=True, nullable=True, location="json") | |||||
| with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"): | |||||
| # This would pass in the buggy version | |||||
| args = buggy_parser.parse_args() | |||||
| assert args["type"] is None | |||||
| assert args["name"] is None | |||||
| # But would crash when trying to create MetadataArgs | |||||
| with pytest.raises((ValueError, TypeError)): | |||||
| MetadataArgs(**args) | |||||
| def test_7_end_to_end_validation_layers(self): | |||||
| """Test all validation layers work together correctly.""" | |||||
| # Layer 1: API should reject null at parameter level (with fix) | |||||
| # Layer 2: Pydantic should reject null at model level | |||||
| # Layer 3: Business logic expects non-null | |||||
| # Layer 4: Database enforces non-null | |||||
| # Test that valid data flows through all layers | |||||
| valid_data = {"type": "string", "name": "test_metadata"} | |||||
| # Should create valid Pydantic object | |||||
| metadata_args = MetadataArgs(**valid_data) | |||||
| assert metadata_args.type == "string" | |||||
| assert metadata_args.name == "test_metadata" | |||||
| # Should not crash in business logic length check | |||||
| assert len(metadata_args.name) <= 255 # This should not crash | |||||
| assert len(metadata_args.type) > 0 # This should not crash | |||||
| def test_8_verify_specific_fix_locations(self): | |||||
| """Verify that the specific locations mentioned in bug report are fixed.""" | |||||
| # Read the actual files to verify fixes | |||||
| import os | |||||
| # Console API create | |||||
| console_create_file = "api/controllers/console/datasets/metadata.py" | |||||
| if os.path.exists(console_create_file): | |||||
| with open(console_create_file) as f: | |||||
| content = f.read() | |||||
| # Should contain nullable=False, not nullable=True | |||||
| assert "nullable=True" not in content.split("class DatasetMetadataCreateApi")[1].split("class")[0] | |||||
| # Service API create | |||||
| service_create_file = "api/controllers/service_api/dataset/metadata.py" | |||||
| if os.path.exists(service_create_file): | |||||
| with open(service_create_file) as f: | |||||
| content = f.read() | |||||
| # Should contain nullable=False, not nullable=True | |||||
| create_api_section = content.split("class DatasetMetadataCreateServiceApi")[1].split("class")[0] | |||||
| assert "nullable=True" not in create_api_section | |||||
| class TestMetadataValidationSummary: | |||||
| """Summary tests that demonstrate the complete validation architecture.""" | |||||
| def test_validation_layer_architecture(self): | |||||
| """Document and test the 4-layer validation architecture.""" | |||||
| # Layer 1: API Parameter Validation (Flask-RESTful reqparse) | |||||
| # - Role: First line of defense, validates HTTP request parameters | |||||
| # - Fixed: nullable=False ensures null values are rejected at API boundary | |||||
| # Layer 2: Pydantic Model Validation | |||||
| # - Role: Validates data structure and types before business logic | |||||
| # - Working: Required fields without Optional[] reject None values | |||||
| # Layer 3: Business Logic Validation | |||||
| # - Role: Domain-specific validation (length checks, uniqueness, etc.) | |||||
| # - Vulnerable: Direct len() calls crash on None values | |||||
| # Layer 4: Database Constraints | |||||
| # - Role: Final data integrity enforcement | |||||
| # - Working: nullable=False prevents None values in database | |||||
| # The bug was: Layer 1 allowed None, but Layers 2-4 expected non-None | |||||
| # The fix: Make Layer 1 consistent with Layers 2-4 | |||||
| assert True # This test documents the architecture | |||||
| if __name__ == "__main__": | |||||
| pytest.main([__file__, "-v"]) |
| from unittest.mock import Mock, patch | |||||
| import pytest | |||||
| from flask_restful import reqparse | |||||
| from services.entities.knowledge_entities.knowledge_entities import MetadataArgs | |||||
| from services.metadata_service import MetadataService | |||||
| class TestMetadataNullableBug: | |||||
| """Test case to reproduce the metadata nullable validation bug.""" | |||||
| def test_metadata_args_with_none_values_should_fail(self): | |||||
| """Test that MetadataArgs validation should reject None values.""" | |||||
| # This test demonstrates the expected behavior - should fail validation | |||||
| with pytest.raises((ValueError, TypeError)): | |||||
| # This should fail because Pydantic expects non-None values | |||||
| MetadataArgs(type=None, name=None) | |||||
| def test_metadata_service_create_with_none_name_crashes(self): | |||||
| """Test that MetadataService.create_metadata crashes when name is None.""" | |||||
| # Mock the MetadataArgs to bypass Pydantic validation | |||||
| mock_metadata_args = Mock() | |||||
| mock_metadata_args.name = None # This will cause len() to crash | |||||
| mock_metadata_args.type = "string" | |||||
| with patch("services.metadata_service.current_user") as mock_user: | |||||
| mock_user.current_tenant_id = "tenant-123" | |||||
| mock_user.id = "user-456" | |||||
| # This should crash with TypeError when calling len(None) | |||||
| with pytest.raises(TypeError, match="object of type 'NoneType' has no len"): | |||||
| MetadataService.create_metadata("dataset-123", mock_metadata_args) | |||||
| def test_metadata_service_update_with_none_name_crashes(self): | |||||
| """Test that MetadataService.update_metadata_name crashes when name is None.""" | |||||
| with patch("services.metadata_service.current_user") as mock_user: | |||||
| mock_user.current_tenant_id = "tenant-123" | |||||
| mock_user.id = "user-456" | |||||
| # This should crash with TypeError when calling len(None) | |||||
| with pytest.raises(TypeError, match="object of type 'NoneType' has no len"): | |||||
| MetadataService.update_metadata_name("dataset-123", "metadata-456", None) | |||||
| def test_api_parser_accepts_null_values(self, app): | |||||
| """Test that API parser configuration incorrectly accepts null values.""" | |||||
| # Simulate the current API parser configuration | |||||
| parser = reqparse.RequestParser() | |||||
| parser.add_argument("type", type=str, required=True, nullable=True, location="json") | |||||
| parser.add_argument("name", type=str, required=True, nullable=True, location="json") | |||||
| # Simulate request data with null values | |||||
| with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"): | |||||
| # This should parse successfully due to nullable=True | |||||
| args = parser.parse_args() | |||||
| # Verify that null values are accepted | |||||
| assert args["type"] is None | |||||
| assert args["name"] is None | |||||
| # This demonstrates the bug: API accepts None but business logic will crash | |||||
| def test_integration_bug_scenario(self, app): | |||||
| """Test the complete bug scenario from API to service layer.""" | |||||
| # Step 1: API parser accepts null values (current buggy behavior) | |||||
| parser = reqparse.RequestParser() | |||||
| parser.add_argument("type", type=str, required=True, nullable=True, location="json") | |||||
| parser.add_argument("name", type=str, required=True, nullable=True, location="json") | |||||
| with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"): | |||||
| args = parser.parse_args() | |||||
| # Step 2: Try to create MetadataArgs with None values | |||||
| # This should fail at Pydantic validation level | |||||
| with pytest.raises((ValueError, TypeError)): | |||||
| metadata_args = MetadataArgs(**args) | |||||
| # Step 3: If we bypass Pydantic (simulating the bug scenario) | |||||
| # Move this outside the request context to avoid Flask-Login issues | |||||
| mock_metadata_args = Mock() | |||||
| mock_metadata_args.name = None # From args["name"] | |||||
| mock_metadata_args.type = None # From args["type"] | |||||
| with patch("services.metadata_service.current_user") as mock_user: | |||||
| mock_user.current_tenant_id = "tenant-123" | |||||
| mock_user.id = "user-456" | |||||
| # Step 4: Service layer crashes on len(None) | |||||
| with pytest.raises(TypeError, match="object of type 'NoneType' has no len"): | |||||
| MetadataService.create_metadata("dataset-123", mock_metadata_args) | |||||
| def test_correct_nullable_false_configuration_works(self, app): | |||||
| """Test that the correct nullable=False configuration works as expected.""" | |||||
| # This tests the FIXED configuration | |||||
| parser = reqparse.RequestParser() | |||||
| parser.add_argument("type", type=str, required=True, nullable=False, location="json") | |||||
| parser.add_argument("name", type=str, required=True, nullable=False, location="json") | |||||
| with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"): | |||||
| # This should fail with BadRequest due to nullable=False | |||||
| from werkzeug.exceptions import BadRequest | |||||
| with pytest.raises(BadRequest): | |||||
| parser.parse_args() | |||||
| if __name__ == "__main__": | |||||
| pytest.main([__file__, "-v"]) |