Ver código fonte

refactor(console): add spec controller import and enhance tool output schema resolution

tags/2.0.0-beta.1
Harry 2 meses atrás
pai
commit
c911ac8c01

+ 1
- 1
api/controllers/console/__init__.py Ver arquivo

@@ -43,7 +43,7 @@ api.add_resource(AppImportConfirmApi, "/apps/imports/<string:import_id>/confirm"
api.add_resource(AppImportCheckDependenciesApi, "/apps/imports/<string:app_id>/check-dependencies")

# Import other controllers
from . import admin, apikey, extension, feature, ping, setup, version
from . import admin, apikey, extension, feature, ping, setup, spec, version

# Import app controllers
from .app import (

+ 35
- 0
api/controllers/console/spec.py Ver arquivo

@@ -0,0 +1,35 @@
import logging

from flask_restful import Resource

from controllers.console import api
from controllers.console.wraps import (
account_initialization_required,
setup_required,
)
from core.schemas.schema_manager import SchemaManager
from libs.login import login_required

logger = logging.getLogger(__name__)


class SpecSchemaDefinitionsApi(Resource):
    """Console resource that serves the system JSON Schema definitions."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self):
        """
        Return the system JSON Schema definitions specification.

        The frontend uses this payload for component type mapping. Any failure
        while reading the definitions is logged and answered with an empty
        list, still with HTTP status 200.
        """
        try:
            definitions = SchemaManager().get_all_schema_definitions()
        except Exception:
            logger.exception("Failed to get schema definitions from local registry")
            # Fall back to an empty array rather than failing the request
            return [], 200
        else:
            return definitions, 200


# Register the schema definitions resource on the console API under /spec/schema-definitions.
api.add_resource(SpecSchemaDefinitionsApi, "/spec/schema-definitions")

+ 7
- 0
api/core/plugin/impl/tool.py Ver arquivo

@@ -9,6 +9,7 @@ from core.plugin.entities.plugin_daemon import (
PluginToolProviderEntity,
)
from core.plugin.impl.base import BasePluginClient
from core.schemas.resolver import resolve_dify_schema_refs
from core.tools.entities.tool_entities import CredentialType, ToolInvokeMessage, ToolParameter


@@ -24,6 +25,9 @@ class PluginToolManager(BasePluginClient):
provider_name = declaration.get("identity", {}).get("name")
for tool in declaration.get("tools", []):
tool["identity"]["provider"] = provider_name
# resolve refs
if tool.get("output_schema"):
tool["output_schema"] = resolve_dify_schema_refs(tool["output_schema"])

return json_response

@@ -55,6 +59,9 @@ class PluginToolManager(BasePluginClient):
if data:
for tool in data.get("declaration", {}).get("tools", []):
tool["identity"]["provider"] = tool_provider_id.provider_name
# resolve refs
if tool.get("output_schema"):
tool["output_schema"] = resolve_dify_schema_refs(tool["output_schema"])

return json_response


+ 5
- 0
api/core/schemas/__init__.py Ver arquivo

@@ -0,0 +1,5 @@
# Schema management package

from .resolver import resolve_dify_schema_refs

__all__ = ["resolve_dify_schema_refs"]

+ 43
- 0
api/core/schemas/builtin/schemas/v1/file.json Ver arquivo

@@ -0,0 +1,43 @@
{
"$id": "https://dify.ai/schemas/v1/file.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"version": "1.0.0",
"type": "object",
"title": "File Schema",
"description": "Schema for file objects (v1)",
"properties": {
"name": {
"type": "string",
"description": "file name"
},
"size": {
"type": "number",
"description": "file size"
},
"extension": {
"type": "string",
"description": "file extension"
},
"type": {
"type": "string",
"description": "file type"
},
"mime_type": {
"type": "string",
"description": "file mime type"
},
"transfer_method": {
"type": "string",
"description": "file transfer method"
},
"url": {
"type": "string",
"description": "file url"
},
"related_id": {
"type": "string",
"description": "file related id"
}
},
"required": ["name"]
}

+ 11
- 0
api/core/schemas/builtin/schemas/v1/general_structure.json Ver arquivo

@@ -0,0 +1,11 @@
{
"$id": "https://dify.ai/schemas/v1/general_structure.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"version": "1.0.0",
"type": "array",
"title": "General Structure Schema",
"description": "Schema for general structure (v1) - array of strings",
"items": {
"type": "string"
}
}

+ 36
- 0
api/core/schemas/builtin/schemas/v1/parent_child_structure.json Ver arquivo

@@ -0,0 +1,36 @@
{
"$id": "https://dify.ai/schemas/v1/parent_child_structure.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"version": "1.0.0",
"type": "object",
"title": "Parent-Child Structure Schema",
"description": "Schema for parent-child structure (v1)",
"properties": {
"parent_mode": {
"type": "string",
"description": "The mode of parent-child relationship"
},
"parent_child_chunks": {
"type": "array",
"items": {
"type": "object",
"properties": {
"parent_content": {
"type": "string",
"description": "The parent content"
},
"child_contents": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of child contents"
}
},
"required": ["parent_content", "child_contents"]
},
"description": "List of parent-child chunk pairs"
}
},
"required": ["parent_mode", "parent_child_chunks"]
}

+ 29
- 0
api/core/schemas/builtin/schemas/v1/qa_structure.json Ver arquivo

@@ -0,0 +1,29 @@
{
"$id": "https://dify.ai/schemas/v1/qa_structure.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"version": "1.0.0",
"type": "object",
"title": "Q&A Structure Schema",
"description": "Schema for question-answer structure (v1)",
"properties": {
"qa_chunks": {
"type": "array",
"items": {
"type": "object",
"properties": {
"question": {
"type": "string",
"description": "The question"
},
"answer": {
"type": "string",
"description": "The answer"
}
},
"required": ["question", "answer"]
},
"description": "List of question-answer pairs"
}
},
"required": ["qa_chunks"]
}

+ 139
- 0
api/core/schemas/registry.py Ver arquivo

@@ -0,0 +1,139 @@
import json
import logging
import re
import threading
from collections.abc import Mapping, MutableMapping
from pathlib import Path
from typing import Any, ClassVar, Optional


logger = logging.getLogger(__name__)

# Matches schema URIs of the form https://dify.ai/schemas/<version>/<name>.json
_SCHEMA_URI_PATTERN = re.compile(r"^https://dify\.ai/schemas/(v\d+)/(.+)\.json$")


class SchemaRegistry:
    """Schema registry manages JSON schemas with version support.

    Schemas are read from ``<base_dir>/<version>/<name>.json``. Loaded schemas
    are kept in ``versions`` (version -> name -> schema) and a small metadata
    record per schema is kept in ``metadata`` (keyed by schema URI).
    """

    _default_instance: ClassVar[Optional["SchemaRegistry"]] = None
    _lock: ClassVar[threading.Lock] = threading.Lock()

    def __init__(self, base_dir: str):
        """Create an empty registry rooted at ``base_dir``; nothing is loaded yet."""
        self.base_dir = Path(base_dir)
        self.versions: MutableMapping[str, MutableMapping[str, Any]] = {}
        self.metadata: MutableMapping[str, MutableMapping[str, Any]] = {}

    @classmethod
    def default_registry(cls) -> "SchemaRegistry":
        """Returns the default schema registry for builtin schemas (thread-safe singleton)"""
        if cls._default_instance is None:
            with cls._lock:
                # Re-check under the lock: another thread may have built the
                # instance while this one was waiting.
                if cls._default_instance is None:
                    schema_dir = Path(__file__).parent / "builtin" / "schemas"
                    registry = cls(str(schema_dir))
                    registry.load_all_versions()
                    # Publish only after loading so readers never see a half-filled registry
                    cls._default_instance = registry
        return cls._default_instance

    def load_all_versions(self) -> None:
        """Scans the schema directory and loads every sub-directory whose name starts with "v"."""
        if not self.base_dir.is_dir():
            return
        for entry in self.base_dir.iterdir():
            if entry.is_dir() and entry.name.startswith("v"):
                self._load_version_dir(entry.name, entry)

    def _load_version_dir(self, version: str, version_dir: Path) -> None:
        """Loads all ``*.json`` schemas found directly inside a version directory."""
        if not version_dir.exists():
            return
        self.versions.setdefault(version, {})
        for entry in version_dir.iterdir():
            if entry.suffix != ".json":
                continue
            self._load_schema(version, entry.stem, entry)

    def _load_schema(self, version: str, schema_name: str, schema_path: Path) -> None:
        """Loads a single schema file and records its metadata.

        A file that cannot be read, decoded or parsed, or whose JSON root is
        not an object, is skipped with a warning so that one bad file does not
        prevent the remaining schemas from loading.
        """
        try:
            with open(schema_path, encoding="utf-8") as f:
                schema = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
            logger.warning("Failed to load schema %s/%s: %s", version, schema_name, e)
            return

        if not isinstance(schema, dict):
            # Metadata extraction below relies on dict access
            logger.warning(
                "Failed to load schema %s/%s: root must be a JSON object, got %s",
                version,
                schema_name,
                type(schema).__name__,
            )
            return

        # Store the schema
        self.versions.setdefault(version, {})[schema_name] = schema

        # Extract and store metadata
        uri = f"https://dify.ai/schemas/{version}/{schema_name}.json"
        self.metadata[uri] = {
            "version": version,
            "title": schema.get("title", ""),
            "description": schema.get("description", ""),
            "deprecated": schema.get("deprecated", False),
        }

    def get_schema(self, uri: str) -> Optional[Any]:
        """Retrieves a schema by URI with version support; returns None when unknown."""
        version, schema_name = self._parse_uri(uri)
        if not version or not schema_name:
            return None
        version_schemas = self.versions.get(version)
        if not version_schemas:
            return None
        return version_schemas.get(schema_name)

    def _parse_uri(self, uri: str) -> tuple[str, str]:
        """Parses a schema URI into (version, schema name); returns ("", "") if it does not match."""
        if not isinstance(uri, str):
            return "", ""
        match = _SCHEMA_URI_PATTERN.match(uri)
        if not match:
            return "", ""
        return match.group(1), match.group(2)

    def list_versions(self) -> list[str]:
        """Returns all available versions, sorted"""
        return sorted(self.versions.keys())

    def list_schemas(self, version: str) -> list[str]:
        """Returns the sorted names of all schemas in a specific version"""
        version_schemas = self.versions.get(version)
        if not version_schemas:
            return []
        return sorted(version_schemas.keys())

    def get_all_schemas_for_version(self, version: str = "v1") -> list[Mapping[str, Any]]:
        """Returns all schemas for a version in the API format.

        Each element is ``{"name": <schema name>, "schema": <schema>}``.
        Entries are sorted by name so the output does not depend on the order
        in which the filesystem listed the files.
        """
        version_schemas = self.versions.get(version, {})
        return [{"name": name, "schema": version_schemas[name]} for name in sorted(version_schemas)]

+ 109
- 0
api/core/schemas/resolver.py Ver arquivo

@@ -0,0 +1,109 @@
import re
from typing import Any, Optional

from core.schemas.registry import SchemaRegistry


# Dify schema URI pattern: https://dify.ai/schemas/v*/name.json
_DIFY_SCHEMA_REF_PATTERN = re.compile(r"^https://dify\.ai/schemas/(v\d+)/(.+)\.json$")

# Top-level keys stripped from a registry schema before it is inlined
_METADATA_FIELDS = ("$id", "$schema", "version")


def resolve_dify_schema_refs(schema: Any, registry: Optional["SchemaRegistry"] = None, max_depth: int = 10) -> Any:
    """
    Resolve $ref references in Dify schema to actual schema content

    Args:
        schema: Schema object that may contain $ref references
        registry: Optional schema registry, defaults to default registry
        max_depth: Maximum nesting depth of dicts/lists (counted across resolved
            references) to prevent infinite loops (default: 10)

    Returns:
        Schema with all Dify $ref references resolved to actual content.
        Non-Dify references and references the registry does not know are
        returned untouched.

    Raises:
        RecursionError: If maximum recursion depth is exceeded
    """
    if registry is None:
        registry = SchemaRegistry.default_registry()
    return _resolve_refs_recursive(schema, registry, max_depth, 0)


def _resolve_refs_recursive(schema: Any, registry: "SchemaRegistry", max_depth: int, current_depth: int) -> Any:
    """
    Recursively resolve $ref references in schema

    Args:
        schema: Schema object to process
        registry: Schema registry for lookups
        max_depth: Maximum allowed recursion depth
        current_depth: Current recursion depth

    Returns:
        Schema with references resolved

    Raises:
        RecursionError: If a dict or list is reached at or beyond max_depth
    """
    # Primitive values cannot recurse any further, so they never count against
    # the depth limit; only containers do.
    if not isinstance(schema, (dict, list)):
        return schema

    if current_depth >= max_depth:
        raise RecursionError(f"Maximum recursion depth ({max_depth}) exceeded while resolving schema references")

    next_depth = current_depth + 1

    if isinstance(schema, list):
        return [_resolve_refs_recursive(item, registry, max_depth, next_depth) for item in schema]

    if "$ref" not in schema:
        # Regular dict, recursively process all values
        return {key: _resolve_refs_recursive(value, registry, max_depth, next_depth) for key, value in schema.items()}

    ref_uri = schema["$ref"]
    if not _is_dify_schema_ref(ref_uri):
        # Non-Dify reference, return as-is
        return schema

    resolved_schema = registry.get_schema(ref_uri)
    if not resolved_schema:
        # Schema not found, keep the original ref (might be external or invalid)
        return schema

    # Strip metadata, then keep resolving in case the target contains more refs
    cleaned_schema = _remove_metadata_fields(resolved_schema)
    return _resolve_refs_recursive(cleaned_schema, registry, max_depth, next_depth)


def _remove_metadata_fields(schema: dict) -> dict:
    """
    Remove metadata fields from schema that shouldn't be included in resolved output

    Works on a shallow copy, so the schema passed in is not modified.
    """
    if not isinstance(schema, dict):
        return schema
    return {key: value for key, value in schema.items() if key not in _METADATA_FIELDS}


def _is_dify_schema_ref(ref_uri: str) -> bool:
    """
    Check if the reference URI is a Dify schema reference
    """
    return isinstance(ref_uri, str) and bool(_DIFY_SCHEMA_REF_PATTERN.match(ref_uri))

+ 65
- 0
api/core/schemas/schema_manager.py Ver arquivo

@@ -0,0 +1,65 @@
from collections.abc import Mapping
from typing import Any, Optional

from core.schemas.registry import SchemaRegistry


class SchemaManager:
    """Schema manager provides high-level schema operations on top of a registry"""

    def __init__(self, registry: Optional[SchemaRegistry] = None):
        # Fall back to the shared default registry when none is supplied
        if not registry:
            registry = SchemaRegistry.default_registry()
        self.registry = registry

    def get_all_schema_definitions(self, version: str = "v1") -> list[Mapping[str, Any]]:
        """
        Fetch every JSON Schema definition registered under one version

        Args:
            version: Schema version, defaults to v1

        Returns:
            List of definitions; each element carries the name and schema fields
        """
        definitions = self.registry.get_all_schemas_for_version(version)
        return definitions

    def get_schema_by_name(self, schema_name: str, version: str = "v1") -> Optional[Mapping[str, Any]]:
        """
        Look up one schema by its name

        Args:
            schema_name: Schema name
            version: Schema version, defaults to v1

        Returns:
            Dictionary with name and schema, or None when the lookup yields nothing
        """
        found = self.registry.get_schema(f"https://dify.ai/schemas/{version}/{schema_name}.json")
        if not found:
            return None
        return {"name": schema_name, "schema": found}

    def list_available_schemas(self, version: str = "v1") -> list[str]:
        """
        Names of all schemas available for one version

        Args:
            version: Schema version, defaults to v1

        Returns:
            List of schema names
        """
        names = self.registry.list_schemas(version)
        return names

    def list_available_versions(self) -> list[str]:
        """
        All schema versions known to the registry

        Returns:
            List of versions
        """
        versions = self.registry.list_versions()
        return versions

+ 1
- 0
api/tests/unit_tests/core/schemas/__init__.py Ver arquivo

@@ -0,0 +1 @@
# Core schemas unit tests

+ 160
- 0
api/tests/unit_tests/core/schemas/test_resolver.py Ver arquivo

@@ -0,0 +1,160 @@

import pytest

from core.schemas import resolve_dify_schema_refs
from core.schemas.registry import SchemaRegistry


class TestSchemaResolver:
    """Checks resolve_dify_schema_refs against the builtin v1 schemas"""

    def setup_method(self):
        """Grab the default registry before every test"""
        self.registry = SchemaRegistry.default_registry()

    def test_simple_ref_resolution(self):
        """A bare $ref is replaced by the complete referenced schema"""
        resolved = resolve_dify_schema_refs({"$ref": "https://dify.ai/schemas/v1/qa_structure.json"})

        # The qa_structure schema is inlined
        assert resolved["type"] == "object"
        assert resolved["title"] == "Q&A Structure Schema"
        assert "qa_chunks" in resolved["properties"]
        assert resolved["properties"]["qa_chunks"]["type"] == "array"

        # Registry metadata must not leak into the result
        assert "$id" not in resolved
        assert "$schema" not in resolved
        assert "version" not in resolved

    def test_nested_object_with_refs(self):
        """A $ref nested inside object properties is resolved in place"""
        file_ref = {"$ref": "https://dify.ai/schemas/v1/file.json"}
        metadata_prop = {"type": "string", "description": "Additional metadata"}
        nested_schema = {
            "type": "object",
            "properties": {"file_data": file_ref, "metadata": metadata_prop},
        }

        resolved = resolve_dify_schema_refs(nested_schema)
        props = resolved["properties"]

        # Surrounding structure survives untouched
        assert resolved["type"] == "object"
        assert "metadata" in props
        assert props["metadata"]["type"] == "string"

        # The reference itself is expanded
        file_schema = props["file_data"]
        assert file_schema["type"] == "object"
        assert file_schema["title"] == "File Schema"
        assert "name" in file_schema["properties"]

        # Registry metadata must not leak into the expanded schema
        assert "$id" not in file_schema
        assert "$schema" not in file_schema
        assert "version" not in file_schema

    def test_array_items_ref_resolution(self):
        """A $ref used as array items is resolved"""
        array_schema = {
            "type": "array",
            "items": {"$ref": "https://dify.ai/schemas/v1/general_structure.json"},
            "description": "Array of general structures",
        }

        resolved = resolve_dify_schema_refs(array_schema)

        # Outer array keeps its own keys
        assert resolved["type"] == "array"
        assert resolved["description"] == "Array of general structures"

        # Items now hold the referenced schema
        items_schema = resolved["items"]
        assert items_schema["type"] == "array"
        assert items_schema["title"] == "General Structure Schema"

    def test_non_dify_ref_unchanged(self):
        """Only Dify $refs are expanded; foreign ones stay as they are"""
        external_uri = "https://example.com/external-schema.json"
        external_ref_schema = {
            "type": "object",
            "properties": {
                "external_data": {"$ref": external_uri},
                "dify_data": {"$ref": "https://dify.ai/schemas/v1/file.json"},
            },
        }

        props = resolve_dify_schema_refs(external_ref_schema)["properties"]

        # Foreign reference is preserved verbatim
        assert props["external_data"]["$ref"] == "https://example.com/external-schema.json"

        # Dify reference is expanded
        assert props["dify_data"]["type"] == "object"
        assert props["dify_data"]["title"] == "File Schema"

    def test_no_refs_schema_unchanged(self):
        """A schema without any $ref comes back equal to the input"""
        simple_schema = {
            "type": "object",
            "properties": {
                "name": {"type": "string", "description": "Name field"},
                "items": {"type": "array", "items": {"type": "number"}},
            },
            "required": ["name"],
        }

        resolved = resolve_dify_schema_refs(simple_schema)

        # Equal to the input, field by field
        assert resolved == simple_schema
        assert resolved["type"] == "object"
        assert resolved["properties"]["name"]["type"] == "string"
        assert resolved["properties"]["items"]["items"]["type"] == "number"
        assert resolved["required"] == ["name"]

    def test_recursion_depth_protection(self):
        """The depth limit raises once nesting goes past max_depth"""
        # A qa_structure reference wrapped in two object layers
        deep_schema = {
            "type": "object",
            "properties": {
                "nested": {
                    "type": "object",
                    "properties": {
                        "nested": {"$ref": "https://dify.ai/schemas/v1/qa_structure.json"},
                    },
                },
            },
        }

        # A generous limit resolves without trouble
        resolved = resolve_dify_schema_refs(deep_schema, max_depth=25)
        assert resolved is not None
        assert resolved["type"] == "object"

        # A very small limit must be rejected
        with pytest.raises(RecursionError, match="Maximum recursion depth"):
            resolve_dify_schema_refs(deep_schema, max_depth=5)

Carregando…
Cancelar
Salvar