| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397 | 
							- import logging
 - import re
 - import threading
 - from collections import deque
 - from dataclasses import dataclass
 - from typing import Any, Union
 - 
 - from core.schemas.registry import SchemaRegistry
 - 
 - logger = logging.getLogger(__name__)
 - 
 - # Type aliases for better clarity
 - SchemaType = Union[dict[str, Any], list[Any], str, int, float, bool, None]
 - SchemaDict = dict[str, Any]
 - 
 - # Pre-compiled pattern for better performance
 - _DIFY_SCHEMA_PATTERN = re.compile(r"^https://dify\.ai/schemas/(v\d+)/(.+)\.json$")
 - 
 - 
 - class SchemaResolutionError(Exception):
 -     """Base exception for schema resolution errors"""
 - 
 -     pass
 - 
 - 
 - class CircularReferenceError(SchemaResolutionError):
 -     """Raised when a circular reference is detected"""
 - 
 -     def __init__(self, ref_uri: str, ref_path: list[str]):
 -         self.ref_uri = ref_uri
 -         self.ref_path = ref_path
 -         super().__init__(f"Circular reference detected: {ref_uri} in path {' -> '.join(ref_path)}")
 - 
 - 
 - class MaxDepthExceededError(SchemaResolutionError):
 -     """Raised when maximum resolution depth is exceeded"""
 - 
 -     def __init__(self, max_depth: int):
 -         self.max_depth = max_depth
 -         super().__init__(f"Maximum resolution depth ({max_depth}) exceeded")
 - 
 - 
 - class SchemaNotFoundError(SchemaResolutionError):
 -     """Raised when a referenced schema cannot be found"""
 - 
 -     def __init__(self, ref_uri: str):
 -         self.ref_uri = ref_uri
 -         super().__init__(f"Schema not found: {ref_uri}")
 - 
 - 
 - @dataclass
 - class QueueItem:
 -     """Represents an item in the BFS queue"""
 - 
 -     current: Any
 -     parent: Any | None
 -     key: Union[str, int] | None
 -     depth: int
 -     ref_path: set[str]
 - 
 - 
 - class SchemaResolver:
 -     """Resolver for Dify schema references with caching and optimizations"""
 - 
 -     _cache: dict[str, SchemaDict] = {}
 -     _cache_lock = threading.Lock()
 - 
 -     def __init__(self, registry: SchemaRegistry | None = None, max_depth: int = 10):
 -         """
 -         Initialize the schema resolver
 - 
 -         Args:
 -             registry: Schema registry to use (defaults to default registry)
 -             max_depth: Maximum depth for reference resolution
 -         """
 -         self.registry = registry or SchemaRegistry.default_registry()
 -         self.max_depth = max_depth
 - 
 -     @classmethod
 -     def clear_cache(cls) -> None:
 -         """Clear the global schema cache"""
 -         with cls._cache_lock:
 -             cls._cache.clear()
 - 
 -     def resolve(self, schema: SchemaType) -> SchemaType:
 -         """
 -         Resolve all $ref references in the schema
 - 
 -         Performance optimization: quickly checks for $ref presence before processing.
 - 
 -         Args:
 -             schema: Schema to resolve
 - 
 -         Returns:
 -             Resolved schema with all references expanded
 - 
 -         Raises:
 -             CircularReferenceError: If circular reference detected
 -             MaxDepthExceededError: If max depth exceeded
 -             SchemaNotFoundError: If referenced schema not found
 -         """
 -         if not isinstance(schema, (dict, list)):
 -             return schema
 - 
 -         # Fast path: if no Dify refs found, return original schema unchanged
 -         # This avoids expensive deepcopy and BFS traversal for schemas without refs
 -         if not _has_dify_refs(schema):
 -             return schema
 - 
 -         # Slow path: schema contains refs, perform full resolution
 -         import copy
 - 
 -         result = copy.deepcopy(schema)
 - 
 -         # Initialize BFS queue
 -         queue = deque([QueueItem(current=result, parent=None, key=None, depth=0, ref_path=set())])
 - 
 -         while queue:
 -             item = queue.popleft()
 - 
 -             # Process the current item
 -             self._process_queue_item(queue, item)
 - 
 -         return result
 - 
 -     def _process_queue_item(self, queue: deque, item: QueueItem) -> None:
 -         """Process a single queue item"""
 -         if isinstance(item.current, dict):
 -             self._process_dict(queue, item)
 -         elif isinstance(item.current, list):
 -             self._process_list(queue, item)
 - 
 -     def _process_dict(self, queue: deque, item: QueueItem) -> None:
 -         """Process a dictionary item"""
 -         ref_uri = item.current.get("$ref")
 - 
 -         if ref_uri and _is_dify_schema_ref(ref_uri):
 -             # Handle $ref resolution
 -             self._resolve_ref(queue, item, ref_uri)
 -         else:
 -             # Process nested items
 -             for key, value in item.current.items():
 -                 if isinstance(value, (dict, list)):
 -                     next_depth = item.depth + 1
 -                     if next_depth >= self.max_depth:
 -                         raise MaxDepthExceededError(self.max_depth)
 -                     queue.append(
 -                         QueueItem(current=value, parent=item.current, key=key, depth=next_depth, ref_path=item.ref_path)
 -                     )
 - 
 -     def _process_list(self, queue: deque, item: QueueItem) -> None:
 -         """Process a list item"""
 -         for idx, value in enumerate(item.current):
 -             if isinstance(value, (dict, list)):
 -                 next_depth = item.depth + 1
 -                 if next_depth >= self.max_depth:
 -                     raise MaxDepthExceededError(self.max_depth)
 -                 queue.append(
 -                     QueueItem(current=value, parent=item.current, key=idx, depth=next_depth, ref_path=item.ref_path)
 -                 )
 - 
 -     def _resolve_ref(self, queue: deque, item: QueueItem, ref_uri: str) -> None:
 -         """Resolve a $ref reference"""
 -         # Check for circular reference
 -         if ref_uri in item.ref_path:
 -             # Mark as circular and skip
 -             item.current["$circular_ref"] = True
 -             logger.warning("Circular reference detected: %s", ref_uri)
 -             return
 - 
 -         # Get resolved schema (from cache or registry)
 -         resolved_schema = self._get_resolved_schema(ref_uri)
 -         if not resolved_schema:
 -             logger.warning("Schema not found: %s", ref_uri)
 -             return
 - 
 -         # Update ref path
 -         new_ref_path = item.ref_path | {ref_uri}
 - 
 -         # Replace the reference with resolved schema
 -         next_depth = item.depth + 1
 -         if next_depth >= self.max_depth:
 -             raise MaxDepthExceededError(self.max_depth)
 - 
 -         if item.parent is None:
 -             # Root level replacement
 -             item.current.clear()
 -             item.current.update(resolved_schema)
 -             queue.append(
 -                 QueueItem(current=item.current, parent=None, key=None, depth=next_depth, ref_path=new_ref_path)
 -             )
 -         else:
 -             # Update parent container
 -             item.parent[item.key] = resolved_schema.copy()
 -             queue.append(
 -                 QueueItem(
 -                     current=item.parent[item.key],
 -                     parent=item.parent,
 -                     key=item.key,
 -                     depth=next_depth,
 -                     ref_path=new_ref_path,
 -                 )
 -             )
 - 
 -     def _get_resolved_schema(self, ref_uri: str) -> SchemaDict | None:
 -         """Get resolved schema from cache or registry"""
 -         # Check cache first
 -         with self._cache_lock:
 -             if ref_uri in self._cache:
 -                 return self._cache[ref_uri].copy()
 - 
 -         # Fetch from registry
 -         schema = self.registry.get_schema(ref_uri)
 -         if not schema:
 -             return None
 - 
 -         # Clean and cache
 -         cleaned = _remove_metadata_fields(schema)
 -         with self._cache_lock:
 -             self._cache[ref_uri] = cleaned
 - 
 -         return cleaned.copy()
 - 
 - 
 - def resolve_dify_schema_refs(
 -     schema: SchemaType, registry: SchemaRegistry | None = None, max_depth: int = 30
 - ) -> SchemaType:
 -     """
 -     Resolve $ref references in Dify schema to actual schema content
 - 
 -     This is a convenience function that creates a resolver and resolves the schema.
 -     Performance optimization: quickly checks for $ref presence before processing.
 - 
 -     Args:
 -         schema: Schema object that may contain $ref references
 -         registry: Optional schema registry, defaults to default registry
 -         max_depth: Maximum depth to prevent infinite loops (default: 30)
 - 
 -     Returns:
 -         Schema with all $ref references resolved to actual content
 - 
 -     Raises:
 -         CircularReferenceError: If circular reference detected
 -         MaxDepthExceededError: If maximum depth exceeded
 -         SchemaNotFoundError: If referenced schema not found
 -     """
 -     # Fast path: if no Dify refs found, return original schema unchanged
 -     # This avoids expensive deepcopy and BFS traversal for schemas without refs
 -     if not _has_dify_refs(schema):
 -         return schema
 - 
 -     # Slow path: schema contains refs, perform full resolution
 -     resolver = SchemaResolver(registry, max_depth)
 -     return resolver.resolve(schema)
 - 
 - 
 - def _remove_metadata_fields(schema: dict) -> dict:
 -     """
 -     Remove metadata fields from schema that shouldn't be included in resolved output
 - 
 -     Args:
 -         schema: Schema dictionary
 - 
 -     Returns:
 -         Cleaned schema without metadata fields
 -     """
 -     # Create a copy and remove metadata fields
 -     cleaned = schema.copy()
 -     metadata_fields = ["$id", "$schema", "version"]
 - 
 -     for field in metadata_fields:
 -         cleaned.pop(field, None)
 - 
 -     return cleaned
 - 
 - 
 - def _is_dify_schema_ref(ref_uri: Any) -> bool:
 -     """
 -     Check if the reference URI is a Dify schema reference
 - 
 -     Args:
 -         ref_uri: URI to check
 - 
 -     Returns:
 -         True if it's a Dify schema reference
 -     """
 -     if not isinstance(ref_uri, str):
 -         return False
 - 
 -     # Use pre-compiled pattern for better performance
 -     return bool(_DIFY_SCHEMA_PATTERN.match(ref_uri))
 - 
 - 
 - def _has_dify_refs_recursive(schema: SchemaType) -> bool:
 -     """
 -     Recursively check if a schema contains any Dify $ref references
 - 
 -     This is the fallback method when string-based detection is not possible.
 - 
 -     Args:
 -         schema: Schema to check for references
 - 
 -     Returns:
 -         True if any Dify $ref is found, False otherwise
 -     """
 -     if isinstance(schema, dict):
 -         # Check if this dict has a $ref field
 -         ref_uri = schema.get("$ref")
 -         if ref_uri and _is_dify_schema_ref(ref_uri):
 -             return True
 - 
 -         # Check nested values
 -         for value in schema.values():
 -             if _has_dify_refs_recursive(value):
 -                 return True
 - 
 -     elif isinstance(schema, list):
 -         # Check each item in the list
 -         for item in schema:
 -             if _has_dify_refs_recursive(item):
 -                 return True
 - 
 -     # Primitive types don't contain refs
 -     return False
 - 
 - 
 - def _has_dify_refs_hybrid(schema: SchemaType) -> bool:
 -     """
 -     Hybrid detection: fast string scan followed by precise recursive check
 - 
 -     Performance optimization using two-phase detection:
 -     1. Fast string scan to quickly eliminate schemas without $ref
 -     2. Precise recursive validation only for potential candidates
 - 
 -     Args:
 -         schema: Schema to check for references
 - 
 -     Returns:
 -         True if any Dify $ref is found, False otherwise
 -     """
 -     # Phase 1: Fast string-based pre-filtering
 -     try:
 -         import json
 - 
 -         schema_str = json.dumps(schema, separators=(",", ":"))
 - 
 -         # Quick elimination: no $ref at all
 -         if '"$ref"' not in schema_str:
 -             return False
 - 
 -         # Quick elimination: no Dify schema URLs
 -         if "https://dify.ai/schemas/" not in schema_str:
 -             return False
 - 
 -     except (TypeError, ValueError, OverflowError):
 -         # JSON serialization failed (e.g., circular references, non-serializable objects)
 -         # Fall back to recursive detection
 -         logger.debug("JSON serialization failed for schema, using recursive detection")
 -         return _has_dify_refs_recursive(schema)
 - 
 -     # Phase 2: Precise recursive validation
 -     # Only executed for schemas that passed string pre-filtering
 -     return _has_dify_refs_recursive(schema)
 - 
 - 
 - def _has_dify_refs(schema: SchemaType) -> bool:
 -     """
 -     Check if a schema contains any Dify $ref references
 - 
 -     Uses hybrid detection for optimal performance:
 -     - Fast string scan for quick elimination
 -     - Precise recursive check for validation
 - 
 -     Args:
 -         schema: Schema to check for references
 - 
 -     Returns:
 -         True if any Dify $ref is found, False otherwise
 -     """
 -     return _has_dify_refs_hybrid(schema)
 - 
 - 
 - def parse_dify_schema_uri(uri: str) -> tuple[str, str]:
 -     """
 -     Parse a Dify schema URI to extract version and schema name
 - 
 -     Args:
 -         uri: Schema URI to parse
 - 
 -     Returns:
 -         Tuple of (version, schema_name) or ("", "") if invalid
 -     """
 -     match = _DIFY_SCHEMA_PATTERN.match(uri)
 -     if not match:
 -         return "", ""
 - 
 -     return match.group(1), match.group(2)
 
 
  |