refactor(resolver): implement BFS approach for resolving references in Dify schemas

This commit is contained in:
Harry 2025-08-26 13:31:05 +08:00
parent 1d2d0ff49f
commit 7e20273bce
1 changed file with 117 additions and 19 deletions

View File

@ -1,4 +1,5 @@
import re
from collections import deque
from typing import Any, Optional
from core.schemas.registry import SchemaRegistry
@ -7,49 +8,146 @@ from core.schemas.registry import SchemaRegistry
def resolve_dify_schema_refs(schema: Any, registry: Optional[SchemaRegistry] = None, max_depth: int = 10) -> Any:
    """
    Resolve $ref references in Dify schema to actual schema content

    This is the public entry point; the traversal itself is delegated to
    ``_resolve_refs_bfs``.

    Args:
        schema: Schema object that may contain $ref references
        registry: Optional schema registry, defaults to default registry
        max_depth: Maximum depth to prevent infinite loops (default: 10)

    Returns:
        Schema with all $ref references resolved to actual content

    Raises:
        RecursionError: If maximum depth is exceeded
    """
    # Fall back to the shared default registry when the caller supplies none.
    if registry is None:
        registry = SchemaRegistry.default_registry()

    return _resolve_refs_bfs(schema, registry, max_depth)


def _resolve_refs_bfs(schema: Any, registry: SchemaRegistry, max_depth: int) -> Any:
"""
Resolve $ref references using Breadth-First Search (BFS) approach with cycle detection
Args:
schema: Schema object to process
registry: Schema registry for lookups
max_depth: Maximum allowed depth
Returns:
Schema with references resolved
Raises:
RecursionError: If maximum depth exceeded or circular reference detected
"""
import copy
# Deep copy the schema to avoid modifying original
result = copy.deepcopy(schema)
# Queue stores tuples: (current_value, parent_container, key_or_index, depth, ref_path)
# parent_container is the dict/list that contains current_value
# key_or_index is the key (for dict) or index (for list) to access current_value in parent
# ref_path is a tuple of resolved reference URIs to detect cycles
queue = deque([(result, None, None, 0, ())])
while queue:
current, parent, key, depth, ref_path = queue.popleft()
# Process based on type
if isinstance(current, dict):
# Check if this is a $ref reference
if "$ref" in current:
ref_uri = current["$ref"]
# Only resolve Dify schema references
if _is_dify_schema_ref(ref_uri):
# Check for circular reference
if ref_uri in ref_path:
# Found a cycle - leave the ref as-is to avoid infinite loop
# Could also raise an error here if preferred
current["$circular_ref"] = True # Mark as circular for debugging
continue
resolved_schema = registry.get_schema(ref_uri)
if resolved_schema:
# Remove metadata fields from resolved schema
cleaned_schema = _remove_metadata_fields(resolved_schema)
# Check depth limit before adding to queue
if depth + 1 > max_depth:
raise RecursionError(
f"Maximum depth ({max_depth}) exceeded while resolving schema references"
)
# Update ref_path with current reference
new_ref_path = ref_path + (ref_uri,)
# Replace the reference with resolved schema
if parent is None:
# Root level replacement
result = copy.deepcopy(cleaned_schema)
# Add the resolved schema back to queue for further processing
queue.append((result, None, None, depth + 1, new_ref_path))
else:
# Update parent container (works for both dict and list)
if isinstance(parent, (dict, list)):
parent[key] = copy.deepcopy(cleaned_schema)
# Add the resolved schema to queue for further processing
queue.append((parent[key], parent, key, depth + 1, new_ref_path))
# If schema not found, leave the original ref as-is
# Non-Dify reference, leave as-is
else:
# Regular dict, add all values to queue for processing
for k, v in current.items():
if isinstance(v, (dict, list)):
# Check depth limit before adding to queue
if depth + 1 > max_depth:
raise RecursionError(
f"Maximum depth ({max_depth}) exceeded while resolving schema references"
)
queue.append((v, current, k, depth + 1, ref_path))
elif isinstance(current, list):
# Process list items
for idx, item in enumerate(current):
if isinstance(item, (dict, list)):
# Check depth limit before adding to queue (fixed: should be > not >=)
if depth + 1 > max_depth:
raise RecursionError(f"Maximum depth ({max_depth}) exceeded while resolving schema references")
queue.append((item, current, idx, depth + 1, ref_path))
# Primitive values don't need processing
return result
def _resolve_refs_recursive(schema: Any, registry: SchemaRegistry, max_depth: int, current_depth: int) -> Any:
"""
Recursively resolve $ref references in schema
Args:
schema: Schema object to process
registry: Schema registry for lookups
max_depth: Maximum allowed recursion depth
current_depth: Current recursion depth
Returns:
Schema with references resolved
Raises:
RecursionError: If maximum depth exceeded
"""
# Check recursion depth
if current_depth >= max_depth:
raise RecursionError(f"Maximum recursion depth ({max_depth}) exceeded while resolving schema references")
if isinstance(schema, dict):
# Check if this is a $ref reference
if "$ref" in schema:
ref_uri = schema["$ref"]
# Only resolve Dify schema references
if _is_dify_schema_ref(ref_uri):
resolved_schema = registry.get_schema(ref_uri)
@ -70,11 +168,11 @@ def _resolve_refs_recursive(schema: Any, registry: SchemaRegistry, max_depth: in
for key, value in schema.items():
resolved_dict[key] = _resolve_refs_recursive(value, registry, max_depth, current_depth + 1)
return resolved_dict
elif isinstance(schema, list):
# Process list items recursively
return [_resolve_refs_recursive(item, registry, max_depth, current_depth + 1) for item in schema]
else:
# Primitive value, return as-is
return schema
@ -86,14 +184,14 @@ def _remove_metadata_fields(schema: dict) -> dict:
"""
if not isinstance(schema, dict):
return schema
# Create a copy and remove metadata fields
cleaned = schema.copy()
metadata_fields = ["$id", "$schema", "version"]
for field in metadata_fields:
cleaned.pop(field, None)
return cleaned
@ -103,7 +201,7 @@ def _is_dify_schema_ref(ref_uri: str) -> bool:
"""
if not isinstance(ref_uri, str):
return False
# Match Dify schema URI pattern: https://dify.ai/schemas/v*/name.json
pattern = r"^https://dify\.ai/schemas/(v\d+)/(.+)\.json$"
return bool(re.match(pattern, ref_uri))
return bool(re.match(pattern, ref_uri))