mirror of
https://github.com/langgenius/dify.git
synced 2026-04-28 20:17:29 +08:00
refactor(graph_engine): Merge error strategies into error_handler.py
Signed-off-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
parent
e060d7c28c
commit
d52621fce3
@ -1,20 +1,12 @@
|
|||||||
"""
|
"""
|
||||||
Error handling strategies for graph engine.
|
Error handling for graph engine.
|
||||||
|
|
||||||
This package implements different error recovery strategies using
|
This package provides error handling functionality for managing
|
||||||
the Strategy pattern for clean separation of concerns.
|
node execution failures with different recovery strategies.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .abort_strategy import AbortStrategy
|
|
||||||
from .default_value_strategy import DefaultValueStrategy
|
|
||||||
from .error_handler import ErrorHandler
|
from .error_handler import ErrorHandler
|
||||||
from .fail_branch_strategy import FailBranchStrategy
|
|
||||||
from .retry_strategy import RetryStrategy
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"AbortStrategy",
|
|
||||||
"DefaultValueStrategy",
|
|
||||||
"ErrorHandler",
|
"ErrorHandler",
|
||||||
"FailBranchStrategy",
|
|
||||||
"RetryStrategy",
|
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1,40 +0,0 @@
|
|||||||
"""
|
|
||||||
Abort error strategy implementation.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import final
|
|
||||||
|
|
||||||
from core.workflow.graph import Graph
|
|
||||||
from core.workflow.graph_events import GraphNodeEventBase, NodeRunFailedEvent
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
@final
|
|
||||||
class AbortStrategy:
|
|
||||||
"""
|
|
||||||
Error strategy that aborts execution on failure.
|
|
||||||
|
|
||||||
This is the default strategy when no other strategy is specified.
|
|
||||||
It stops the entire graph execution when a node fails.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def handle_error(self, event: NodeRunFailedEvent, graph: Graph, retry_count: int) -> GraphNodeEventBase | None:
|
|
||||||
"""
|
|
||||||
Handle error by aborting execution.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
event: The failure event
|
|
||||||
graph: The workflow graph
|
|
||||||
retry_count: Current retry attempt count (unused)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
None - signals abortion
|
|
||||||
"""
|
|
||||||
_ = graph
|
|
||||||
_ = retry_count
|
|
||||||
logger.error("Node %s failed with ABORT strategy: %s", event.node_id, event.error)
|
|
||||||
|
|
||||||
# Return None to signal that execution should stop
|
|
||||||
return None
|
|
||||||
@ -1,58 +0,0 @@
|
|||||||
"""
|
|
||||||
Default value error strategy implementation.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import final
|
|
||||||
|
|
||||||
from core.workflow.enums import ErrorStrategy, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
|
|
||||||
from core.workflow.graph import Graph
|
|
||||||
from core.workflow.graph_events import GraphNodeEventBase, NodeRunExceptionEvent, NodeRunFailedEvent
|
|
||||||
from core.workflow.node_events import NodeRunResult
|
|
||||||
|
|
||||||
|
|
||||||
@final
|
|
||||||
class DefaultValueStrategy:
|
|
||||||
"""
|
|
||||||
Error strategy that uses default values on failure.
|
|
||||||
|
|
||||||
This strategy allows nodes to fail gracefully by providing
|
|
||||||
predefined default output values.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def handle_error(self, event: NodeRunFailedEvent, graph: Graph, retry_count: int) -> GraphNodeEventBase | None:
|
|
||||||
"""
|
|
||||||
Handle error by using default values.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
event: The failure event
|
|
||||||
graph: The workflow graph
|
|
||||||
retry_count: Current retry attempt count (unused)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
NodeRunExceptionEvent with default values
|
|
||||||
"""
|
|
||||||
_ = retry_count
|
|
||||||
node = graph.nodes[event.node_id]
|
|
||||||
|
|
||||||
outputs = {
|
|
||||||
**node.default_value_dict,
|
|
||||||
"error_message": event.node_run_result.error,
|
|
||||||
"error_type": event.node_run_result.error_type,
|
|
||||||
}
|
|
||||||
|
|
||||||
return NodeRunExceptionEvent(
|
|
||||||
id=event.id,
|
|
||||||
node_id=event.node_id,
|
|
||||||
node_type=event.node_type,
|
|
||||||
start_at=event.start_at,
|
|
||||||
node_run_result=NodeRunResult(
|
|
||||||
status=WorkflowNodeExecutionStatus.EXCEPTION,
|
|
||||||
inputs=event.node_run_result.inputs,
|
|
||||||
process_data=event.node_run_result.process_data,
|
|
||||||
outputs=outputs,
|
|
||||||
metadata={
|
|
||||||
WorkflowNodeExecutionMetadataKey.ERROR_STRATEGY: ErrorStrategy.DEFAULT_VALUE,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
error=event.error,
|
|
||||||
)
|
|
||||||
@ -2,20 +2,31 @@
|
|||||||
Main error handler that coordinates error strategies.
|
Main error handler that coordinates error strategies.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
from typing import TYPE_CHECKING, final
|
from typing import TYPE_CHECKING, final
|
||||||
|
|
||||||
from core.workflow.enums import ErrorStrategy as ErrorStrategyEnum
|
from core.workflow.enums import (
|
||||||
|
ErrorStrategy as ErrorStrategyEnum,
|
||||||
|
)
|
||||||
|
from core.workflow.enums import (
|
||||||
|
WorkflowNodeExecutionMetadataKey,
|
||||||
|
WorkflowNodeExecutionStatus,
|
||||||
|
)
|
||||||
from core.workflow.graph import Graph
|
from core.workflow.graph import Graph
|
||||||
from core.workflow.graph_events import GraphNodeEventBase, NodeRunFailedEvent
|
from core.workflow.graph_events import (
|
||||||
|
GraphNodeEventBase,
|
||||||
from .abort_strategy import AbortStrategy
|
NodeRunExceptionEvent,
|
||||||
from .default_value_strategy import DefaultValueStrategy
|
NodeRunFailedEvent,
|
||||||
from .fail_branch_strategy import FailBranchStrategy
|
NodeRunRetryEvent,
|
||||||
from .retry_strategy import RetryStrategy
|
)
|
||||||
|
from core.workflow.node_events import NodeRunResult
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from ..domain import GraphExecution
|
from ..domain import GraphExecution
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@final
|
@final
|
||||||
class ErrorHandler:
|
class ErrorHandler:
|
||||||
@ -38,12 +49,6 @@ class ErrorHandler:
|
|||||||
self._graph = graph
|
self._graph = graph
|
||||||
self._graph_execution = graph_execution
|
self._graph_execution = graph_execution
|
||||||
|
|
||||||
# Initialize strategies
|
|
||||||
self._abort_strategy = AbortStrategy()
|
|
||||||
self._retry_strategy = RetryStrategy()
|
|
||||||
self._fail_branch_strategy = FailBranchStrategy()
|
|
||||||
self._default_value_strategy = DefaultValueStrategy()
|
|
||||||
|
|
||||||
def handle_node_failure(self, event: NodeRunFailedEvent) -> GraphNodeEventBase | None:
|
def handle_node_failure(self, event: NodeRunFailedEvent) -> GraphNodeEventBase | None:
|
||||||
"""
|
"""
|
||||||
Handle a node failure event.
|
Handle a node failure event.
|
||||||
@ -64,7 +69,7 @@ class ErrorHandler:
|
|||||||
|
|
||||||
# First check if retry is configured and not exhausted
|
# First check if retry is configured and not exhausted
|
||||||
if node.retry and retry_count < node.retry_config.max_retries:
|
if node.retry and retry_count < node.retry_config.max_retries:
|
||||||
result = self._retry_strategy.handle_error(event, self._graph, retry_count)
|
result = self._handle_retry(event, retry_count)
|
||||||
if result:
|
if result:
|
||||||
# Retry count will be incremented when NodeRunRetryEvent is handled
|
# Retry count will be incremented when NodeRunRetryEvent is handled
|
||||||
return result
|
return result
|
||||||
@ -74,8 +79,133 @@ class ErrorHandler:
|
|||||||
|
|
||||||
match strategy:
|
match strategy:
|
||||||
case None:
|
case None:
|
||||||
return self._abort_strategy.handle_error(event, self._graph, retry_count)
|
return self._handle_abort(event)
|
||||||
case ErrorStrategyEnum.FAIL_BRANCH:
|
case ErrorStrategyEnum.FAIL_BRANCH:
|
||||||
return self._fail_branch_strategy.handle_error(event, self._graph, retry_count)
|
return self._handle_fail_branch(event)
|
||||||
case ErrorStrategyEnum.DEFAULT_VALUE:
|
case ErrorStrategyEnum.DEFAULT_VALUE:
|
||||||
return self._default_value_strategy.handle_error(event, self._graph, retry_count)
|
return self._handle_default_value(event)
|
||||||
|
|
||||||
|
def _handle_abort(self, event: NodeRunFailedEvent):
|
||||||
|
"""
|
||||||
|
Handle error by aborting execution.
|
||||||
|
|
||||||
|
This is the default strategy when no other strategy is specified.
|
||||||
|
It stops the entire graph execution when a node fails.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
event: The failure event
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None - signals abortion
|
||||||
|
"""
|
||||||
|
logger.error("Node %s failed with ABORT strategy: %s", event.node_id, event.error)
|
||||||
|
# Return None to signal that execution should stop
|
||||||
|
|
||||||
|
def _handle_retry(self, event: NodeRunFailedEvent, retry_count: int):
|
||||||
|
"""
|
||||||
|
Handle error by retrying the node.
|
||||||
|
|
||||||
|
This strategy re-attempts node execution up to a configured
|
||||||
|
maximum number of retries with configurable intervals.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
event: The failure event
|
||||||
|
retry_count: Current retry attempt count
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
NodeRunRetryEvent if retry should occur, None otherwise
|
||||||
|
"""
|
||||||
|
node = self._graph.nodes[event.node_id]
|
||||||
|
|
||||||
|
# Check if we've exceeded max retries
|
||||||
|
if not node.retry or retry_count >= node.retry_config.max_retries:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Wait for retry interval
|
||||||
|
time.sleep(node.retry_config.retry_interval_seconds)
|
||||||
|
|
||||||
|
# Create retry event
|
||||||
|
return NodeRunRetryEvent(
|
||||||
|
id=event.id,
|
||||||
|
node_title=node.title,
|
||||||
|
node_id=event.node_id,
|
||||||
|
node_type=event.node_type,
|
||||||
|
node_run_result=event.node_run_result,
|
||||||
|
start_at=event.start_at,
|
||||||
|
error=event.error,
|
||||||
|
retry_index=retry_count + 1,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _handle_fail_branch(self, event: NodeRunFailedEvent):
|
||||||
|
"""
|
||||||
|
Handle error by taking the fail branch.
|
||||||
|
|
||||||
|
This strategy converts failures to exceptions and routes execution
|
||||||
|
through a designated fail-branch edge.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
event: The failure event
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
NodeRunExceptionEvent to continue via fail branch
|
||||||
|
"""
|
||||||
|
outputs = {
|
||||||
|
"error_message": event.node_run_result.error,
|
||||||
|
"error_type": event.node_run_result.error_type,
|
||||||
|
}
|
||||||
|
|
||||||
|
return NodeRunExceptionEvent(
|
||||||
|
id=event.id,
|
||||||
|
node_id=event.node_id,
|
||||||
|
node_type=event.node_type,
|
||||||
|
start_at=event.start_at,
|
||||||
|
node_run_result=NodeRunResult(
|
||||||
|
status=WorkflowNodeExecutionStatus.EXCEPTION,
|
||||||
|
inputs=event.node_run_result.inputs,
|
||||||
|
process_data=event.node_run_result.process_data,
|
||||||
|
outputs=outputs,
|
||||||
|
edge_source_handle="fail-branch",
|
||||||
|
metadata={
|
||||||
|
WorkflowNodeExecutionMetadataKey.ERROR_STRATEGY: ErrorStrategyEnum.FAIL_BRANCH,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
error=event.error,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _handle_default_value(self, event: NodeRunFailedEvent):
|
||||||
|
"""
|
||||||
|
Handle error by using default values.
|
||||||
|
|
||||||
|
This strategy allows nodes to fail gracefully by providing
|
||||||
|
predefined default output values.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
event: The failure event
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
NodeRunExceptionEvent with default values
|
||||||
|
"""
|
||||||
|
node = self._graph.nodes[event.node_id]
|
||||||
|
|
||||||
|
outputs = {
|
||||||
|
**node.default_value_dict,
|
||||||
|
"error_message": event.node_run_result.error,
|
||||||
|
"error_type": event.node_run_result.error_type,
|
||||||
|
}
|
||||||
|
|
||||||
|
return NodeRunExceptionEvent(
|
||||||
|
id=event.id,
|
||||||
|
node_id=event.node_id,
|
||||||
|
node_type=event.node_type,
|
||||||
|
start_at=event.start_at,
|
||||||
|
node_run_result=NodeRunResult(
|
||||||
|
status=WorkflowNodeExecutionStatus.EXCEPTION,
|
||||||
|
inputs=event.node_run_result.inputs,
|
||||||
|
process_data=event.node_run_result.process_data,
|
||||||
|
outputs=outputs,
|
||||||
|
metadata={
|
||||||
|
WorkflowNodeExecutionMetadataKey.ERROR_STRATEGY: ErrorStrategyEnum.DEFAULT_VALUE,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
error=event.error,
|
||||||
|
)
|
||||||
|
|||||||
@ -1,57 +0,0 @@
|
|||||||
"""
|
|
||||||
Fail branch error strategy implementation.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import final
|
|
||||||
|
|
||||||
from core.workflow.enums import ErrorStrategy, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
|
|
||||||
from core.workflow.graph import Graph
|
|
||||||
from core.workflow.graph_events import GraphNodeEventBase, NodeRunExceptionEvent, NodeRunFailedEvent
|
|
||||||
from core.workflow.node_events import NodeRunResult
|
|
||||||
|
|
||||||
|
|
||||||
@final
|
|
||||||
class FailBranchStrategy:
|
|
||||||
"""
|
|
||||||
Error strategy that continues execution via a fail branch.
|
|
||||||
|
|
||||||
This strategy converts failures to exceptions and routes execution
|
|
||||||
through a designated fail-branch edge.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def handle_error(self, event: NodeRunFailedEvent, graph: Graph, retry_count: int) -> GraphNodeEventBase | None:
|
|
||||||
"""
|
|
||||||
Handle error by taking the fail branch.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
event: The failure event
|
|
||||||
graph: The workflow graph
|
|
||||||
retry_count: Current retry attempt count (unused)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
NodeRunExceptionEvent to continue via fail branch
|
|
||||||
"""
|
|
||||||
_ = graph
|
|
||||||
_ = retry_count
|
|
||||||
outputs = {
|
|
||||||
"error_message": event.node_run_result.error,
|
|
||||||
"error_type": event.node_run_result.error_type,
|
|
||||||
}
|
|
||||||
|
|
||||||
return NodeRunExceptionEvent(
|
|
||||||
id=event.id,
|
|
||||||
node_id=event.node_id,
|
|
||||||
node_type=event.node_type,
|
|
||||||
start_at=event.start_at,
|
|
||||||
node_run_result=NodeRunResult(
|
|
||||||
status=WorkflowNodeExecutionStatus.EXCEPTION,
|
|
||||||
inputs=event.node_run_result.inputs,
|
|
||||||
process_data=event.node_run_result.process_data,
|
|
||||||
outputs=outputs,
|
|
||||||
edge_source_handle="fail-branch",
|
|
||||||
metadata={
|
|
||||||
WorkflowNodeExecutionMetadataKey.ERROR_STRATEGY: ErrorStrategy.FAIL_BRANCH,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
error=event.error,
|
|
||||||
)
|
|
||||||
@ -1,52 +0,0 @@
|
|||||||
"""
|
|
||||||
Retry error strategy implementation.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import time
|
|
||||||
from typing import final
|
|
||||||
|
|
||||||
from core.workflow.graph import Graph
|
|
||||||
from core.workflow.graph_events import GraphNodeEventBase, NodeRunFailedEvent, NodeRunRetryEvent
|
|
||||||
|
|
||||||
|
|
||||||
@final
|
|
||||||
class RetryStrategy:
|
|
||||||
"""
|
|
||||||
Error strategy that retries failed nodes.
|
|
||||||
|
|
||||||
This strategy re-attempts node execution up to a configured
|
|
||||||
maximum number of retries with configurable intervals.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def handle_error(self, event: NodeRunFailedEvent, graph: Graph, retry_count: int) -> GraphNodeEventBase | None:
|
|
||||||
"""
|
|
||||||
Handle error by retrying the node.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
event: The failure event
|
|
||||||
graph: The workflow graph
|
|
||||||
retry_count: Current retry attempt count
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
NodeRunRetryEvent if retry should occur, None otherwise
|
|
||||||
"""
|
|
||||||
node = graph.nodes[event.node_id]
|
|
||||||
|
|
||||||
# Check if we've exceeded max retries
|
|
||||||
if not node.retry or retry_count >= node.retry_config.max_retries:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Wait for retry interval
|
|
||||||
time.sleep(node.retry_config.retry_interval_seconds)
|
|
||||||
|
|
||||||
# Create retry event
|
|
||||||
return NodeRunRetryEvent(
|
|
||||||
id=event.id,
|
|
||||||
node_title=node.title,
|
|
||||||
node_id=event.node_id,
|
|
||||||
node_type=event.node_type,
|
|
||||||
node_run_result=event.node_run_result,
|
|
||||||
start_at=event.start_at,
|
|
||||||
error=event.error,
|
|
||||||
retry_index=retry_count + 1,
|
|
||||||
)
|
|
||||||
@ -1,31 +0,0 @@
|
|||||||
"""
|
|
||||||
Base error strategy protocol.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import Protocol
|
|
||||||
|
|
||||||
from core.workflow.graph import Graph
|
|
||||||
from core.workflow.graph_events import GraphNodeEventBase, NodeRunFailedEvent
|
|
||||||
|
|
||||||
|
|
||||||
class ErrorStrategy(Protocol):
|
|
||||||
"""
|
|
||||||
Protocol for error handling strategies.
|
|
||||||
|
|
||||||
Each strategy implements a different approach to handling
|
|
||||||
node execution failures.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def handle_error(self, event: NodeRunFailedEvent, graph: Graph, retry_count: int) -> GraphNodeEventBase | None:
|
|
||||||
"""
|
|
||||||
Handle a node failure event.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
event: The failure event
|
|
||||||
graph: The workflow graph
|
|
||||||
retry_count: Current retry attempt count
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Optional new event to process, or None to stop
|
|
||||||
"""
|
|
||||||
...
|
|
||||||
Loading…
Reference in New Issue
Block a user