feat(api): Implement EventManager error logging and add coverage (#29204)

- Ensure `EventManager._notify_layers` logs exceptions instead of silently swallowing them,
  so GraphEngine layer failures surface for debugging
- Introduce unit tests asserting that the logger captures the runtime error raised while collecting events
- Enable the `S110` lint rule to catch `try-except-pass` patterns
- Add proper error logging to existing `try-except-pass` blocks (a minimal sketch of the pattern follows the change summary below)
QuantumGhost 2025-12-08 09:40:40 +08:00 committed by GitHub
parent a25faa334a
commit 91667e3c1d
15 changed files with 103 additions and 33 deletions
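
For orientation, here is a minimal sketch of the pattern this commit applies throughout (the helper below is illustrative, not taken from the diff): Ruff's `S110` flags a bare `try`/`except`/`pass`, and the fix keeps the exception contained but records it with `logger.exception`, which logs at ERROR level and attaches the active traceback.

```python
import logging

logger = logging.getLogger(__name__)


def notify_layer(layer, event):  # hypothetical helper, not from the diff
    # Before (flagged by S110): the failure disappears without a trace.
    #     try:
    #         layer.on_event(event)
    #     except Exception:
    #         pass
    #
    # After: the exception is still contained, but the traceback is logged.
    try:
        layer.on_event(event)
    except Exception:
        logger.exception("Error in layer on_event, layer_type=%s", type(layer))
```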

@@ -36,17 +36,20 @@ select = [
     "UP", # pyupgrade rules
     "W191", # tab-indentation
     "W605", # invalid-escape-sequence
+    "G001", # don't use str format to logging messages
+    "G003", # don't use + in logging messages
+    "G004", # don't use f-strings to format logging messages
+    "UP042", # use StrEnum
+    "S110", # disallow the try-except-pass pattern.
     # security related linting rules
     # RCE proctection (sort of)
     "S102", # exec-builtin, disallow use of `exec`
     "S307", # suspicious-eval-usage, disallow use of `eval` and `ast.literal_eval`
     "S301", # suspicious-pickle-usage, disallow use of `pickle` and its wrappers.
     "S302", # suspicious-marshal-usage, disallow use of `marshal` module
     "S311", # suspicious-non-cryptographic-random-usage
-    "G001", # don't use str format to logging messages
-    "G003", # don't use + in logging messages
-    "G004", # don't use f-strings to format logging messages
-    "UP042", # use StrEnum
 ]
 ignore = [
@@ -91,18 +94,16 @@ ignore = [
 "configs/*" = [
     "N802", # invalid-function-name
 ]
-"core/model_runtime/callbacks/base_callback.py" = [
-    "T201",
-]
-"core/workflow/callbacks/workflow_logging_callback.py" = [
-    "T201",
-]
+"core/model_runtime/callbacks/base_callback.py" = ["T201"]
+"core/workflow/callbacks/workflow_logging_callback.py" = ["T201"]
 "libs/gmpy2_pkcs10aep_cipher.py" = [
     "N803", # invalid-argument-name
 ]
 "tests/*" = [
     "F811", # redefined-while-unused
     "T201", # allow print in tests
+    "S110", # allow ignoring exceptions in tests code (currently)
 ]

 [lint.pyflakes]

@@ -1,3 +1,4 @@
+import logging
 import time
 from collections.abc import Callable
 from datetime import timedelta
@@ -28,6 +29,8 @@ P = ParamSpec("P")
 R = TypeVar("R")
 T = TypeVar("T")

+logger = logging.getLogger(__name__)
+

 class WhereisUserArg(StrEnum):
     """
@@ -238,8 +241,8 @@ def validate_dataset_token(view: Callable[Concatenate[T, P], R] | None = None):
                     # Basic check: UUIDs are 36 chars with hyphens
                     if len(str_id) == 36 and str_id.count("-") == 4:
                         dataset_id = str_id
-            except:
-                pass
+            except Exception:
+                logger.exception("Failed to parse dataset_id from class method args")
         elif len(args) > 0:
             # Not a class method, check if args[0] looks like a UUID
             potential_id = args[0]
@@ -247,8 +250,8 @@ def validate_dataset_token(view: Callable[Concatenate[T, P], R] | None = None):
                 str_id = str(potential_id)
                 if len(str_id) == 36 and str_id.count("-") == 4:
                     dataset_id = str_id
-            except:
-                pass
+            except Exception:
+                logger.exception("Failed to parse dataset_id from positional args")

         # Validate dataset if dataset_id is provided
         if dataset_id:

@@ -1,4 +1,5 @@
 import json
+import logging
 from abc import ABC, abstractmethod
 from collections.abc import Generator, Mapping, Sequence
 from typing import Any
@@ -23,6 +24,8 @@ from core.tools.entities.tool_entities import ToolInvokeMeta
 from core.tools.tool_engine import ToolEngine
 from models.model import Message

+logger = logging.getLogger(__name__)
+

 class CotAgentRunner(BaseAgentRunner, ABC):
     _is_first_iteration = True
@@ -400,8 +403,8 @@ class CotAgentRunner(BaseAgentRunner, ABC):
                         action_input=json.loads(message.tool_calls[0].function.arguments),
                     )
                     current_scratchpad.action_str = json.dumps(current_scratchpad.action.to_dict())
-                except:
-                    pass
+                except Exception:
+                    logger.exception("Failed to parse tool call from assistant message")
             elif isinstance(message, ToolPromptMessage):
                 if current_scratchpad:
                     assert isinstance(message.content, str)

@@ -253,7 +253,7 @@ class ProviderConfiguration(BaseModel):
                 try:
                     credentials[key] = encrypter.decrypt_token(tenant_id=self.tenant_id, token=credentials[key])
                 except Exception:
-                    pass
+                    logger.exception("Failed to decrypt credential secret variable %s", key)

         return self.obfuscated_credentials(
             credentials=credentials,
@@ -765,7 +765,7 @@ class ProviderConfiguration(BaseModel):
                 try:
                     credentials[key] = encrypter.decrypt_token(tenant_id=self.tenant_id, token=credentials[key])
                 except Exception:
-                    pass
+                    logger.exception("Failed to decrypt model credential secret variable %s", key)

                 current_credential_id = credential_record.id
                 current_credential_name = credential_record.credential_name

@@ -1,3 +1,4 @@
+import logging
 from collections.abc import Sequence

 import httpx
@@ -8,6 +9,7 @@ from core.helper.download import download_with_size_limit
 from core.plugin.entities.marketplace import MarketplacePluginDeclaration

 marketplace_api_url = URL(str(dify_config.MARKETPLACE_API_URL))
+logger = logging.getLogger(__name__)


 def get_plugin_pkg_url(plugin_unique_identifier: str) -> str:
@@ -55,7 +57,9 @@ def batch_fetch_plugin_manifests_ignore_deserialization_error(
         try:
             result.append(MarketplacePluginDeclaration.model_validate(plugin))
         except Exception:
-            pass
+            logger.exception(
+                "Failed to deserialize marketplace plugin manifest for %s", plugin.get("plugin_id", "unknown")
+            )

     return result

@@ -521,4 +521,4 @@ class TencentDataTrace(BaseTraceInstance):
             if hasattr(self, "trace_client"):
                 self.trace_client.shutdown()
         except Exception:
-            pass
+            logger.exception("[Tencent APM] Failed to shutdown trace client during cleanup")

@@ -723,7 +723,7 @@ class ToolManager:
                 )
             except Exception:
                 # app has been deleted
-                pass
+                logger.exception("Failed to transform workflow provider %s to controller", workflow_provider.id)

         labels = ToolLabelManager.get_tools_labels(
             [cast(ToolProviderController, controller) for controller in workflow_provider_controllers]

@@ -2,6 +2,7 @@
 Unified event manager for collecting and emitting events.
 """

+import logging
 import threading
 import time
 from collections.abc import Generator
@@ -12,6 +13,8 @@ from core.workflow.graph_events import GraphEngineEvent
 from ..layers.base import GraphEngineLayer

+_logger = logging.getLogger(__name__)
+

 @final
 class ReadWriteLock:
@@ -180,5 +183,4 @@ class EventManager:
             try:
                 layer.on_event(event)
             except Exception:
-                # Silently ignore layer errors during collection
-                pass
+                _logger.exception("Error in layer on_event, layer_type=%s", type(layer))

@@ -6,12 +6,15 @@ using the new Redis command channel, without requiring user permission checks.
 Supports stop, pause, and resume operations.
 """

+import logging
 from typing import final

 from core.workflow.graph_engine.command_channels.redis_channel import RedisChannel
 from core.workflow.graph_engine.entities.commands import AbortCommand, GraphEngineCommand, PauseCommand
 from extensions.ext_redis import redis_client

+logger = logging.getLogger(__name__)
+

 @final
 class GraphEngineManager:
@@ -57,4 +60,4 @@ class GraphEngineManager:
         except Exception:
             # Silently fail if Redis is unavailable
             # The legacy control mechanisms will still work
-            pass
+            logger.exception("Failed to send graph engine command %s for task %s", command.__class__.__name__, task_id)

@@ -1,9 +1,13 @@
+import logging
+
 from core.tools.tool_manager import ToolManager
 from core.tools.utils.configuration import ToolParameterConfigurationManager
 from core.workflow.nodes import NodeType
 from core.workflow.nodes.tool.entities import ToolEntity
 from events.app_event import app_draft_workflow_was_synced

+logger = logging.getLogger(__name__)
+

 @app_draft_workflow_was_synced.connect
 def handle(sender, **kwargs):
@@ -30,6 +34,10 @@ def handle(sender, **kwargs):
                     identity_id=f"WORKFLOW.{app.id}.{node_data.get('id')}",
                 )
                 manager.delete_tool_parameters_cache()
-            except:
-                # tool dose not exist
-                pass
+            except Exception:
+                logger.exception(
+                    "Failed to delete tool parameters cache for workflow %s node %s",
+                    app.id,
+                    node_data.get("id"),
+                )

@@ -199,9 +199,9 @@ class FileLifecycleManager:
                         # Temporarily create basic metadata information
                     except ValueError:
                         continue
-        except:
+        except Exception:
             # If cannot scan version files, only return current version
-            pass
+            logger.exception("Failed to scan version files for %s", filename)

         return sorted(versions, key=lambda x: x.version or 0, reverse=True)

@@ -211,7 +211,7 @@ class AppService:
                         # override tool parameters
                         tool["tool_parameters"] = masked_parameter
                     except Exception:
-                        pass
+                        logger.exception("Failed to mask agent tool parameters for tool %s", agent_tool_entity.tool_name)

             # override agent mode
             if model_config:

@@ -1,4 +1,5 @@
 import json
+import logging
 from collections.abc import Mapping
 from datetime import datetime
 from typing import Any
@@ -19,6 +20,8 @@ from models.tools import WorkflowToolProvider
 from models.workflow import Workflow
 from services.tools.tools_transform_service import ToolTransformService

+logger = logging.getLogger(__name__)
+

 class WorkflowToolManageService:
     """
@@ -198,7 +201,7 @@ class WorkflowToolManageService:
                 tools.append(ToolTransformService.workflow_provider_to_controller(provider))
             except Exception:
                 # skip deleted tools
-                pass
+                logger.exception("Failed to load workflow tool provider %s", provider.id)

         labels = ToolLabelManager.get_tools_labels([t for t in tools if isinstance(t, ToolProviderController)])

@@ -1,4 +1,5 @@
 import json
+import logging
 import operator
 import typing
@@ -12,6 +13,8 @@ from core.plugin.impl.plugin import PluginInstaller
 from extensions.ext_redis import redis_client
 from models.account import TenantPluginAutoUpgradeStrategy

+logger = logging.getLogger(__name__)
+
 RETRY_TIMES_OF_ONE_PLUGIN_IN_ONE_TENANT = 3
 CACHE_REDIS_KEY_PREFIX = "plugin_autoupgrade_check_task:cached_plugin_manifests:"
 CACHE_REDIS_TTL = 60 * 15  # 15 minutes
@@ -42,6 +45,7 @@ def _get_cached_manifest(plugin_id: str) -> typing.Union[MarketplacePluginDeclar
         return MarketplacePluginDeclaration.model_validate(cached_json)
     except Exception:
+        logger.exception("Failed to get cached manifest for plugin %s", plugin_id)
         return False
@@ -63,7 +67,7 @@ def _set_cached_manifest(plugin_id: str, manifest: typing.Union[MarketplacePlugi
     except Exception:
         # If Redis fails, continue without caching
         # traceback.print_exc()
-        pass
+        logger.exception("Failed to set cached manifest for plugin %s", plugin_id)


 def marketplace_batch_fetch_plugin_manifests(

@@ -0,0 +1,39 @@
+"""Tests for the EventManager."""
+
+from __future__ import annotations
+
+import logging
+
+from core.workflow.graph_engine.event_management.event_manager import EventManager
+from core.workflow.graph_engine.layers.base import GraphEngineLayer
+from core.workflow.graph_events import GraphEngineEvent
+
+
+class _FaultyLayer(GraphEngineLayer):
+    """Layer that raises from on_event to test error handling."""
+
+    def on_graph_start(self) -> None:  # pragma: no cover - not used in tests
+        pass
+
+    def on_event(self, event: GraphEngineEvent) -> None:
+        raise RuntimeError("boom")
+
+    def on_graph_end(self, error: Exception | None) -> None:  # pragma: no cover - not used in tests
+        pass
+
+
+def test_event_manager_logs_layer_errors(caplog) -> None:
+    """Ensure errors raised by layers are logged when collecting events."""
+    event_manager = EventManager()
+    event_manager.set_layers([_FaultyLayer()])
+
+    with caplog.at_level(logging.ERROR):
+        event_manager.collect(GraphEngineEvent())
+
+    error_logs = [record for record in caplog.records if "Error in layer on_event" in record.getMessage()]
+    assert error_logs, "Expected layer errors to be logged"
+
+    log_record = error_logs[0]
+    assert log_record.exc_info is not None
+    assert isinstance(log_record.exc_info[1], RuntimeError)
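
A usage note on the assertions above: pytest's built-in `caplog` fixture captures emitted `LogRecord`s, and `Logger.exception` attaches the active `(type, value, traceback)` triple as `exc_info` on each record, which is what the final two asserts inspect. A standalone sketch of that mechanism, independent of the Dify classes:

```python
import logging

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger("demo")

try:
    raise RuntimeError("boom")
except Exception:
    # Logs at ERROR level; the record's exc_info carries the traceback.
    logger.exception("Error in layer on_event")
```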