feat(api): Implement EventManager error logging and add coverage (#29204)

- Ensure `EventManager._notify_layers` logs exceptions instead of silently swallowing them,
  so GraphEngine layer failures surface for debugging
- Introduce unit tests asserting that the logger captures a layer's runtime error while collecting events
- Enable the `S110` lint rule to catch `try-except-pass` patterns
- Add proper error logging to the existing `try-except-pass` blocks (a minimal before/after sketch of the pattern follows)
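
For illustration, a minimal before/after sketch of the pattern this commit targets. Ruff's `S110` rule flags the silent form; the fix calls `logger.exception`, which records the message plus the active traceback at ERROR level. The `parse_config_*` helpers below are hypothetical, not code from this commit:

```python
import json
import logging

logger = logging.getLogger(__name__)


def parse_config_before(raw: str) -> dict:
    # Before: S110 flags this try-except-pass; the failure
    # vanishes without leaving any record for debugging.
    try:
        return json.loads(raw)
    except Exception:
        pass
    return {}


def parse_config_after(raw: str) -> dict:
    # After: logger.exception attaches the in-flight exception's
    # traceback automatically, so the failure is visible in logs.
    try:
        return json.loads(raw)
    except Exception:
        logger.exception("Failed to parse config")
    return {}
```
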
QuantumGhost 2025-12-08 09:40:40 +08:00 committed by GitHub
parent a25faa334a
commit 91667e3c1d
15 changed files with 103 additions and 33 deletions

View File

@ -36,17 +36,20 @@ select = [
"UP", # pyupgrade rules
"W191", # tab-indentation
"W605", # invalid-escape-sequence
"G001", # don't use str format to logging messages
"G003", # don't use + in logging messages
"G004", # don't use f-strings to format logging messages
"UP042", # use StrEnum,
"S110", # disallow the try-except-pass pattern.
# security related linting rules
# RCE protection (sort of)
"S102", # exec-builtin, disallow use of `exec`
"S307", # suspicious-eval-usage, disallow use of `eval` and `ast.literal_eval`
"S301", # suspicious-pickle-usage, disallow use of `pickle` and its wrappers.
"S302", # suspicious-marshal-usage, disallow use of `marshal` module
"S311", # suspicious-non-cryptographic-random-usage
"G001", # don't use str format to logging messages
"G003", # don't use + in logging messages
"G004", # don't use f-strings to format logging messages
"UP042", # use StrEnum
"S311", # suspicious-non-cryptographic-random-usage,
]
ignore = [
@ -91,18 +94,16 @@ ignore = [
"configs/*" = [
"N802", # invalid-function-name
]
"core/model_runtime/callbacks/base_callback.py" = [
"T201",
]
"core/workflow/callbacks/workflow_logging_callback.py" = [
"T201",
]
"core/model_runtime/callbacks/base_callback.py" = ["T201"]
"core/workflow/callbacks/workflow_logging_callback.py" = ["T201"]
"libs/gmpy2_pkcs10aep_cipher.py" = [
"N803", # invalid-argument-name
]
"tests/*" = [
"F811", # redefined-while-unused
"T201", # allow print in tests
"T201", # allow print in tests,
"S110", # allow ignoring exceptions in tests code (currently)
]
[lint.pyflakes]

View File

@ -1,3 +1,4 @@
import logging
import time
from collections.abc import Callable
from datetime import timedelta
@ -28,6 +29,8 @@ P = ParamSpec("P")
R = TypeVar("R")
T = TypeVar("T")
logger = logging.getLogger(__name__)
class WhereisUserArg(StrEnum):
"""
@ -238,8 +241,8 @@ def validate_dataset_token(view: Callable[Concatenate[T, P], R] | None = None):
# Basic check: UUIDs are 36 chars with hyphens
if len(str_id) == 36 and str_id.count("-") == 4:
dataset_id = str_id
except:
pass
except Exception:
logger.exception("Failed to parse dataset_id from class method args")
elif len(args) > 0:
# Not a class method, check if args[0] looks like a UUID
potential_id = args[0]
@ -247,8 +250,8 @@ def validate_dataset_token(view: Callable[Concatenate[T, P], R] | None = None):
str_id = str(potential_id)
if len(str_id) == 36 and str_id.count("-") == 4:
dataset_id = str_id
except:
pass
except Exception:
logger.exception("Failed to parse dataset_id from positional args")
# Validate dataset if dataset_id is provided
if dataset_id:

View File

@ -1,4 +1,5 @@
import json
import logging
from abc import ABC, abstractmethod
from collections.abc import Generator, Mapping, Sequence
from typing import Any
@ -23,6 +24,8 @@ from core.tools.entities.tool_entities import ToolInvokeMeta
from core.tools.tool_engine import ToolEngine
from models.model import Message
logger = logging.getLogger(__name__)
class CotAgentRunner(BaseAgentRunner, ABC):
_is_first_iteration = True
@ -400,8 +403,8 @@ class CotAgentRunner(BaseAgentRunner, ABC):
action_input=json.loads(message.tool_calls[0].function.arguments),
)
current_scratchpad.action_str = json.dumps(current_scratchpad.action.to_dict())
except:
pass
except Exception:
logger.exception("Failed to parse tool call from assistant message")
elif isinstance(message, ToolPromptMessage):
if current_scratchpad:
assert isinstance(message.content, str)

View File

@ -253,7 +253,7 @@ class ProviderConfiguration(BaseModel):
try:
credentials[key] = encrypter.decrypt_token(tenant_id=self.tenant_id, token=credentials[key])
except Exception:
pass
logger.exception("Failed to decrypt credential secret variable %s", key)
return self.obfuscated_credentials(
credentials=credentials,
@ -765,7 +765,7 @@ class ProviderConfiguration(BaseModel):
try:
credentials[key] = encrypter.decrypt_token(tenant_id=self.tenant_id, token=credentials[key])
except Exception:
pass
logger.exception("Failed to decrypt model credential secret variable %s", key)
current_credential_id = credential_record.id
current_credential_name = credential_record.credential_name

View File

@ -1,3 +1,4 @@
import logging
from collections.abc import Sequence
import httpx
@ -8,6 +9,7 @@ from core.helper.download import download_with_size_limit
from core.plugin.entities.marketplace import MarketplacePluginDeclaration
marketplace_api_url = URL(str(dify_config.MARKETPLACE_API_URL))
logger = logging.getLogger(__name__)
def get_plugin_pkg_url(plugin_unique_identifier: str) -> str:
@ -55,7 +57,9 @@ def batch_fetch_plugin_manifests_ignore_deserialization_error(
try:
result.append(MarketplacePluginDeclaration.model_validate(plugin))
except Exception:
pass
logger.exception(
"Failed to deserialize marketplace plugin manifest for %s", plugin.get("plugin_id", "unknown")
)
return result

View File

@ -521,4 +521,4 @@ class TencentDataTrace(BaseTraceInstance):
if hasattr(self, "trace_client"):
self.trace_client.shutdown()
except Exception:
pass
logger.exception("[Tencent APM] Failed to shutdown trace client during cleanup")

View File

@ -723,7 +723,7 @@ class ToolManager:
)
except Exception:
# app has been deleted
pass
logger.exception("Failed to transform workflow provider %s to controller", workflow_provider.id)
labels = ToolLabelManager.get_tools_labels(
[cast(ToolProviderController, controller) for controller in workflow_provider_controllers]

View File

@ -2,6 +2,7 @@
Unified event manager for collecting and emitting events.
"""
import logging
import threading
import time
from collections.abc import Generator
@ -12,6 +13,8 @@ from core.workflow.graph_events import GraphEngineEvent
from ..layers.base import GraphEngineLayer
_logger = logging.getLogger(__name__)
@final
class ReadWriteLock:
@ -180,5 +183,4 @@ class EventManager:
try:
layer.on_event(event)
except Exception:
# Silently ignore layer errors during collection
pass
_logger.exception("Error in layer on_event, layer_type=%s", type(layer))

View File

@ -6,12 +6,15 @@ using the new Redis command channel, without requiring user permission checks.
Supports stop, pause, and resume operations.
"""
import logging
from typing import final
from core.workflow.graph_engine.command_channels.redis_channel import RedisChannel
from core.workflow.graph_engine.entities.commands import AbortCommand, GraphEngineCommand, PauseCommand
from extensions.ext_redis import redis_client
logger = logging.getLogger(__name__)
@final
class GraphEngineManager:
@ -57,4 +60,4 @@ class GraphEngineManager:
except Exception:
# Silently fail if Redis is unavailable
# The legacy control mechanisms will still work
pass
logger.exception("Failed to send graph engine command %s for task %s", command.__class__.__name__, task_id)

View File

@ -1,9 +1,13 @@
import logging
from core.tools.tool_manager import ToolManager
from core.tools.utils.configuration import ToolParameterConfigurationManager
from core.workflow.nodes import NodeType
from core.workflow.nodes.tool.entities import ToolEntity
from events.app_event import app_draft_workflow_was_synced
logger = logging.getLogger(__name__)
@app_draft_workflow_was_synced.connect
def handle(sender, **kwargs):
@ -30,6 +34,10 @@ def handle(sender, **kwargs):
identity_id=f"WORKFLOW.{app.id}.{node_data.get('id')}",
)
manager.delete_tool_parameters_cache()
except:
except Exception:
# tool does not exist
pass
logger.exception(
"Failed to delete tool parameters cache for workflow %s node %s",
app.id,
node_data.get("id"),
)

View File

@ -199,9 +199,9 @@ class FileLifecycleManager:
# Temporarily create basic metadata information
except ValueError:
continue
except:
except Exception:
# If version files cannot be scanned, return only the current version
pass
logger.exception("Failed to scan version files for %s", filename)
return sorted(versions, key=lambda x: x.version or 0, reverse=True)

View File

@ -211,7 +211,7 @@ class AppService:
# override tool parameters
tool["tool_parameters"] = masked_parameter
except Exception:
pass
logger.exception("Failed to mask agent tool parameters for tool %s", agent_tool_entity.tool_name)
# override agent mode
if model_config:

View File

@ -1,4 +1,5 @@
import json
import logging
from collections.abc import Mapping
from datetime import datetime
from typing import Any
@ -19,6 +20,8 @@ from models.tools import WorkflowToolProvider
from models.workflow import Workflow
from services.tools.tools_transform_service import ToolTransformService
logger = logging.getLogger(__name__)
class WorkflowToolManageService:
"""
@ -198,7 +201,7 @@ class WorkflowToolManageService:
tools.append(ToolTransformService.workflow_provider_to_controller(provider))
except Exception:
# skip deleted tools
pass
logger.exception("Failed to load workflow tool provider %s", provider.id)
labels = ToolLabelManager.get_tools_labels([t for t in tools if isinstance(t, ToolProviderController)])

View File

@ -1,4 +1,5 @@
import json
import logging
import operator
import typing
@ -12,6 +13,8 @@ from core.plugin.impl.plugin import PluginInstaller
from extensions.ext_redis import redis_client
from models.account import TenantPluginAutoUpgradeStrategy
logger = logging.getLogger(__name__)
RETRY_TIMES_OF_ONE_PLUGIN_IN_ONE_TENANT = 3
CACHE_REDIS_KEY_PREFIX = "plugin_autoupgrade_check_task:cached_plugin_manifests:"
CACHE_REDIS_TTL = 60 * 15 # 15 minutes
@ -42,6 +45,7 @@ def _get_cached_manifest(plugin_id: str) -> typing.Union[MarketplacePluginDeclar
return MarketplacePluginDeclaration.model_validate(cached_json)
except Exception:
logger.exception("Failed to get cached manifest for plugin %s", plugin_id)
return False
@ -63,7 +67,7 @@ def _set_cached_manifest(plugin_id: str, manifest: typing.Union[MarketplacePlugi
except Exception:
# If Redis fails, continue without caching
# traceback.print_exc()
pass
logger.exception("Failed to set cached manifest for plugin %s", plugin_id)
def marketplace_batch_fetch_plugin_manifests(

View File

@ -0,0 +1,39 @@
"""Tests for the EventManager."""
from __future__ import annotations
import logging
from core.workflow.graph_engine.event_management.event_manager import EventManager
from core.workflow.graph_engine.layers.base import GraphEngineLayer
from core.workflow.graph_events import GraphEngineEvent
class _FaultyLayer(GraphEngineLayer):
"""Layer that raises from on_event to test error handling."""
def on_graph_start(self) -> None: # pragma: no cover - not used in tests
pass
def on_event(self, event: GraphEngineEvent) -> None:
raise RuntimeError("boom")
def on_graph_end(self, error: Exception | None) -> None: # pragma: no cover - not used in tests
pass
def test_event_manager_logs_layer_errors(caplog) -> None:
"""Ensure errors raised by layers are logged when collecting events."""
event_manager = EventManager()
event_manager.set_layers([_FaultyLayer()])
with caplog.at_level(logging.ERROR):
event_manager.collect(GraphEngineEvent())
error_logs = [record for record in caplog.records if "Error in layer on_event" in record.getMessage()]
assert error_logs, "Expected layer errors to be logged"
log_record = error_logs[0]
assert log_record.exc_info is not None
assert isinstance(log_record.exc_info[1], RuntimeError)
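
As a usage note, `caplog` is pytest's built-in log-capture fixture, and `log_record.exc_info` is populated here because `Logger.exception` logs with `exc_info=True`; that is what lets the test assert on the captured `RuntimeError` itself rather than only the message string.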