dify/api/core/ops/entities/trace_entity.py
GareArc 83f5850d0a
refactor(telemetry): add resolved_parent_context property and fix edge cases
- Add resolved_parent_context property to BaseTraceInfo for reusable parent context extraction
- Refactor enterprise_trace.py to use property instead of duplicated dict plucking (~19 lines eliminated)
- Fix UUID validation in exporter.py with specific error logging for invalid trace correlation IDs
- Add error isolation in event_handlers.py to prevent telemetry failures from breaking user operations
- Replace pickle-based payload_fallback with JSON storage rehydration for security
- Update TelemetryEnvelope to use Pydantic v2 ConfigDict with extra='forbid'
- Update tests to reflect contract changes and new error handling behavior
2026-03-01 19:33:59 -08:00

285 lines
8.1 KiB
Python

from collections.abc import Mapping
from datetime import datetime
from enum import StrEnum
from typing import Any, Union
from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
class BaseTraceInfo(BaseModel):
message_id: str | None = None
message_data: Any | None = None
inputs: Union[str, dict[str, Any], list[Any]] | None = None
outputs: Union[str, dict[str, Any], list[Any]] | None = None
start_time: datetime | None = None
end_time: datetime | None = None
metadata: dict[str, Any]
trace_id: str | None = None
@field_validator("inputs", "outputs")
@classmethod
def ensure_type(cls, v: str | dict[str, Any] | list[Any] | None) -> str | dict[str, Any] | list[Any] | None:
if v is None:
return None
if isinstance(v, str | dict | list):
return v
return ""
model_config = ConfigDict(protected_namespaces=())
@property
def resolved_trace_id(self) -> str | None:
"""Get trace_id with intelligent fallback.
Priority:
1. External trace_id (from X-Trace-Id header)
2. workflow_run_id (if this trace type has it)
3. message_id (as final fallback)
"""
if self.trace_id:
return self.trace_id
# Try workflow_run_id (only exists on workflow-related traces)
workflow_run_id = getattr(self, "workflow_run_id", None)
if workflow_run_id:
return workflow_run_id
# Final fallback to message_id
return str(self.message_id) if self.message_id else None
@property
def resolved_parent_context(self) -> tuple[str | None, str | None]:
"""Resolve cross-workflow parent linking from metadata.
Extracts typed parent IDs from the untyped ``parent_trace_context``
metadata dict (set by tool_node when invoking nested workflows).
Returns:
(trace_correlation_override, parent_span_id_source) where
trace_correlation_override is the outer workflow_run_id and
parent_span_id_source is the outer node_execution_id.
"""
parent_ctx = self.metadata.get("parent_trace_context")
if not isinstance(parent_ctx, dict):
return None, None
trace_override = parent_ctx.get("parent_workflow_run_id")
parent_span = parent_ctx.get("parent_node_execution_id")
return (
trace_override if isinstance(trace_override, str) else None,
parent_span if isinstance(parent_span, str) else None,
)
@field_serializer("start_time", "end_time")
def serialize_datetime(self, dt: datetime | None) -> str | None:
if dt is None:
return None
return dt.isoformat()
class WorkflowTraceInfo(BaseTraceInfo):
workflow_data: Any = None
conversation_id: str | None = None
workflow_app_log_id: str | None = None
workflow_id: str
tenant_id: str
workflow_run_id: str
workflow_run_elapsed_time: Union[int, float]
workflow_run_status: str
workflow_run_inputs: Mapping[str, Any]
workflow_run_outputs: Mapping[str, Any]
workflow_run_version: str
error: str | None = None
total_tokens: int
prompt_tokens: int | None = None
completion_tokens: int | None = None
file_list: list[str]
query: str
metadata: dict[str, Any]
invoked_by: str | None = None
class MessageTraceInfo(BaseTraceInfo):
conversation_model: str
message_tokens: int
answer_tokens: int
total_tokens: int
error: str | None = None
file_list: Union[str, dict[str, Any], list[Any]] | None = None
message_file_data: Any | None = None
conversation_mode: str
gen_ai_server_time_to_first_token: float | None = None
llm_streaming_time_to_generate: float | None = None
is_streaming_request: bool = False
class ModerationTraceInfo(BaseTraceInfo):
flagged: bool
action: str
preset_response: str
query: str
class SuggestedQuestionTraceInfo(BaseTraceInfo):
total_tokens: int
status: str | None = None
error: str | None = None
from_account_id: str | None = None
agent_based: bool | None = None
from_source: str | None = None
model_provider: str | None = None
model_id: str | None = None
suggested_question: list[str]
level: str
status_message: str | None = None
workflow_run_id: str | None = None
model_config = ConfigDict(protected_namespaces=())
class DatasetRetrievalTraceInfo(BaseTraceInfo):
documents: Any = None
error: str | None = None
class ToolTraceInfo(BaseTraceInfo):
tool_name: str
tool_inputs: dict[str, Any]
tool_outputs: str
metadata: dict[str, Any]
message_file_data: Any = None
error: str | None = None
tool_config: dict[str, Any]
time_cost: Union[int, float]
tool_parameters: dict[str, Any]
file_url: Union[str, None, list[str]] = None
class GenerateNameTraceInfo(BaseTraceInfo):
conversation_id: str | None = None
tenant_id: str
class PromptGenerationTraceInfo(BaseTraceInfo):
"""Trace information for prompt generation operations (rule-generate, code-generate, etc.)."""
tenant_id: str
user_id: str
app_id: str | None = None
operation_type: str
instruction: str
prompt_tokens: int
completion_tokens: int
total_tokens: int
model_provider: str
model_name: str
latency: float
total_price: float | None = None
currency: str | None = None
error: str | None = None
model_config = ConfigDict(protected_namespaces=())
class WorkflowNodeTraceInfo(BaseTraceInfo):
workflow_id: str
workflow_run_id: str
tenant_id: str
node_execution_id: str
node_id: str
node_type: str
title: str
status: str
error: str | None = None
elapsed_time: float
index: int
predecessor_node_id: str | None = None
total_tokens: int = 0
total_price: float = 0.0
currency: str | None = None
model_provider: str | None = None
model_name: str | None = None
prompt_tokens: int | None = None
completion_tokens: int | None = None
tool_name: str | None = None
iteration_id: str | None = None
iteration_index: int | None = None
loop_id: str | None = None
loop_index: int | None = None
parallel_id: str | None = None
node_inputs: Mapping[str, Any] | None = None
node_outputs: Mapping[str, Any] | None = None
process_data: Mapping[str, Any] | None = None
invoked_by: str | None = None
model_config = ConfigDict(protected_namespaces=())
class DraftNodeExecutionTrace(WorkflowNodeTraceInfo):
pass
class TaskData(BaseModel):
app_id: str
trace_info_type: str
trace_info: Any = None
trace_info_info_map = {
"WorkflowTraceInfo": WorkflowTraceInfo,
"MessageTraceInfo": MessageTraceInfo,
"ModerationTraceInfo": ModerationTraceInfo,
"SuggestedQuestionTraceInfo": SuggestedQuestionTraceInfo,
"DatasetRetrievalTraceInfo": DatasetRetrievalTraceInfo,
"ToolTraceInfo": ToolTraceInfo,
"GenerateNameTraceInfo": GenerateNameTraceInfo,
"PromptGenerationTraceInfo": PromptGenerationTraceInfo,
"WorkflowNodeTraceInfo": WorkflowNodeTraceInfo,
"DraftNodeExecutionTrace": DraftNodeExecutionTrace,
}
class OperationType(StrEnum):
"""Operation type for token metric labels.
Used as a metric attribute on ``dify.tokens.input`` / ``dify.tokens.output``
counters so consumers can break down token usage by operation.
"""
WORKFLOW = "workflow"
NODE_EXECUTION = "node_execution"
MESSAGE = "message"
RULE_GENERATE = "rule_generate"
CODE_GENERATE = "code_generate"
STRUCTURED_OUTPUT = "structured_output"
INSTRUCTION_MODIFY = "instruction_modify"
class TraceTaskName(StrEnum):
CONVERSATION_TRACE = "conversation"
WORKFLOW_TRACE = "workflow"
DRAFT_NODE_EXECUTION_TRACE = "draft_node_execution"
MESSAGE_TRACE = "message"
MODERATION_TRACE = "moderation"
SUGGESTED_QUESTION_TRACE = "suggested_question"
DATASET_RETRIEVAL_TRACE = "dataset_retrieval"
TOOL_TRACE = "tool"
GENERATE_NAME_TRACE = "generate_conversation_name"
PROMPT_GENERATION_TRACE = "prompt_generation"
DATASOURCE_TRACE = "datasource"
NODE_EXECUTION_TRACE = "node_execution"