"""Enterprise trace handler — duck-typed, NOT a BaseTraceInstance subclass. Invoked directly in the Celery task, not through OpsTraceManager dispatch. Only requires a matching ``trace(trace_info)`` method signature. Signal strategy: - **Traces (spans)**: workflow run, node execution, draft node execution only. - **Metrics + structured logs**: all other event types. """ from __future__ import annotations import json import logging from typing import Any from core.ops.entities.trace_entity import ( BaseTraceInfo, DatasetRetrievalTraceInfo, DraftNodeExecutionTrace, GenerateNameTraceInfo, MessageTraceInfo, ModerationTraceInfo, SuggestedQuestionTraceInfo, ToolTraceInfo, WorkflowNodeTraceInfo, WorkflowTraceInfo, ) from enterprise.telemetry.entities import ( EnterpriseTelemetryCounter, EnterpriseTelemetryHistogram, EnterpriseTelemetrySpan, ) from enterprise.telemetry.telemetry_log import emit_metric_only_event, emit_telemetry_log logger = logging.getLogger(__name__) class EnterpriseOtelTrace: """Duck-typed enterprise trace handler. ``*_trace`` methods emit spans (workflow/node only) or structured logs (all other events), plus metrics at 100 % accuracy. """ def __init__(self) -> None: from extensions.ext_enterprise_telemetry import get_enterprise_exporter exporter = get_enterprise_exporter() if exporter is None: raise RuntimeError("EnterpriseOtelTrace instantiated but exporter is not initialized") self._exporter = exporter def trace(self, trace_info: BaseTraceInfo) -> None: if isinstance(trace_info, WorkflowTraceInfo): self._workflow_trace(trace_info) elif isinstance(trace_info, MessageTraceInfo): self._message_trace(trace_info) elif isinstance(trace_info, ToolTraceInfo): self._tool_trace(trace_info) elif isinstance(trace_info, DraftNodeExecutionTrace): self._draft_node_execution_trace(trace_info) elif isinstance(trace_info, WorkflowNodeTraceInfo): self._node_execution_trace(trace_info) elif isinstance(trace_info, ModerationTraceInfo): self._moderation_trace(trace_info) elif isinstance(trace_info, SuggestedQuestionTraceInfo): self._suggested_question_trace(trace_info) elif isinstance(trace_info, DatasetRetrievalTraceInfo): self._dataset_retrieval_trace(trace_info) elif isinstance(trace_info, GenerateNameTraceInfo): self._generate_name_trace(trace_info) def _common_attrs(self, trace_info: BaseTraceInfo) -> dict[str, Any]: return { "dify.trace_id": trace_info.trace_id, "dify.tenant_id": trace_info.metadata.get("tenant_id"), "dify.app_id": trace_info.metadata.get("app_id"), "dify.app.name": trace_info.metadata.get("app_name"), "dify.workspace.name": trace_info.metadata.get("workspace_name"), "gen_ai.user.id": trace_info.metadata.get("user_id"), "dify.message.id": trace_info.message_id, } def _maybe_json(self, value: Any) -> str | None: if value is None: return None if isinstance(value, str): return value try: return json.dumps(value, default=str) except (TypeError, ValueError): return str(value) # ------------------------------------------------------------------ # SPAN-emitting handlers (workflow, node execution, draft node) # ------------------------------------------------------------------ def _workflow_trace(self, info: WorkflowTraceInfo) -> None: # -- Slim span attrs: identity + structure + status + timing only -- span_attrs: dict[str, Any] = { "dify.trace_id": info.trace_id, "dify.tenant_id": info.metadata.get("tenant_id"), "dify.app_id": info.metadata.get("app_id"), "dify.workflow.id": info.workflow_id, "dify.workflow.run_id": info.workflow_run_id, "dify.workflow.status": info.workflow_run_status, "dify.workflow.error": info.error, "dify.workflow.elapsed_time": info.workflow_run_elapsed_time, "dify.invoke_from": info.metadata.get("triggered_from"), "dify.conversation.id": info.conversation_id, "dify.message.id": info.message_id, } trace_correlation_override: str | None = None parent_span_id_source: str | None = None parent_ctx = info.metadata.get("parent_trace_context") if parent_ctx and isinstance(parent_ctx, dict): span_attrs["dify.parent.trace_id"] = parent_ctx.get("trace_id") span_attrs["dify.parent.node.execution_id"] = parent_ctx.get("parent_node_execution_id") span_attrs["dify.parent.workflow.run_id"] = parent_ctx.get("parent_workflow_run_id") span_attrs["dify.parent.app.id"] = parent_ctx.get("parent_app_id") trace_correlation_override = parent_ctx.get("parent_workflow_run_id") parent_span_id_source = parent_ctx.get("parent_node_execution_id") self._exporter.export_span( EnterpriseTelemetrySpan.WORKFLOW_RUN, span_attrs, correlation_id=info.workflow_run_id, span_id_source=info.workflow_run_id, start_time=info.start_time, end_time=info.end_time, trace_correlation_override=trace_correlation_override, parent_span_id_source=parent_span_id_source, ) # -- Companion log: ALL attrs (span + detail) for full picture -- log_attrs: dict[str, Any] = {**span_attrs} log_attrs.update( { "dify.app.name": info.metadata.get("app_name"), "dify.workspace.name": info.metadata.get("workspace_name"), "gen_ai.user.id": info.metadata.get("user_id"), "gen_ai.usage.total_tokens": info.total_tokens, "dify.workflow.version": info.workflow_run_version, } ) if self._exporter.include_content: log_attrs["dify.workflow.inputs"] = self._maybe_json(info.workflow_run_inputs) log_attrs["dify.workflow.outputs"] = self._maybe_json(info.workflow_run_outputs) log_attrs["dify.workflow.query"] = info.query else: ref = f"ref:workflow_run_id={info.workflow_run_id}" log_attrs["dify.workflow.inputs"] = ref log_attrs["dify.workflow.outputs"] = ref log_attrs["dify.workflow.query"] = ref emit_telemetry_log( event_name="dify.workflow.run", attributes=log_attrs, signal="span_detail", trace_id_source=info.workflow_run_id, span_id_source=info.workflow_run_id, tenant_id=info.metadata.get("tenant_id"), user_id=info.metadata.get("user_id"), ) # -- Metrics -- labels = { "tenant_id": info.tenant_id, "app_id": info.metadata.get("app_id", ""), } self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, labels) invoke_from = info.metadata.get("triggered_from", "") self._exporter.increment_counter( EnterpriseTelemetryCounter.REQUESTS, 1, {**labels, "type": "workflow", "status": info.workflow_run_status, "invoke_from": invoke_from}, ) self._exporter.record_histogram( EnterpriseTelemetryHistogram.WORKFLOW_DURATION, float(info.workflow_run_elapsed_time), {**labels, "status": info.workflow_run_status}, ) if info.error: self._exporter.increment_counter(EnterpriseTelemetryCounter.ERRORS, 1, {**labels, "type": "workflow"}) def _node_execution_trace(self, info: WorkflowNodeTraceInfo) -> None: self._emit_node_execution_trace(info, EnterpriseTelemetrySpan.NODE_EXECUTION, "node") def _draft_node_execution_trace(self, info: DraftNodeExecutionTrace) -> None: self._emit_node_execution_trace( info, EnterpriseTelemetrySpan.DRAFT_NODE_EXECUTION, "draft_node", correlation_id_override=info.node_execution_id, trace_correlation_override_param=info.workflow_run_id, ) def _emit_node_execution_trace( self, info: WorkflowNodeTraceInfo, span_name: EnterpriseTelemetrySpan, request_type: str, correlation_id_override: str | None = None, trace_correlation_override_param: str | None = None, ) -> None: # -- Slim span attrs: identity + structure + status + timing -- span_attrs: dict[str, Any] = { "dify.trace_id": info.trace_id, "dify.tenant_id": info.tenant_id, "dify.app_id": info.metadata.get("app_id"), "dify.workflow.id": info.workflow_id, "dify.workflow.run_id": info.workflow_run_id, "dify.message.id": info.message_id, "dify.conversation.id": info.metadata.get("conversation_id"), "dify.node.execution_id": info.node_execution_id, "dify.node.id": info.node_id, "dify.node.type": info.node_type, "dify.node.title": info.title, "dify.node.status": info.status, "dify.node.error": info.error, "dify.node.elapsed_time": info.elapsed_time, "dify.node.index": info.index, "dify.node.predecessor_node_id": info.predecessor_node_id, "dify.node.iteration_id": info.iteration_id, "dify.node.loop_id": info.loop_id, "dify.node.parallel_id": info.parallel_id, } trace_correlation_override = trace_correlation_override_param parent_ctx = info.metadata.get("parent_trace_context") if parent_ctx and isinstance(parent_ctx, dict): trace_correlation_override = parent_ctx.get("parent_workflow_run_id") or trace_correlation_override effective_correlation_id = correlation_id_override or info.workflow_run_id self._exporter.export_span( span_name, span_attrs, correlation_id=effective_correlation_id, span_id_source=info.node_execution_id, start_time=info.start_time, end_time=info.end_time, trace_correlation_override=trace_correlation_override, ) # -- Companion log: ALL attrs (span + detail) -- log_attrs: dict[str, Any] = {**span_attrs} log_attrs.update( { "dify.app.name": info.metadata.get("app_name"), "dify.workspace.name": info.metadata.get("workspace_name"), "dify.invoke_from": info.metadata.get("invoke_from"), "gen_ai.user.id": info.metadata.get("user_id"), "gen_ai.usage.total_tokens": info.total_tokens, "dify.node.total_price": info.total_price, "dify.node.currency": info.currency, "gen_ai.provider.name": info.model_provider, "gen_ai.request.model": info.model_name, "gen_ai.usage.input_tokens": info.prompt_tokens, "gen_ai.usage.output_tokens": info.completion_tokens, "gen_ai.tool.name": info.tool_name, "dify.node.iteration_index": info.iteration_index, "dify.node.loop_index": info.loop_index, "dify.plugin.name": info.metadata.get("plugin_name"), "dify.credential.name": info.metadata.get("credential_name"), "dify.dataset.ids": self._maybe_json(info.metadata.get("dataset_ids")), "dify.dataset.names": self._maybe_json(info.metadata.get("dataset_names")), } ) if self._exporter.include_content: log_attrs["dify.node.inputs"] = self._maybe_json(info.node_inputs) log_attrs["dify.node.outputs"] = self._maybe_json(info.node_outputs) log_attrs["dify.node.process_data"] = self._maybe_json(info.process_data) else: ref = f"ref:node_execution_id={info.node_execution_id}" log_attrs["dify.node.inputs"] = ref log_attrs["dify.node.outputs"] = ref log_attrs["dify.node.process_data"] = ref emit_telemetry_log( event_name=span_name.value, attributes=log_attrs, signal="span_detail", trace_id_source=info.workflow_run_id, span_id_source=info.node_execution_id, tenant_id=info.tenant_id, user_id=info.metadata.get("user_id"), ) # -- Metrics -- labels = { "tenant_id": info.tenant_id, "app_id": info.metadata.get("app_id", ""), "node_type": info.node_type, "model_provider": info.model_provider or "", } if info.total_tokens: token_labels = {**labels, "model_name": info.model_name or ""} self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels) self._exporter.increment_counter( EnterpriseTelemetryCounter.REQUESTS, 1, {**labels, "type": request_type, "status": info.status} ) duration_labels = dict(labels) plugin_name = info.metadata.get("plugin_name") if plugin_name and info.node_type in {"tool", "knowledge-retrieval"}: duration_labels["plugin_name"] = plugin_name self._exporter.record_histogram(EnterpriseTelemetryHistogram.NODE_DURATION, info.elapsed_time, duration_labels) if info.error: self._exporter.increment_counter(EnterpriseTelemetryCounter.ERRORS, 1, {**labels, "type": request_type}) # ------------------------------------------------------------------ # METRIC-ONLY handlers (structured log + counters/histograms) # ------------------------------------------------------------------ def _message_trace(self, info: MessageTraceInfo) -> None: attrs = self._common_attrs(info) attrs.update( { "dify.invoke_from": info.metadata.get("from_source"), "dify.conversation.id": info.metadata.get("conversation_id"), "dify.conversation.mode": info.conversation_mode, "gen_ai.provider.name": info.metadata.get("ls_provider"), "gen_ai.request.model": info.metadata.get("ls_model_name"), "gen_ai.usage.input_tokens": info.message_tokens, "gen_ai.usage.output_tokens": info.answer_tokens, "gen_ai.usage.total_tokens": info.total_tokens, "dify.message.status": info.metadata.get("status"), "dify.message.error": info.error, "dify.message.from_source": info.metadata.get("from_source"), "dify.message.from_end_user_id": info.metadata.get("from_end_user_id"), "dify.message.from_account_id": info.metadata.get("from_account_id"), "dify.streaming": info.is_streaming_request, "dify.message.time_to_first_token": info.gen_ai_server_time_to_first_token, "dify.message.streaming_duration": info.llm_streaming_time_to_generate, "dify.workflow.run_id": info.metadata.get("workflow_run_id"), } ) if self._exporter.include_content: attrs["dify.message.inputs"] = self._maybe_json(info.inputs) attrs["dify.message.outputs"] = self._maybe_json(info.outputs) else: ref = f"ref:message_id={info.message_id}" attrs["dify.message.inputs"] = ref attrs["dify.message.outputs"] = ref emit_metric_only_event( event_name="dify.message.run", attributes=attrs, trace_id_source=info.metadata.get("workflow_run_id") or str(info.message_id) if info.message_id else None, tenant_id=info.metadata.get("tenant_id"), user_id=info.metadata.get("user_id"), ) labels = { "tenant_id": info.metadata.get("tenant_id", ""), "app_id": info.metadata.get("app_id", ""), "model_provider": info.metadata.get("ls_provider", ""), "model_name": info.metadata.get("ls_model_name", ""), } self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, labels) invoke_from = info.metadata.get("from_source", "") self._exporter.increment_counter( EnterpriseTelemetryCounter.REQUESTS, 1, {**labels, "type": "message", "status": info.metadata.get("status", ""), "invoke_from": invoke_from}, ) if info.start_time and info.end_time: duration = (info.end_time - info.start_time).total_seconds() self._exporter.record_histogram(EnterpriseTelemetryHistogram.MESSAGE_DURATION, duration, labels) if info.gen_ai_server_time_to_first_token is not None: self._exporter.record_histogram( EnterpriseTelemetryHistogram.MESSAGE_TTFT, info.gen_ai_server_time_to_first_token, labels ) if info.error: self._exporter.increment_counter(EnterpriseTelemetryCounter.ERRORS, 1, {**labels, "type": "message"}) def _tool_trace(self, info: ToolTraceInfo) -> None: attrs = self._common_attrs(info) attrs.update( { "gen_ai.tool.name": info.tool_name, "dify.tool.time_cost": info.time_cost, "dify.tool.error": info.error, } ) if self._exporter.include_content: attrs["dify.tool.inputs"] = self._maybe_json(info.tool_inputs) attrs["dify.tool.outputs"] = info.tool_outputs attrs["dify.tool.parameters"] = self._maybe_json(info.tool_parameters) attrs["dify.tool.config"] = self._maybe_json(info.tool_config) else: ref = f"ref:message_id={info.message_id}" attrs["dify.tool.inputs"] = ref attrs["dify.tool.outputs"] = ref attrs["dify.tool.parameters"] = ref attrs["dify.tool.config"] = ref emit_metric_only_event( event_name="dify.tool.execution", attributes=attrs, tenant_id=info.metadata.get("tenant_id"), user_id=info.metadata.get("user_id"), ) labels = { "tenant_id": info.metadata.get("tenant_id", ""), "app_id": info.metadata.get("app_id", ""), "tool_name": info.tool_name, } self._exporter.increment_counter(EnterpriseTelemetryCounter.REQUESTS, 1, {**labels, "type": "tool"}) self._exporter.record_histogram(EnterpriseTelemetryHistogram.TOOL_DURATION, float(info.time_cost), labels) if info.error: self._exporter.increment_counter(EnterpriseTelemetryCounter.ERRORS, 1, {**labels, "type": "tool"}) def _moderation_trace(self, info: ModerationTraceInfo) -> None: attrs = self._common_attrs(info) attrs.update( { "dify.moderation.flagged": info.flagged, "dify.moderation.action": info.action, "dify.moderation.preset_response": info.preset_response, } ) if self._exporter.include_content: attrs["dify.moderation.query"] = info.query else: attrs["dify.moderation.query"] = f"ref:message_id={info.message_id}" emit_metric_only_event( event_name="dify.moderation.check", attributes=attrs, tenant_id=info.metadata.get("tenant_id"), user_id=info.metadata.get("user_id"), ) labels = {"tenant_id": info.metadata.get("tenant_id", ""), "app_id": info.metadata.get("app_id", "")} self._exporter.increment_counter(EnterpriseTelemetryCounter.REQUESTS, 1, {**labels, "type": "moderation"}) def _suggested_question_trace(self, info: SuggestedQuestionTraceInfo) -> None: attrs = self._common_attrs(info) attrs.update( { "gen_ai.usage.total_tokens": info.total_tokens, "dify.suggested_question.status": info.status, "dify.suggested_question.error": info.error, "gen_ai.provider.name": info.model_provider, "gen_ai.request.model": info.model_id, "dify.suggested_question.count": len(info.suggested_question), } ) if self._exporter.include_content: attrs["dify.suggested_question.questions"] = self._maybe_json(info.suggested_question) else: attrs["dify.suggested_question.questions"] = f"ref:message_id={info.message_id}" emit_metric_only_event( event_name="dify.suggested_question.generation", attributes=attrs, tenant_id=info.metadata.get("tenant_id"), user_id=info.metadata.get("user_id"), ) labels = {"tenant_id": info.metadata.get("tenant_id", ""), "app_id": info.metadata.get("app_id", "")} self._exporter.increment_counter( EnterpriseTelemetryCounter.REQUESTS, 1, {**labels, "type": "suggested_question"} ) def _dataset_retrieval_trace(self, info: DatasetRetrievalTraceInfo) -> None: attrs = self._common_attrs(info) attrs["dify.dataset.error"] = info.error docs = info.documents or [] dataset_ids: list[str] = [] dataset_names: list[str] = [] structured_docs: list[dict] = [] for doc in docs: meta = doc.get("metadata", {}) if isinstance(doc, dict) else {} did = meta.get("dataset_id") dname = meta.get("dataset_name") if did and did not in dataset_ids: dataset_ids.append(did) if dname and dname not in dataset_names: dataset_names.append(dname) structured_docs.append( { "dataset_id": did, "document_id": meta.get("document_id"), "segment_id": meta.get("segment_id"), "score": meta.get("score"), } ) attrs["dify.dataset.ids"] = self._maybe_json(dataset_ids) attrs["dify.dataset.names"] = self._maybe_json(dataset_names) attrs["dify.retrieval.document_count"] = len(docs) embedding_models = info.metadata.get("embedding_models") or {} if isinstance(embedding_models, dict): providers: list[str] = [] models: list[str] = [] for ds_info in embedding_models.values(): if isinstance(ds_info, dict): p = ds_info.get("embedding_model_provider", "") m = ds_info.get("embedding_model", "") if p and p not in providers: providers.append(p) if m and m not in models: models.append(m) attrs["dify.dataset.embedding_providers"] = self._maybe_json(providers) attrs["dify.dataset.embedding_models"] = self._maybe_json(models) if self._exporter.include_content: attrs["dify.retrieval.query"] = self._maybe_json(info.inputs) attrs["dify.dataset.documents"] = self._maybe_json(structured_docs) else: ref = f"ref:message_id={info.message_id}" attrs["dify.retrieval.query"] = ref attrs["dify.dataset.documents"] = ref emit_metric_only_event( event_name="dify.dataset.retrieval", attributes=attrs, tenant_id=info.metadata.get("tenant_id"), user_id=info.metadata.get("user_id"), ) labels = {"tenant_id": info.metadata.get("tenant_id", ""), "app_id": info.metadata.get("app_id", "")} self._exporter.increment_counter( EnterpriseTelemetryCounter.REQUESTS, 1, {**labels, "type": "dataset_retrieval"} ) for did in dataset_ids: self._exporter.increment_counter( EnterpriseTelemetryCounter.DATASET_RETRIEVALS, 1, {**labels, "dataset_id": did} ) def _generate_name_trace(self, info: GenerateNameTraceInfo) -> None: attrs = self._common_attrs(info) attrs["dify.conversation.id"] = info.conversation_id if self._exporter.include_content: attrs["dify.generate_name.inputs"] = self._maybe_json(info.inputs) attrs["dify.generate_name.outputs"] = self._maybe_json(info.outputs) else: ref = f"ref:conversation_id={info.conversation_id}" attrs["dify.generate_name.inputs"] = ref attrs["dify.generate_name.outputs"] = ref emit_metric_only_event( event_name="dify.generate_name.execution", attributes=attrs, tenant_id=info.tenant_id, user_id=info.metadata.get("user_id"), ) labels = {"tenant_id": info.tenant_id, "app_id": info.metadata.get("app_id", "")} self._exporter.increment_counter(EnterpriseTelemetryCounter.REQUESTS, 1, {**labels, "type": "generate_name"})