From 5e57f7359879f9dcc21bccc21f717187e2a437ed Mon Sep 17 00:00:00 2001 From: GareArc Date: Sat, 28 Feb 2026 00:02:21 -0800 Subject: [PATCH] feat(telemetry): add model provider and name tags to all trace metrics Add comprehensive model tracking across all OTEL metrics and logs: - Node execution metrics now include model_name for LLM operations - Suggested question metrics include model_provider and model_name - Dataset retrieval captures both embedding and rerank model info - Updated DATA_DICTIONARY.md with complete metric label documentation This enables granular cost tracking, performance analysis, and usage monitoring per model across all operation types. --- api/core/ops/ops_trace_manager.py | 13 ++++++++++ api/enterprise/telemetry/DATA_DICTIONARY.md | 18 ++++++++------ api/enterprise/telemetry/enterprise_trace.py | 25 ++++++++++++++++++++ 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index 3f7bc662fe..38beda77d0 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -945,6 +945,17 @@ class TraceTask: "embedding_model_provider": row[2] or "", } + # Extract rerank model info from retrieval_model kwargs + rerank_model_provider = "" + rerank_model_name = "" + if "retrieval_model" in kwargs: + retrieval_model = kwargs["retrieval_model"] + if isinstance(retrieval_model, dict): + reranking_model = retrieval_model.get("reranking_model") + if isinstance(reranking_model, dict): + rerank_model_provider = reranking_model.get("reranking_provider_name", "") + rerank_model_name = reranking_model.get("reranking_model_name", "") + metadata = { "message_id": message_id, "ls_provider": message_data.model_provider, @@ -961,6 +972,8 @@ class TraceTask: "app_name": app_name, "workspace_name": workspace_name, "embedding_models": embedding_models, + "rerank_model_provider": rerank_model_provider, + "rerank_model_name": rerank_model_name, } if node_execution_id := kwargs.get("node_execution_id"): metadata["node_execution_id"] = node_execution_id diff --git a/api/enterprise/telemetry/DATA_DICTIONARY.md b/api/enterprise/telemetry/DATA_DICTIONARY.md index fb8f35e693..c0d07d2550 100644 --- a/api/enterprise/telemetry/DATA_DICTIONARY.md +++ b/api/enterprise/telemetry/DATA_DICTIONARY.md @@ -91,12 +91,12 @@ All counters are cumulative and emitted at 100% accuracy. | `type` | Additional Labels | |--------|-------------------| | `workflow` | `tenant_id`, `app_id`, `status`, `invoke_from` | -| `node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `status` | -| `draft_node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `status` | +| `node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name`, `status` | +| `draft_node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name`, `status` | | `message` | `tenant_id`, `app_id`, `model_provider`, `model_name`, `status`, `invoke_from` | | `tool` | `tenant_id`, `app_id`, `tool_name` | | `moderation` | `tenant_id`, `app_id` | -| `suggested_question` | `tenant_id`, `app_id` | +| `suggested_question` | `tenant_id`, `app_id`, `model_provider`, `model_name` | | `dataset_retrieval` | `tenant_id`, `app_id` | | `generate_name` | `tenant_id`, `app_id` | | `prompt_generation` | `tenant_id`, `app_id`, `operation_type`, `model_provider`, `model_name`, `status` | @@ -112,8 +112,8 @@ All counters are cumulative and emitted at 100% accuracy. | `type` | Additional Labels | |--------|-------------------| | `workflow` | `tenant_id`, `app_id` | -| `node` | `tenant_id`, `app_id`, `node_type`, `model_provider` | -| `draft_node` | `tenant_id`, `app_id`, `node_type`, `model_provider` | +| `node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name` | +| `draft_node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name` | | `message` | `tenant_id`, `app_id`, `model_provider`, `model_name` | | `tool` | `tenant_id`, `app_id`, `tool_name` | | `prompt_generation` | `tenant_id`, `app_id`, `operation_type`, `model_provider`, `model_name` | @@ -123,7 +123,7 @@ All counters are cumulative and emitted at 100% accuracy. | Metric | Unit | Labels | |--------|------|--------| | `dify.feedback.total` | `{feedback}` | `tenant_id`, `app_id`, `rating` | -| `dify.dataset.retrievals.total` | `{retrieval}` | `tenant_id`, `app_id`, `dataset_id` | +| `dify.dataset.retrievals.total` | `{retrieval}` | `tenant_id`, `app_id`, `dataset_id`, `embedding_model_provider`, `embedding_model`, `rerank_model_provider`, `rerank_model` | | `dify.app.created.total` | `{app}` | `tenant_id`, `app_id`, `mode` | | `dify.app.updated.total` | `{app}` | `tenant_id`, `app_id` | | `dify.app.deleted.total` | `{app}` | `tenant_id`, `app_id` | @@ -133,7 +133,7 @@ All counters are cumulative and emitted at 100% accuracy. | Metric | Unit | Labels | |--------|------|--------| | `dify.workflow.duration` | `s` | `tenant_id`, `app_id`, `status` | -| `dify.node.duration` | `s` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `plugin_name` | +| `dify.node.duration` | `s` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name`, `plugin_name` | | `dify.message.duration` | `s` | `tenant_id`, `app_id`, `model_provider`, `model_name` | | `dify.message.time_to_first_token` | `s` | `tenant_id`, `app_id`, `model_provider`, `model_name` | | `dify.tool.duration` | `s` | `tenant_id`, `app_id`, `tool_name` | @@ -288,6 +288,10 @@ Logs without structural spans. Signal type: `metric_only` | `dify.message.id` | string | Message identifier | | `dify.dataset.id` | string | Dataset identifier | | `dify.dataset.name` | string | Dataset name | +| `dify.dataset.embedding_providers` | JSON array | Embedding model providers (one per dataset) | +| `dify.dataset.embedding_models` | JSON array | Embedding models (one per dataset) | +| `dify.retrieval.rerank_provider` | string | Rerank model provider | +| `dify.retrieval.rerank_model` | string | Rerank model name | | `dify.retrieval.query` | string | Search query (content-gated) | | `dify.retrieval.document_count` | int | Documents retrieved | | `dify.retrieval.duration` | float | Duration (seconds) | diff --git a/api/enterprise/telemetry/enterprise_trace.py b/api/enterprise/telemetry/enterprise_trace.py index a6893c7c88..214ab9cd29 100644 --- a/api/enterprise/telemetry/enterprise_trace.py +++ b/api/enterprise/telemetry/enterprise_trace.py @@ -419,9 +419,11 @@ class EnterpriseOtelTrace: **labels, type=request_type, status=info.status, + model_name=info.model_name or "", ), ) duration_labels = dict(labels) + duration_labels["model_name"] = info.model_name or "" plugin_name = metadata.get("plugin_name") if plugin_name and info.node_type in {"tool", "knowledge-retrieval"}: duration_labels["plugin_name"] = plugin_name @@ -434,6 +436,7 @@ class EnterpriseOtelTrace: self._labels( **labels, type=request_type, + model_name=info.model_name or "", ), ) @@ -674,6 +677,8 @@ class EnterpriseOtelTrace: self._labels( **labels, type="suggested_question", + model_provider=info.model_provider or "", + model_name=info.model_id or "", ), ) @@ -738,6 +743,13 @@ class EnterpriseOtelTrace: attrs["dify.dataset.embedding_providers"] = self._maybe_json(providers) attrs["dify.dataset.embedding_models"] = self._maybe_json(models) + # Add rerank model to logs + rerank_provider = metadata.get("rerank_model_provider", "") + rerank_model = metadata.get("rerank_model_name", "") + if rerank_provider or rerank_model: + attrs["dify.retrieval.rerank_provider"] = rerank_provider + attrs["dify.retrieval.rerank_model"] = rerank_model + ref = f"ref:message_id={info.message_id}" retrieval_inputs = self._safe_payload_value(info.inputs) attrs["dify.retrieval.query"] = self._content_or_ref(retrieval_inputs, ref) @@ -766,12 +778,25 @@ class EnterpriseOtelTrace: ) for did in dataset_ids: + # Get embedding model for this specific dataset + ds_embedding_info = embedding_models.get(did, {}) + embedding_provider = ds_embedding_info.get("embedding_model_provider", "") + embedding_model = ds_embedding_info.get("embedding_model", "") + + # Get rerank model (same for all datasets in this retrieval) + rerank_provider = metadata.get("rerank_model_provider", "") + rerank_model = metadata.get("rerank_model_name", "") + self._exporter.increment_counter( EnterpriseTelemetryCounter.DATASET_RETRIEVALS, 1, self._labels( **labels, dataset_id=did, + embedding_model_provider=embedding_provider, + embedding_model=embedding_model, + rerank_model_provider=rerank_provider, + rerank_model=rerank_model, ), )