From 4e3112bd7fa888db2a9458b59fb34e5ff255a935 Mon Sep 17 00:00:00 2001 From: GareArc Date: Fri, 6 Feb 2026 01:02:19 -0800 Subject: [PATCH] feat(telemetry): add enterprise OTEL telemetry with gateway, traces, metrics, and logs --- api/README.md | 1 + api/core/llm_generator/llm_generator.py | 15 ++++++------ api/core/ops/entities/trace_entity.py | 16 +++++++++++++ api/enterprise/telemetry/enterprise_trace.py | 24 ++++++++++++++++---- 4 files changed, 45 insertions(+), 11 deletions(-) diff --git a/api/README.md b/api/README.md index 9871d2c311..9f247da8f0 100644 --- a/api/README.md +++ b/api/README.md @@ -122,6 +122,7 @@ These commands assume you start from the repository root. ```bash cd api + # Note: enterprise_telemetry queue is only used in Enterprise Edition uv run celery -A app.celery worker -P threads -c 2 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,enterprise_telemetry ``` diff --git a/api/core/llm_generator/llm_generator.py b/api/core/llm_generator/llm_generator.py index 4279d44fc0..c6edd83fc8 100644 --- a/api/core/llm_generator/llm_generator.py +++ b/api/core/llm_generator/llm_generator.py @@ -25,6 +25,7 @@ from core.model_runtime.entities.llm_entities import LLMResult from core.model_runtime.entities.message_entities import PromptMessage, SystemPromptMessage, UserPromptMessage from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError +from core.ops.entities.trace_entity import OperationType from core.ops.utils import measure_time from core.prompt.utils.prompt_template_parser import PromptTemplateParser from core.telemetry import TelemetryContext, TelemetryEvent, TraceTaskName @@ -215,7 +216,7 @@ class LLMGenerator: tenant_id=tenant_id, user_id=user_id, app_id=app_id, - 
operation_type="rule_generate", + operation_type=OperationType.RULE_GENERATE, instruction=instruction, generated_output=generated_output, llm_result=llm_result, @@ -272,7 +273,7 @@ class LLMGenerator: tenant_id=tenant_id, user_id=user_id, app_id=app_id, - operation_type="rule_generate", + operation_type=OperationType.RULE_GENERATE, instruction=instruction, generated_output="", llm_result=llm_result, @@ -338,7 +339,7 @@ class LLMGenerator: tenant_id=tenant_id, user_id=user_id, app_id=app_id, - operation_type="rule_generate", + operation_type=OperationType.RULE_GENERATE, instruction=instruction, generated_output=str(generated_output) if generated_output else "", llm_result=llm_result, @@ -409,7 +410,7 @@ class LLMGenerator: tenant_id=tenant_id, user_id=user_id, app_id=app_id, - operation_type="code_generate", + operation_type=OperationType.CODE_GENERATE, instruction=instruction, generated_output=result.get("code", ""), llm_result=llm_result, @@ -504,7 +505,7 @@ class LLMGenerator: tenant_id=tenant_id, user_id=user_id, app_id=app_id, - operation_type="structured_output", + operation_type=OperationType.STRUCTURED_OUTPUT, instruction=instruction, generated_output=result.get("output", ""), llm_result=llm_result, @@ -736,7 +737,7 @@ class LLMGenerator: tenant_id=tenant_id, user_id=user_id, app_id=app_id, - operation_type="instruction_modify", + operation_type=OperationType.INSTRUCTION_MODIFY, instruction=instruction, generated_output=generated_output, llm_result=llm_result, @@ -753,7 +754,7 @@ class LLMGenerator: tenant_id: str, user_id: str, app_id: str | None, - operation_type: str, + operation_type: OperationType, instruction: str, generated_output: str, llm_result: LLMResult | None, diff --git a/api/core/ops/entities/trace_entity.py b/api/core/ops/entities/trace_entity.py index 5c878281a6..2eb745aeb9 100644 --- a/api/core/ops/entities/trace_entity.py +++ b/api/core/ops/entities/trace_entity.py @@ -211,6 +211,22 @@ trace_info_info_map = { } +class 
OperationType(StrEnum): +    """Operation type for token metric labels. + +    Used as a metric attribute on the total/``dify.tokens.input``/``dify.tokens.output`` +    token counters so consumers can break down token usage by operation. +    """ + +    WORKFLOW = "workflow" +    NODE_EXECUTION = "node_execution" +    MESSAGE = "message" +    RULE_GENERATE = "rule_generate" +    CODE_GENERATE = "code_generate" +    STRUCTURED_OUTPUT = "structured_output" +    INSTRUCTION_MODIFY = "instruction_modify" + + class TraceTaskName(StrEnum): CONVERSATION_TRACE = "conversation" WORKFLOW_TRACE = "workflow" diff --git a/api/enterprise/telemetry/enterprise_trace.py b/api/enterprise/telemetry/enterprise_trace.py index 2b0c9ade7c..5db6215552 100644 --- a/api/enterprise/telemetry/enterprise_trace.py +++ b/api/enterprise/telemetry/enterprise_trace.py @@ -23,6 +23,7 @@ from core.ops.entities.trace_entity import ( GenerateNameTraceInfo, MessageTraceInfo, ModerationTraceInfo, + OperationType, PromptGenerationTraceInfo, SuggestedQuestionTraceInfo, ToolTraceInfo, @@ -216,11 +217,17 @@ class EnterpriseOtelTrace: tenant_id=tenant_id or "", app_id=app_id or "", ) - self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, labels) + token_labels = self._labels( + **labels, + operation_type=OperationType.WORKFLOW, + ) + self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels) if info.prompt_tokens is not None and info.prompt_tokens > 0: - self._exporter.increment_counter(EnterpriseTelemetryCounter.INPUT_TOKENS, info.prompt_tokens, labels) + self._exporter.increment_counter(EnterpriseTelemetryCounter.INPUT_TOKENS, info.prompt_tokens, token_labels) if info.completion_tokens is not None and info.completion_tokens > 0: - self._exporter.increment_counter(EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.completion_tokens, labels) + self._exporter.increment_counter( + EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.completion_tokens, token_labels + ) invoke_from = 
metadata.get("triggered_from", "") self._exporter.increment_counter( EnterpriseTelemetryCounter.REQUESTS, @@ -365,6 +372,7 @@ class EnterpriseOtelTrace: token_labels = self._labels( **labels, model_name=info.model_name or "", + operation_type=OperationType.NODE_EXECUTION, ) self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels) if info.prompt_tokens is not None and info.prompt_tokens > 0: @@ -454,7 +462,15 @@ class EnterpriseOtelTrace: model_provider=metadata.get("ls_provider", ""), model_name=metadata.get("ls_model_name", ""), ) - self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, labels) + token_labels = self._labels( + **labels, + operation_type=OperationType.MESSAGE, + ) + self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels) + if info.message_tokens > 0: + self._exporter.increment_counter(EnterpriseTelemetryCounter.INPUT_TOKENS, info.message_tokens, token_labels) + if info.answer_tokens > 0: + self._exporter.increment_counter(EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.answer_tokens, token_labels) invoke_from = metadata.get("from_source", "") self._exporter.increment_counter( EnterpriseTelemetryCounter.REQUESTS,