feat(telemetry): add enterprise OTEL telemetry with gateway, traces, metrics, and logs

This commit is contained in:
GareArc 2026-02-06 01:02:19 -08:00
parent 576eca2113
commit 4e3112bd7f
No known key found for this signature in database
4 changed files with 45 additions and 11 deletions

View File

@ -122,6 +122,7 @@ These commands assume you start from the repository root.
```bash
cd api
# Note: the enterprise_telemetry queue is only used in the Enterprise Edition.
uv run celery -A app.celery worker -P threads -c 2 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,enterprise_telemetry
```

View File

@ -25,6 +25,7 @@ from core.model_runtime.entities.llm_entities import LLMResult
from core.model_runtime.entities.message_entities import PromptMessage, SystemPromptMessage, UserPromptMessage
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
from core.ops.entities.trace_entity import OperationType
from core.ops.utils import measure_time
from core.prompt.utils.prompt_template_parser import PromptTemplateParser
from core.telemetry import TelemetryContext, TelemetryEvent, TraceTaskName
@ -215,7 +216,7 @@ class LLMGenerator:
tenant_id=tenant_id,
user_id=user_id,
app_id=app_id,
operation_type="rule_generate",
operation_type=OperationType.RULE_GENERATE,
instruction=instruction,
generated_output=generated_output,
llm_result=llm_result,
@ -272,7 +273,7 @@ class LLMGenerator:
tenant_id=tenant_id,
user_id=user_id,
app_id=app_id,
operation_type="rule_generate",
operation_type=OperationType.RULE_GENERATE,
instruction=instruction,
generated_output="",
llm_result=llm_result,
@ -338,7 +339,7 @@ class LLMGenerator:
tenant_id=tenant_id,
user_id=user_id,
app_id=app_id,
operation_type="rule_generate",
operation_type=OperationType.RULE_GENERATE,
instruction=instruction,
generated_output=str(generated_output) if generated_output else "",
llm_result=llm_result,
@ -409,7 +410,7 @@ class LLMGenerator:
tenant_id=tenant_id,
user_id=user_id,
app_id=app_id,
operation_type="code_generate",
operation_type=OperationType.CODE_GENERATE,
instruction=instruction,
generated_output=result.get("code", ""),
llm_result=llm_result,
@ -504,7 +505,7 @@ class LLMGenerator:
tenant_id=tenant_id,
user_id=user_id,
app_id=app_id,
operation_type="structured_output",
operation_type=OperationType.STRUCTURED_OUTPUT,
instruction=instruction,
generated_output=result.get("output", ""),
llm_result=llm_result,
@ -736,7 +737,7 @@ class LLMGenerator:
tenant_id=tenant_id,
user_id=user_id,
app_id=app_id,
operation_type="instruction_modify",
operation_type=OperationType.INSTRUCTION_MODIFY,
instruction=instruction,
generated_output=generated_output,
llm_result=llm_result,
@ -753,7 +754,7 @@ class LLMGenerator:
tenant_id: str,
user_id: str,
app_id: str | None,
operation_type: str,
operation_type: OperationType,
instruction: str,
generated_output: str,
llm_result: LLMResult | None,

View File

@ -211,6 +211,22 @@ trace_info_info_map = {
}
class OperationType(StrEnum):
"""Operation type for token metric labels.
Used as a metric attribute on ``dify.tokens.input`` / ``dify.tokens.output``
counters so consumers can break down token usage by operation.
"""
WORKFLOW = "workflow"
NODE_EXECUTION = "node_execution"
MESSAGE = "message"
RULE_GENERATE = "rule_generate"
CODE_GENERATE = "code_generate"
STRUCTURED_OUTPUT = "structured_output"
INSTRUCTION_MODIFY = "instruction_modify"
class TraceTaskName(StrEnum):
CONVERSATION_TRACE = "conversation"
WORKFLOW_TRACE = "workflow"

View File

@ -23,6 +23,7 @@ from core.ops.entities.trace_entity import (
GenerateNameTraceInfo,
MessageTraceInfo,
ModerationTraceInfo,
OperationType,
PromptGenerationTraceInfo,
SuggestedQuestionTraceInfo,
ToolTraceInfo,
@ -216,11 +217,17 @@ class EnterpriseOtelTrace:
tenant_id=tenant_id or "",
app_id=app_id or "",
)
self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, labels)
token_labels = self._labels(
**labels,
operation_type=OperationType.WORKFLOW,
)
self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels)
if info.prompt_tokens is not None and info.prompt_tokens > 0:
self._exporter.increment_counter(EnterpriseTelemetryCounter.INPUT_TOKENS, info.prompt_tokens, labels)
self._exporter.increment_counter(EnterpriseTelemetryCounter.INPUT_TOKENS, info.prompt_tokens, token_labels)
if info.completion_tokens is not None and info.completion_tokens > 0:
self._exporter.increment_counter(EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.completion_tokens, labels)
self._exporter.increment_counter(
EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.completion_tokens, token_labels
)
invoke_from = metadata.get("triggered_from", "")
self._exporter.increment_counter(
EnterpriseTelemetryCounter.REQUESTS,
@ -365,6 +372,7 @@ class EnterpriseOtelTrace:
token_labels = self._labels(
**labels,
model_name=info.model_name or "",
operation_type=OperationType.NODE_EXECUTION,
)
self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels)
if info.prompt_tokens is not None and info.prompt_tokens > 0:
@ -454,7 +462,15 @@ class EnterpriseOtelTrace:
model_provider=metadata.get("ls_provider", ""),
model_name=metadata.get("ls_model_name", ""),
)
self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, labels)
token_labels = self._labels(
**labels,
operation_type=OperationType.MESSAGE,
)
self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels)
if info.message_tokens > 0:
self._exporter.increment_counter(EnterpriseTelemetryCounter.INPUT_TOKENS, info.message_tokens, token_labels)
if info.answer_tokens > 0:
self._exporter.increment_counter(EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.answer_tokens, token_labels)
invoke_from = metadata.get("from_source", "")
self._exporter.increment_counter(
EnterpriseTelemetryCounter.REQUESTS,