mirror of
https://github.com/langgenius/dify.git
synced 2026-03-14 13:51:33 +08:00
fix(telemetry): fix zero-value message and workflow duration histograms
Workflow RT: replace float(info.workflow_run_elapsed_time) with (end_time - start_time).total_seconds() using workflow_run.created_at and workflow_run.finished_at. The elapsed_time DB field defaults to 0 and can be stale if the workflow_storage Celery task has not committed yet when the trace fires. Wall-clock timestamps are more reliable; elapsed_time is kept as fallback. Message RT: change end_time from created_at + provider_response_latency to message.updated_at when updated_at > created_at. The pipeline explicitly sets message.updated_at = naive_utc_now() at the moment the LLM response is complete, making it the canonical response-complete timestamp. Falls back to the latency-based calculation for error/aborted messages.
This commit is contained in:
parent
9b5b355a4e
commit
fe741140d5
@ -904,7 +904,7 @@ class TraceTask:
|
||||
outputs=message_data.answer,
|
||||
file_list=file_list,
|
||||
start_time=created_at,
|
||||
end_time=created_at + timedelta(seconds=message_data.provider_response_latency),
|
||||
end_time=message_data.updated_at if message_data.updated_at and message_data.updated_at > created_at else created_at + timedelta(seconds=message_data.provider_response_latency),
|
||||
metadata=metadata,
|
||||
message_file_data=message_file_data,
|
||||
conversation_mode=conversation_mode,
|
||||
|
||||
@ -256,9 +256,18 @@ class EnterpriseOtelTrace:
|
||||
invoke_from=invoke_from,
|
||||
),
|
||||
)
|
||||
# Prefer wall-clock timestamps over the elapsed_time field: elapsed_time defaults
|
||||
# to 0 in the DB and can be stale if the Celery write races with the trace task.
|
||||
# start_time = workflow_run.created_at, end_time = workflow_run.finished_at.
|
||||
if info.start_time and info.end_time:
|
||||
workflow_duration = (info.end_time - info.start_time).total_seconds()
|
||||
elif info.workflow_run_elapsed_time:
|
||||
workflow_duration = float(info.workflow_run_elapsed_time)
|
||||
else:
|
||||
workflow_duration = 0.0
|
||||
self._exporter.record_histogram(
|
||||
EnterpriseTelemetryHistogram.WORKFLOW_DURATION,
|
||||
float(info.workflow_run_elapsed_time),
|
||||
workflow_duration,
|
||||
self._labels(
|
||||
**labels,
|
||||
status=info.workflow_run_status,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user