From 4da93ba579ed129c54b62a56454b6457283b62e2 Mon Sep 17 00:00:00 2001
From: heyszt <270985384@qq.com>
Date: Sat, 27 Sep 2025 09:51:23 +0800
Subject: [PATCH] update gen_ai semconv for aliyun trace (#26288)

Align the Aliyun trace exporter with the current OpenTelemetry GenAI
semantic conventions: replace the legacy gen_ai.model_name and
gen_ai.system attributes with gen_ai.request.model and
gen_ai.provider.name, drop the gen_ai.prompt_template.* attributes,
emit structured gen_ai.input.messages / gen_ai.output.messages on LLM
spans, and report retrieval results as structured documents.

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
---
 api/core/ops/aliyun_trace/aliyun_trace.py     | 40 ++++----
 api/core/ops/aliyun_trace/entities/semconv.py |  9 +-
 api/core/ops/aliyun_trace/utils.py            | 95 +++++++++++++++++++
 3 files changed, 123 insertions(+), 21 deletions(-)

diff --git a/api/core/ops/aliyun_trace/aliyun_trace.py b/api/core/ops/aliyun_trace/aliyun_trace.py
index c0727326ce..a7d8576d8d 100644
--- a/api/core/ops/aliyun_trace/aliyun_trace.py
+++ b/api/core/ops/aliyun_trace/aliyun_trace.py
@@ -14,12 +14,12 @@ from core.ops.aliyun_trace.data_exporter.traceclient import (
 from core.ops.aliyun_trace.entities.aliyun_trace_entity import SpanData, TraceMetadata
 from core.ops.aliyun_trace.entities.semconv import (
     GEN_AI_COMPLETION,
-    GEN_AI_MODEL_NAME,
+    GEN_AI_INPUT_MESSAGE,
+    GEN_AI_OUTPUT_MESSAGE,
     GEN_AI_PROMPT,
-    GEN_AI_PROMPT_TEMPLATE_TEMPLATE,
-    GEN_AI_PROMPT_TEMPLATE_VARIABLE,
+    GEN_AI_PROVIDER_NAME,
+    GEN_AI_REQUEST_MODEL,
     GEN_AI_RESPONSE_FINISH_REASON,
-    GEN_AI_SYSTEM,
     GEN_AI_USAGE_INPUT_TOKENS,
     GEN_AI_USAGE_OUTPUT_TOKENS,
     GEN_AI_USAGE_TOTAL_TOKENS,
@@ -35,6 +35,9 @@ from core.ops.aliyun_trace.utils import (
     create_links_from_trace_id,
     create_status_from_error,
     extract_retrieval_documents,
+    format_input_messages,
+    format_output_messages,
+    format_retrieval_documents,
     get_user_id_from_message_data,
     get_workflow_node_status,
     serialize_json_data,
@@ -151,10 +154,6 @@
         )
         self.trace_client.add_span(message_span)
 
-        app_model_config = getattr(message_data, "app_model_config", {})
-        pre_prompt = getattr(app_model_config, "pre_prompt", "")
-        inputs_data = getattr(message_data, "inputs", {})
-
         llm_span = SpanData(
             trace_id=trace_metadata.trace_id,
             parent_span_id=message_span_id,
@@ -170,13 +169,11 @@
                     inputs=inputs_json,
                     outputs=outputs_str,
                 ),
-                GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name") or "",
-                GEN_AI_SYSTEM: trace_info.metadata.get("ls_provider") or "",
+                GEN_AI_REQUEST_MODEL: trace_info.metadata.get("ls_model_name") or "",
+                GEN_AI_PROVIDER_NAME: trace_info.metadata.get("ls_provider") or "",
                 GEN_AI_USAGE_INPUT_TOKENS: str(trace_info.message_tokens),
                 GEN_AI_USAGE_OUTPUT_TOKENS: str(trace_info.answer_tokens),
                 GEN_AI_USAGE_TOTAL_TOKENS: str(trace_info.total_tokens),
-                GEN_AI_PROMPT_TEMPLATE_VARIABLE: serialize_json_data(inputs_data),
-                GEN_AI_PROMPT_TEMPLATE_TEMPLATE: pre_prompt,
                 GEN_AI_PROMPT: inputs_json,
                 GEN_AI_COMPLETION: outputs_str,
             },
@@ -364,6 +361,10 @@
         input_value = str(node_execution.inputs.get("query", "")) if node_execution.inputs else ""
         output_value = serialize_json_data(node_execution.outputs.get("result", [])) if node_execution.outputs else ""
 
+        retrieval_documents = node_execution.outputs.get("result", []) if node_execution.outputs else []
+        semantic_retrieval_documents = format_retrieval_documents(retrieval_documents)
+        semantic_retrieval_documents_json = serialize_json_data(semantic_retrieval_documents)
+
         return SpanData(
             trace_id=trace_metadata.trace_id,
             parent_span_id=trace_metadata.workflow_span_id,
@@ -380,7 +381,7 @@
                     outputs=output_value,
                 ),
                 RETRIEVAL_QUERY: input_value,
-                RETRIEVAL_DOCUMENT: output_value,
+                RETRIEVAL_DOCUMENT: semantic_retrieval_documents_json,
             },
             status=get_workflow_node_status(node_execution),
             links=trace_metadata.links,
@@ -396,6 +397,9 @@
         prompts_json = serialize_json_data(process_data.get("prompts", []))
         text_output = str(outputs.get("text", ""))
 
+        gen_ai_input_message = format_input_messages(process_data)
+        gen_ai_output_message = format_output_messages(outputs)
+
         return SpanData(
             trace_id=trace_metadata.trace_id,
             parent_span_id=trace_metadata.workflow_span_id,
@@ -411,14 +415,16 @@
                     inputs=prompts_json,
                     outputs=text_output,
                 ),
-                GEN_AI_MODEL_NAME: process_data.get("model_name") or "",
-                GEN_AI_SYSTEM: process_data.get("model_provider") or "",
+                GEN_AI_REQUEST_MODEL: process_data.get("model_name") or "",
+                GEN_AI_PROVIDER_NAME: process_data.get("model_provider") or "",
                 GEN_AI_USAGE_INPUT_TOKENS: str(usage_data.get("prompt_tokens", 0)),
                 GEN_AI_USAGE_OUTPUT_TOKENS: str(usage_data.get("completion_tokens", 0)),
                 GEN_AI_USAGE_TOTAL_TOKENS: str(usage_data.get("total_tokens", 0)),
                 GEN_AI_PROMPT: prompts_json,
                 GEN_AI_COMPLETION: text_output,
                 GEN_AI_RESPONSE_FINISH_REASON: outputs.get("finish_reason") or "",
+                GEN_AI_INPUT_MESSAGE: gen_ai_input_message,
+                GEN_AI_OUTPUT_MESSAGE: gen_ai_output_message,
             },
             status=get_workflow_node_status(node_execution),
             links=trace_metadata.links,
@@ -502,8 +508,8 @@
                     inputs=inputs_json,
                     outputs=suggested_question_json,
                 ),
-                GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name") or "",
-                GEN_AI_SYSTEM: trace_info.metadata.get("ls_provider") or "",
+                GEN_AI_REQUEST_MODEL: trace_info.metadata.get("ls_model_name") or "",
+                GEN_AI_PROVIDER_NAME: trace_info.metadata.get("ls_provider") or "",
                 GEN_AI_PROMPT: inputs_json,
                 GEN_AI_COMPLETION: suggested_question_json,
             },
diff --git a/api/core/ops/aliyun_trace/entities/semconv.py b/api/core/ops/aliyun_trace/entities/semconv.py
index 7a22db21e2..c823fcab8a 100644
--- a/api/core/ops/aliyun_trace/entities/semconv.py
+++ b/api/core/ops/aliyun_trace/entities/semconv.py
@@ -17,17 +17,18 @@ RETRIEVAL_QUERY: Final[str] = "retrieval.query"
 RETRIEVAL_DOCUMENT: Final[str] = "retrieval.document"
 
 # LLM attributes
-GEN_AI_MODEL_NAME: Final[str] = "gen_ai.model_name"
-GEN_AI_SYSTEM: Final[str] = "gen_ai.system"
+GEN_AI_REQUEST_MODEL: Final[str] = "gen_ai.request.model"
+GEN_AI_PROVIDER_NAME: Final[str] = "gen_ai.provider.name"
 GEN_AI_USAGE_INPUT_TOKENS: Final[str] = "gen_ai.usage.input_tokens"
 GEN_AI_USAGE_OUTPUT_TOKENS: Final[str] = "gen_ai.usage.output_tokens"
 GEN_AI_USAGE_TOTAL_TOKENS: Final[str] = "gen_ai.usage.total_tokens"
-GEN_AI_PROMPT_TEMPLATE_TEMPLATE: Final[str] = "gen_ai.prompt_template.template"
-GEN_AI_PROMPT_TEMPLATE_VARIABLE: Final[str] = "gen_ai.prompt_template.variable"
 GEN_AI_PROMPT: Final[str] = "gen_ai.prompt"
 GEN_AI_COMPLETION: Final[str] = "gen_ai.completion"
 GEN_AI_RESPONSE_FINISH_REASON: Final[str] = "gen_ai.response.finish_reason"
+GEN_AI_INPUT_MESSAGE: Final[str] = "gen_ai.input.messages"
+GEN_AI_OUTPUT_MESSAGE: Final[str] = "gen_ai.output.messages"
+
 
 # Tool attributes
 TOOL_NAME: Final[str] = "tool.name"
 TOOL_DESCRIPTION: Final[str] = "tool.description"
diff --git a/api/core/ops/aliyun_trace/utils.py b/api/core/ops/aliyun_trace/utils.py
index 2ec9e75dcd..7f68889e92 100644
--- a/api/core/ops/aliyun_trace/utils.py
+++ b/api/core/ops/aliyun_trace/utils.py
@@ -1,4 +1,5 @@
 import json
+from collections.abc import Mapping
 from typing import Any
 
 from opentelemetry.trace import Link, Status, StatusCode
@@ -93,3 +94,97 @@ def create_common_span_attributes(
         INPUT_VALUE: inputs,
         OUTPUT_VALUE: outputs,
     }
+
+
+def format_retrieval_documents(retrieval_documents: list) -> list:
+    try:
+        if not isinstance(retrieval_documents, list):
+            return []
+
+        semantic_documents = []
+        for doc in retrieval_documents:
+            if not isinstance(doc, dict):
+                continue
+
+            metadata = doc.get("metadata", {})
+            content = doc.get("content", "")
+            title = doc.get("title", "")
+            score = metadata.get("score", 0.0)
+            document_id = metadata.get("document_id", "")
+
+            semantic_metadata = {}
+            if title:
+                semantic_metadata["title"] = title
+            if metadata.get("source"):
+                semantic_metadata["source"] = metadata["source"]
+            elif metadata.get("_source"):
+                semantic_metadata["source"] = metadata["_source"]
+            if metadata.get("doc_metadata"):
+                doc_metadata = metadata["doc_metadata"]
+                if isinstance(doc_metadata, dict):
+                    semantic_metadata.update(doc_metadata)
+
+            semantic_doc = {
+                "document": {"content": content, "metadata": semantic_metadata, "score": score, "id": document_id}
+            }
+            semantic_documents.append(semantic_doc)
+
+        return semantic_documents
+    except Exception:
+        return []
+
+
+def format_input_messages(process_data: Mapping[str, Any]) -> str:
+    try:
+        if not isinstance(process_data, dict):
+            return serialize_json_data([])
+
+        prompts = process_data.get("prompts", [])
+        if not prompts:
+            return serialize_json_data([])
+
+        valid_roles = {"system", "user", "assistant", "tool"}
+        input_messages = []
+        for prompt in prompts:
+            if not isinstance(prompt, dict):
+                continue
+
+            role = prompt.get("role", "")
+            text = prompt.get("text", "")
+
+            if not role or role not in valid_roles:
+                continue
+
+            if text:
+                message = {"role": role, "parts": [{"type": "text", "content": text}]}
+                input_messages.append(message)
+
+        return serialize_json_data(input_messages)
+    except Exception:
+        return serialize_json_data([])
+
+
+def format_output_messages(outputs: Mapping[str, Any]) -> str:
+    try:
+        if not isinstance(outputs, dict):
+            return serialize_json_data([])
+
+        text = outputs.get("text", "")
+        finish_reason = outputs.get("finish_reason", "")
+
+        if not text:
+            return serialize_json_data([])
+
+        valid_finish_reasons = {"stop", "length", "content_filter", "tool_call", "error"}
+        if finish_reason not in valid_finish_reasons:
+            finish_reason = "stop"
+
+        output_message = {
+            "role": "assistant",
+            "parts": [{"type": "text", "content": text}],
+            "finish_reason": finish_reason,
+        }
+
+        return serialize_json_data([output_message])
+    except Exception:
+        return serialize_json_data([])
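
Note (not part of the patch): the new gen_ai.input.messages / gen_ai.output.messages
attributes carry the role/parts message structure from the OpenTelemetry GenAI
semantic conventions. The standalone Python sketch below condenses the logic of
format_input_messages and format_output_messages to show the JSON shapes produced;
json.dumps stands in for the module's serialize_json_data, and the sample
process_data/outputs payloads are hypothetical.

import json

VALID_ROLES = {"system", "user", "assistant", "tool"}
VALID_FINISH_REASONS = {"stop", "length", "content_filter", "tool_call", "error"}

def sketch_input_messages(process_data: dict) -> str:
    # Keep only prompts with a recognized role and non-empty text, and wrap
    # each text in the role/parts structure used by gen_ai.input.messages.
    messages = [
        {"role": p["role"], "parts": [{"type": "text", "content": p["text"]}]}
        for p in process_data.get("prompts", [])
        if isinstance(p, dict) and p.get("role") in VALID_ROLES and p.get("text")
    ]
    return json.dumps(messages, ensure_ascii=False)

def sketch_output_messages(outputs: dict) -> str:
    # Single assistant message; unknown finish reasons are normalized to "stop".
    text = outputs.get("text", "")
    if not text:
        return json.dumps([])
    finish_reason = outputs.get("finish_reason", "")
    if finish_reason not in VALID_FINISH_REASONS:
        finish_reason = "stop"
    message = {
        "role": "assistant",
        "parts": [{"type": "text", "content": text}],
        "finish_reason": finish_reason,
    }
    return json.dumps([message], ensure_ascii=False)

# Hypothetical payloads shaped like an LLM node's process_data/outputs:
process_data = {
    "prompts": [
        {"role": "system", "text": "You are a helpful assistant."},
        {"role": "user", "text": "Summarize the release notes."},
    ]
}
outputs = {"text": "Here is the summary ...", "finish_reason": "stop"}

print(sketch_input_messages(process_data))
# -> [{"role": "system", "parts": [{"type": "text", "content": "You are a helpful assistant."}]},
#     {"role": "user", "parts": [{"type": "text", "content": "Summarize the release notes."}]}]
print(sketch_output_messages(outputs))
# -> [{"role": "assistant", "parts": [{"type": "text", "content": "Here is the summary ..."}], "finish_reason": "stop"}]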