dify/api/extensions/otel/parser/retrieval.py
qiuqiua 9ef6b90843
feat: sync main branch (#31938)
Signed-off-by: majiayu000 <1835304752@qq.com>
Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: NeatGuyCoding <15627489+NeatGuyCoding@users.noreply.github.com>
Signed-off-by: -LAN- <laipz8200@outlook.com>
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: 盐粒 Yanli <yanli@dify.ai>
Co-authored-by: wangxiaolei <fatelei@gmail.com>
Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cursx <33718736+Cursx@users.noreply.github.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: lif <1835304752@qq.com>
Co-authored-by: 非法操作 <hjlarry@163.com>
Co-authored-by: Asuka Minato <i@asukaminato.eu.org>
Co-authored-by: fenglin <790872612@qq.com>
Co-authored-by: qiaofenglin <qiaofenglin@baidu.com>
Co-authored-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: TomoOkuyama <49631611+TomoOkuyama@users.noreply.github.com>
Co-authored-by: Tomo Okuyama <tomo.okuyama@intersystems.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: zyssyz123 <916125788@qq.com>
Co-authored-by: hj24 <mambahj24@gmail.com>
Co-authored-by: Coding On Star <447357187@qq.com>
Co-authored-by: CodingOnStar <hanxujiang@dify.ai>
Co-authored-by: yyh <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: Xiangxuan Qu <fghpdf@outlook.com>
Co-authored-by: fghpdf <fghpdf@users.noreply.github.com>
Co-authored-by: coopercoder <whitetiger0127@163.com>
Co-authored-by: zhaiguangpeng <zhaiguangpeng@didiglobal.com>
Co-authored-by: Junyan Qin (Chin) <rockchinq@gmail.com>
Co-authored-by: E.G <146701565+GlobalStar117@users.noreply.github.com>
Co-authored-by: GlobalStar117 <GlobalStar117@users.noreply.github.com>
Co-authored-by: Claude Haiku 4.5 <noreply@anthropic.com>
Co-authored-by: CodingOnStar <hanxujiang@dify.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: heyszt <270985384@qq.com>
Co-authored-by: NeatGuyCoding <15627489+NeatGuyCoding@users.noreply.github.com>
Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: moonpanda <chuanzegao@163.com>
Co-authored-by: warlocgao <warlocgao@tencent.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>
Co-authored-by: KVOJJJin <jzongcode@gmail.com>
Co-authored-by: eux <euxx@users.noreply.github.com>
Co-authored-by: bangjiehan <bangjiehan@gmail.com>
Co-authored-by: FFXN <31929997+FFXN@users.noreply.github.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: Nie Ronghua <nieronghua@sf-express.com>
Co-authored-by: JQSevenMiao <141806521+JQSevenMiao@users.noreply.github.com>
Co-authored-by: jiasiqi <jiasiqi3@tal.com>
Co-authored-by: Seokrin Taron Sung <sungsjade@gmail.com>
Co-authored-by: CrabSAMA <40541269+CrabSAMA@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: yihong <zouzou0208@gmail.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: yessenia <yessenia.contact@gmail.com>
Co-authored-by: Jax <anobaka@qq.com>
Co-authored-by: niveshdandyan <155956228+niveshdandyan@users.noreply.github.com>
Co-authored-by: OSS Contributor <oss-contributor@example.com>
Co-authored-by: niveshdandyan <niveshdandyan@users.noreply.github.com>
Co-authored-by: Sean Kenneth Doherty <Smaster7772@gmail.com>
2026-02-04 19:04:24 +08:00

106 lines
3.7 KiB
Python

"""
Parser for knowledge retrieval nodes that captures retrieval-specific metadata.
"""
import logging
from collections.abc import Sequence
from typing import Any
from opentelemetry.trace import Span
from core.variables import Segment
from core.workflow.graph_events import GraphNodeEventBase
from core.workflow.nodes.base.node import Node
from extensions.otel.parser.base import DefaultNodeOTelParser, safe_json_dumps
from extensions.otel.semconv.gen_ai import RetrieverAttributes
logger = logging.getLogger(__name__)
def _format_retrieval_documents(retrieval_documents: list[Any]) -> list:
"""
Format retrieval documents for semantic conventions.
Args:
retrieval_documents: List of retrieval document dictionaries
Returns:
List of formatted semantic documents
"""
try:
if not isinstance(retrieval_documents, list):
return []
semantic_documents = []
for doc in retrieval_documents:
if not isinstance(doc, dict):
continue
metadata = doc.get("metadata", {})
content = doc.get("content", "")
title = doc.get("title", "")
score = metadata.get("score", 0.0)
document_id = metadata.get("document_id", "")
semantic_metadata = {}
if title:
semantic_metadata["title"] = title
if metadata.get("source"):
semantic_metadata["source"] = metadata["source"]
elif metadata.get("_source"):
semantic_metadata["source"] = metadata["_source"]
if metadata.get("doc_metadata"):
doc_metadata = metadata["doc_metadata"]
if isinstance(doc_metadata, dict):
semantic_metadata.update(doc_metadata)
semantic_doc = {
"document": {"content": content, "metadata": semantic_metadata, "score": score, "id": document_id}
}
semantic_documents.append(semantic_doc)
return semantic_documents
except Exception as e:
logger.warning("Failed to format retrieval documents: %s", e, exc_info=True)
return []
class RetrievalNodeOTelParser:
"""Parser for knowledge retrieval nodes that captures retrieval-specific metadata."""
def __init__(self) -> None:
self._delegate = DefaultNodeOTelParser()
def parse(
self, *, node: Node, span: "Span", error: Exception | None, result_event: GraphNodeEventBase | None = None
) -> None:
self._delegate.parse(node=node, span=span, error=error, result_event=result_event)
if not result_event or not result_event.node_run_result:
return
node_run_result = result_event.node_run_result
inputs = node_run_result.inputs or {}
outputs = node_run_result.outputs or {}
# Extract query from inputs
query = str(inputs.get("query", "")) if inputs else ""
if query:
span.set_attribute(RetrieverAttributes.QUERY, query)
# Extract and format retrieval documents from outputs
result_value = outputs.get("result") if outputs else None
retrieval_documents: list[Any] = []
if result_value:
value_to_check = result_value
if isinstance(result_value, Segment):
value_to_check = result_value.value
if isinstance(value_to_check, (list, Sequence)):
retrieval_documents = list(value_to_check)
if retrieval_documents:
semantic_retrieval_documents = _format_retrieval_documents(retrieval_documents)
semantic_retrieval_documents_json = safe_json_dumps(semantic_retrieval_documents)
span.set_attribute(RetrieverAttributes.DOCUMENT, semantic_retrieval_documents_json)