mirror of
https://github.com/langgenius/dify.git
synced 2026-05-10 14:14:17 +08:00
Signed-off-by: majiayu000 <1835304752@qq.com> Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: NeatGuyCoding <15627489+NeatGuyCoding@users.noreply.github.com> Signed-off-by: -LAN- <laipz8200@outlook.com> Signed-off-by: yihong0618 <zouzou0208@gmail.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: 盐粒 Yanli <yanli@dify.ai> Co-authored-by: wangxiaolei <fatelei@gmail.com> Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Cursx <33718736+Cursx@users.noreply.github.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: lif <1835304752@qq.com> Co-authored-by: 非法操作 <hjlarry@163.com> Co-authored-by: Asuka Minato <i@asukaminato.eu.org> Co-authored-by: fenglin <790872612@qq.com> Co-authored-by: qiaofenglin <qiaofenglin@baidu.com> Co-authored-by: -LAN- <laipz8200@outlook.com> Co-authored-by: TomoOkuyama <49631611+TomoOkuyama@users.noreply.github.com> Co-authored-by: Tomo Okuyama <tomo.okuyama@intersystems.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: zyssyz123 <916125788@qq.com> Co-authored-by: hj24 <mambahj24@gmail.com> Co-authored-by: Coding On Star <447357187@qq.com> Co-authored-by: CodingOnStar <hanxujiang@dify.ai> Co-authored-by: yyh <92089059+lyzno1@users.noreply.github.com> Co-authored-by: Xiangxuan Qu <fghpdf@outlook.com> Co-authored-by: fghpdf <fghpdf@users.noreply.github.com> Co-authored-by: coopercoder <whitetiger0127@163.com> Co-authored-by: zhaiguangpeng <zhaiguangpeng@didiglobal.com> Co-authored-by: Junyan Qin (Chin) <rockchinq@gmail.com> Co-authored-by: E.G <146701565+GlobalStar117@users.noreply.github.com> Co-authored-by: GlobalStar117 <GlobalStar117@users.noreply.github.com> Co-authored-by: Claude Haiku 4.5 <noreply@anthropic.com> Co-authored-by: CodingOnStar 
<hanxujiang@dify.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: heyszt <270985384@qq.com> Co-authored-by: NeatGuyCoding <15627489+NeatGuyCoding@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: moonpanda <chuanzegao@163.com> Co-authored-by: warlocgao <warlocgao@tencent.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com> Co-authored-by: KVOJJJin <jzongcode@gmail.com> Co-authored-by: eux <euxx@users.noreply.github.com> Co-authored-by: bangjiehan <bangjiehan@gmail.com> Co-authored-by: FFXN <31929997+FFXN@users.noreply.github.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: Nie Ronghua <nieronghua@sf-express.com> Co-authored-by: JQSevenMiao <141806521+JQSevenMiao@users.noreply.github.com> Co-authored-by: jiasiqi <jiasiqi3@tal.com> Co-authored-by: Seokrin Taron Sung <sungsjade@gmail.com> Co-authored-by: CrabSAMA <40541269+CrabSAMA@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: yihong <zouzou0208@gmail.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: yessenia <yessenia.contact@gmail.com> Co-authored-by: Jax <anobaka@qq.com> Co-authored-by: niveshdandyan <155956228+niveshdandyan@users.noreply.github.com> Co-authored-by: OSS Contributor <oss-contributor@example.com> Co-authored-by: niveshdandyan <niveshdandyan@users.noreply.github.com> Co-authored-by: Sean Kenneth Doherty <Smaster7772@gmail.com>
106 lines
3.7 KiB
Python
106 lines
3.7 KiB
Python
"""
|
|
Parser for knowledge retrieval nodes that captures retrieval-specific metadata.
|
|
"""
|
|
|
|
import logging
|
|
from collections.abc import Sequence
|
|
from typing import Any
|
|
|
|
from opentelemetry.trace import Span
|
|
|
|
from core.variables import Segment
|
|
from core.workflow.graph_events import GraphNodeEventBase
|
|
from core.workflow.nodes.base.node import Node
|
|
from extensions.otel.parser.base import DefaultNodeOTelParser, safe_json_dumps
|
|
from extensions.otel.semconv.gen_ai import RetrieverAttributes
|
|
|
|
# Module-level logger; used to report failures while formatting retrieval documents.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _format_retrieval_documents(retrieval_documents: list[Any]) -> list:
|
|
"""
|
|
Format retrieval documents for semantic conventions.
|
|
|
|
Args:
|
|
retrieval_documents: List of retrieval document dictionaries
|
|
|
|
Returns:
|
|
List of formatted semantic documents
|
|
"""
|
|
try:
|
|
if not isinstance(retrieval_documents, list):
|
|
return []
|
|
|
|
semantic_documents = []
|
|
for doc in retrieval_documents:
|
|
if not isinstance(doc, dict):
|
|
continue
|
|
|
|
metadata = doc.get("metadata", {})
|
|
content = doc.get("content", "")
|
|
title = doc.get("title", "")
|
|
score = metadata.get("score", 0.0)
|
|
document_id = metadata.get("document_id", "")
|
|
|
|
semantic_metadata = {}
|
|
if title:
|
|
semantic_metadata["title"] = title
|
|
if metadata.get("source"):
|
|
semantic_metadata["source"] = metadata["source"]
|
|
elif metadata.get("_source"):
|
|
semantic_metadata["source"] = metadata["_source"]
|
|
if metadata.get("doc_metadata"):
|
|
doc_metadata = metadata["doc_metadata"]
|
|
if isinstance(doc_metadata, dict):
|
|
semantic_metadata.update(doc_metadata)
|
|
|
|
semantic_doc = {
|
|
"document": {"content": content, "metadata": semantic_metadata, "score": score, "id": document_id}
|
|
}
|
|
semantic_documents.append(semantic_doc)
|
|
|
|
return semantic_documents
|
|
except Exception as e:
|
|
logger.warning("Failed to format retrieval documents: %s", e, exc_info=True)
|
|
return []
|
|
|
|
|
|
class RetrievalNodeOTelParser:
    """Parser for knowledge retrieval nodes that captures retrieval-specific metadata."""

    def __init__(self) -> None:
        # Generic node handling is delegated to the default parser; this class
        # only adds the retrieval-specific span attributes on top of it.
        self._delegate = DefaultNodeOTelParser()

    def parse(
        self, *, node: Node, span: "Span", error: Exception | None, result_event: GraphNodeEventBase | None = None
    ) -> None:
        """
        Run the default node parsing, then add retrieval attributes to the span.

        Args:
            node: The node being traced; forwarded to the default parser.
            span: Span that receives the retrieval attributes.
            error: Error raised by the node run, if any; forwarded to the default parser.
            result_event: Event carrying ``node_run_result``. When it is missing or has
                no run result, only the default parsing is performed.

        Side effects:
            Sets ``RetrieverAttributes.QUERY`` when the inputs contain a non-empty
            ``query`` and ``RetrieverAttributes.DOCUMENT`` (a JSON string) when the
            ``result`` output holds a non-empty sequence of documents.
        """
        self._delegate.parse(node=node, span=span, error=error, result_event=result_event)

        if not result_event or not result_event.node_run_result:
            return

        node_run_result = result_event.node_run_result
        inputs = node_run_result.inputs or {}
        outputs = node_run_result.outputs or {}

        # Extract query from inputs. A query that is explicitly None must not be
        # recorded as the literal string "None".
        raw_query = inputs.get("query")
        query = "" if raw_query is None else str(raw_query)
        if query:
            span.set_attribute(RetrieverAttributes.QUERY, query)

        # Extract and format retrieval documents from outputs
        result_value = outputs.get("result")
        retrieval_documents: list[Any] = []
        if result_value:
            value_to_check = result_value.value if isinstance(result_value, Segment) else result_value

            # str/bytes are Sequences as well, but iterating them yields characters,
            # not documents, so they are deliberately excluded.
            if isinstance(value_to_check, Sequence) and not isinstance(value_to_check, (str, bytes, bytearray)):
                retrieval_documents = list(value_to_check)

        if retrieval_documents:
            semantic_retrieval_documents = _format_retrieval_documents(retrieval_documents)
            semantic_retrieval_documents_json = safe_json_dumps(semantic_retrieval_documents)
            span.set_attribute(RetrieverAttributes.DOCUMENT, semantic_retrieval_documents_json)
|