From 8c9831177a9353d7e0a54616288fe69c023d4628 Mon Sep 17 00:00:00 2001 From: Zixuan Cheng <61724187+Theysua@users.noreply.github.com> Date: Thu, 19 Mar 2026 19:49:12 -0700 Subject: [PATCH] fix(api): preserve citation metadata in web responses (#33778) Co-authored-by: AI Assistant --- .../base_app_generate_response_converter.py | 11 +++++++ ..._agent_chat_generate_response_converter.py | 33 +++++++++++++++++++ ..._completion_generate_response_converter.py | 16 +++++++++ 3 files changed, 60 insertions(+) diff --git a/api/core/app/apps/base_app_generate_response_converter.py b/api/core/app/apps/base_app_generate_response_converter.py index 77950a832a..a92e3dd2ea 100644 --- a/api/core/app/apps/base_app_generate_response_converter.py +++ b/api/core/app/apps/base_app_generate_response_converter.py @@ -74,11 +74,22 @@ class AppGenerateResponseConverter(ABC): for resource in metadata["retriever_resources"]: updated_resources.append( { + "dataset_id": resource.get("dataset_id"), + "dataset_name": resource.get("dataset_name"), + "document_id": resource.get("document_id"), "segment_id": resource.get("segment_id", ""), "position": resource["position"], + "data_source_type": resource.get("data_source_type"), "document_name": resource["document_name"], "score": resource["score"], + "hit_count": resource.get("hit_count"), + "word_count": resource.get("word_count"), + "segment_position": resource.get("segment_position"), + "index_node_hash": resource.get("index_node_hash"), "content": resource["content"], + "page": resource.get("page"), + "title": resource.get("title"), + "files": resource.get("files"), "summary": resource.get("summary"), } ) diff --git a/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_generate_response_converter.py b/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_generate_response_converter.py index 02a1e04c98..e861a0c684 100644 --- a/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_generate_response_converter.py +++ b/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_generate_response_converter.py @@ -44,11 +44,22 @@ class TestAgentChatAppGenerateResponseConverterBlocking: metadata={ "retriever_resources": [ { + "dataset_id": "dataset-1", + "dataset_name": "Dataset 1", + "document_id": "document-1", "segment_id": "s1", "position": 1, + "data_source_type": "file", "document_name": "doc", "score": 0.9, + "hit_count": 2, + "word_count": 128, + "segment_position": 3, + "index_node_hash": "abc1234", "content": "content", + "page": 5, + "title": "Citation Title", + "files": [{"id": "file-1"}], } ], "annotation_reply": {"id": "a"}, @@ -107,11 +118,22 @@ class TestAgentChatAppGenerateResponseConverterStream: metadata={ "retriever_resources": [ { + "dataset_id": "dataset-1", + "dataset_name": "Dataset 1", + "document_id": "document-1", "segment_id": "s1", "position": 1, + "data_source_type": "file", "document_name": "doc", "score": 0.9, + "hit_count": 2, + "word_count": 128, + "segment_position": 3, + "index_node_hash": "abc1234", "content": "content", + "page": 5, + "title": "Citation Title", + "files": [{"id": "file-1"}], "summary": "summary", "extra": "ignored", } @@ -151,11 +173,22 @@ class TestAgentChatAppGenerateResponseConverterStream: assert "usage" not in metadata assert metadata["retriever_resources"] == [ { + "dataset_id": "dataset-1", + "dataset_name": "Dataset 1", + "document_id": "document-1", "segment_id": "s1", "position": 1, + "data_source_type": "file", "document_name": "doc", "score": 0.9, + "hit_count": 2, + "word_count": 128, + "segment_position": 3, + "index_node_hash": "abc1234", "content": "content", + "page": 5, + "title": "Citation Title", + "files": [{"id": "file-1"}], "summary": "summary", } ] diff --git a/api/tests/unit_tests/core/app/apps/completion/test_completion_generate_response_converter.py b/api/tests/unit_tests/core/app/apps/completion/test_completion_generate_response_converter.py index cf473dfbeb..0136dbf5ad 100644 --- a/api/tests/unit_tests/core/app/apps/completion/test_completion_generate_response_converter.py +++ b/api/tests/unit_tests/core/app/apps/completion/test_completion_generate_response_converter.py @@ -38,11 +38,22 @@ class TestCompletionAppGenerateResponseConverter: metadata = { "retriever_resources": [ { + "dataset_id": "dataset-1", + "dataset_name": "Dataset 1", + "document_id": "document-1", "segment_id": "s", "position": 1, + "data_source_type": "file", "document_name": "doc", "score": 0.9, + "hit_count": 2, + "word_count": 128, + "segment_position": 3, + "index_node_hash": "abc1234", "content": "c", + "page": 5, + "title": "Citation Title", + "files": [{"id": "file-1"}], "summary": "sum", "extra": "x", } @@ -66,7 +77,12 @@ class TestCompletionAppGenerateResponseConverter: assert "annotation_reply" not in result["metadata"] assert "usage" not in result["metadata"] + assert result["metadata"]["retriever_resources"][0]["dataset_id"] == "dataset-1" + assert result["metadata"]["retriever_resources"][0]["document_id"] == "document-1" assert result["metadata"]["retriever_resources"][0]["segment_id"] == "s" + assert result["metadata"]["retriever_resources"][0]["data_source_type"] == "file" + assert result["metadata"]["retriever_resources"][0]["segment_position"] == 3 + assert result["metadata"]["retriever_resources"][0]["index_node_hash"] == "abc1234" assert "extra" not in result["metadata"]["retriever_resources"][0] def test_convert_blocking_simple_response_metadata_not_dict(self):