From 62e4ca0a6beabcd089bbb0cf73d7639fd943966a Mon Sep 17 00:00:00 2001 From: Hrushikesh Yadav Date: Wed, 25 Feb 2026 01:14:06 +0530 Subject: [PATCH] fix: handle agent_message event in MCP server streaming response The MCP server's process_streaming_response() only captured agent_thought events but agent-chat mode apps emit agent_message events containing the actual LLM answer. This caused the MCP response to return empty text or only internal reasoning instead of the real answer. fixes #32526 --- api/core/mcp/server/streamable_http.py | 13 +++++-- .../core/mcp/server/test_streamable_http.py | 39 ++++++++++++++++--- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/api/core/mcp/server/streamable_http.py b/api/core/mcp/server/streamable_http.py index 212c2eb073..7a98f40c66 100644 --- a/api/core/mcp/server/streamable_http.py +++ b/api/core/mcp/server/streamable_http.py @@ -203,16 +203,23 @@ def extract_answer_from_response(app: App, response: Any) -> str: def process_streaming_response(response: RateLimitGenerator) -> str: """Process streaming response for agent chat mode""" answer = "" + last_thought = "" for item in response.generator: if isinstance(item, str) and item.startswith("data: "): try: json_str = item[6:].strip() parsed_data = json.loads(json_str) - if parsed_data.get("event") == "agent_thought": - answer += parsed_data.get("thought", "") + event = parsed_data.get("event") + if event in ("message", "agent_message"): + answer += parsed_data.get("answer", "") + elif event == "agent_thought": + thought = parsed_data.get("thought", "") + if thought: + last_thought = thought except json.JSONDecodeError: continue - return answer + return answer or last_thought + def process_mapping_response(app: App, response: Mapping) -> str: diff --git a/api/tests/unit_tests/core/mcp/server/test_streamable_http.py b/api/tests/unit_tests/core/mcp/server/test_streamable_http.py index fe9f0935d5..f6e95c124f 100644 --- a/api/tests/unit_tests/core/mcp/server/test_streamable_http.py +++ b/api/tests/unit_tests/core/mcp/server/test_streamable_http.py @@ -357,21 +357,50 @@ class TestUtilityFunctions: assert result == expected def test_extract_answer_from_streaming_response(self): - """Test extracting answer from streaming response""" + """Test extracting answer from streaming response with agent_message events""" app = Mock(spec=App) - # Mock RateLimitGenerator mock_generator = Mock(spec=RateLimitGenerator) mock_generator.generator = [ 'data: {"event": "agent_thought", "thought": "thinking..."}', - 'data: {"event": "agent_thought", "thought": "more thinking"}', - 'data: {"event": "other", "content": "ignore this"}', + 'data: {"event": "agent_message", "answer": "Hello "}', + 'data: {"event": "agent_message", "answer": "World"}', + 'data: {"event": "message_end", "metadata": {}}', "not data format", ] result = extract_answer_from_response(app, mock_generator) - assert result == "thinking...more thinking" + assert result == "Hello World" + + def test_extract_answer_from_streaming_response_message_event(self): + """Test extracting answer from streaming response with message event""" + app = Mock(spec=App) + + mock_generator = Mock(spec=RateLimitGenerator) + mock_generator.generator = [ + 'data: {"event": "message", "answer": "Hello from chat"}', + ] + + result = extract_answer_from_response(app, mock_generator) + + assert result == "Hello from chat" + + def test_extract_answer_from_streaming_response_fallback_to_thought(self): + """Test extracting answer falls back to thought when no message events""" + app = Mock(spec=App) + + mock_generator = Mock(spec=RateLimitGenerator) + mock_generator.generator = [ + 'data: {"event": "agent_thought", "thought": "thinking..."}', + 'data: {"event": "agent_thought", "thought": "more thinking"}', + 'data: {"event": "other", "content": "ignore this"}', + ] + + result = extract_answer_from_response(app, mock_generator) + + assert result == "more thinking" + def test_process_mapping_response_invalid_mode(self): """Test processing mapping response with invalid app mode"""