feat: add reasoning content handling for Ollama's API response

Author: liugddx
Date: 2025-11-06 22:36:23 +08:00
Parent: 37903722fe
Commit: 28ecd2e7f9
4 changed files with 38 additions and 4 deletions

View File

@@ -245,6 +245,7 @@ class AppRunner:
 model: str = ""
 prompt_messages: list[PromptMessage] = []
 text = ""
+reasoning_content = ""
 usage = None
 for result in invoke_result:
 if not agent:
@@ -264,6 +265,10 @@
 else:
 text += content  # fallback to str
+# Handle reasoning content from delta (e.g., Ollama's thinking field)
+if result.delta.reasoning_content:
+reasoning_content += result.delta.reasoning_content
 if not model:
 model = result.model
@@ -277,7 +282,11 @@
 usage = LLMUsage.empty_usage()
 llm_result = LLMResult(
-model=model, prompt_messages=prompt_messages, message=AssistantPromptMessage(content=text), usage=usage
+model=model,
+prompt_messages=prompt_messages,
+message=AssistantPromptMessage(content=text),
+usage=usage,
+reasoning_content=reasoning_content or None,
 )
 queue_manager.publish(
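
The hunk above threads a second accumulator through the streaming loop. Below is a minimal, self-contained sketch of that pattern; FakeDelta and fake_stream are illustrative stand-ins for this sketch, not Dify classes. Each streamed delta may carry an optional reasoning_content alongside normal content, the two are collected separately, and the reasoning is attached to the final result only when something was actually streamed (reasoning_content or None).

from dataclasses import dataclass


@dataclass
class FakeDelta:
    content: str = ""
    reasoning_content: str = ""  # e.g. mapped from Ollama's "thinking" field


def fake_stream():
    yield FakeDelta(reasoning_content="Let me check the arithmetic first...")
    yield FakeDelta(content="The answer ")
    yield FakeDelta(content="is 42.")


text = ""
reasoning_content = ""
for delta in fake_stream():
    if delta.content:
        text += delta.content
    # Collected separately instead of being dropped or mixed into the answer text
    if delta.reasoning_content:
        reasoning_content += delta.reasoning_content

print(text)                       # The answer is 42.
print(reasoning_content or None)  # falls back to None when nothing was streamed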

View File

@@ -192,6 +192,7 @@ class LLMResultChunkDelta(BaseModel):
 message: AssistantPromptMessage
 usage: LLMUsage | None = None
 finish_reason: str | None = None
+reasoning_content: str | None = None
 class LLMResultChunk(BaseModel):
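
Only what the hunk itself shows is assumed here: LLMResultChunkDelta gains an optional reasoning_content that defaults to None, so chunks from providers that never emit reasoning keep validating unchanged. A toy pydantic model illustrating the effect of such an optional field (DeltaSketch is a stand-in, not the real class):

from pydantic import BaseModel


class DeltaSketch(BaseModel):
    index: int = 0
    reasoning_content: str | None = None  # new optional field, absent by default


print(DeltaSketch(index=0).reasoning_content)                           # None: old payloads still parse
print(DeltaSketch(index=1, reasoning_content="hm").reasoning_content)   # "hm"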

View File

@@ -9,6 +9,7 @@ from pydantic import BaseModel
 from yarl import URL
 from configs import dify_config
+from core.model_runtime.entities.llm_entities import LLMResultChunk
 from core.model_runtime.errors.invoke import (
 InvokeAuthorizationError,
 InvokeBadRequestError,
@@ -249,7 +250,17 @@ class BasePluginClient:
 """
 for line in self._stream_request(method, path, params, headers, data, files):
 try:
-rep = PluginDaemonBasicResponse[type_].model_validate_json(line)  # type: ignore
+line_data = json.loads(line)
+if isinstance(line_data, dict) and type_ is LLMResultChunk:
+if "data" in line_data and isinstance(line_data["data"], dict):
+data_dict = line_data["data"]
+if "delta" in data_dict and isinstance(data_dict["delta"], dict):
+delta_dict = data_dict["delta"]
+if "thinking" in delta_dict and "reasoning_content" not in delta_dict:
+delta_dict["reasoning_content"] = delta_dict.pop("thinking")
+rep = PluginDaemonBasicResponse[type_].model_validate(line_data)  # type: ignore
 except (ValueError, TypeError):
 # TODO modify this when line_data has code and message
 try:
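
The plugin client change normalizes each streamed line before validating it against the entity model: when the payload's delta carries an Ollama-style "thinking" key and no "reasoning_content", the key is renamed so the rest of the pipeline sees a single field name. A standalone sketch of that renaming step, assuming only the wire shape visible in the hunk above:

import json


def normalize_line(line: str) -> dict:
    line_data = json.loads(line)
    data = line_data.get("data")
    if isinstance(data, dict):
        delta = data.get("delta")
        if isinstance(delta, dict) and "thinking" in delta and "reasoning_content" not in delta:
            # Rename the provider-specific key to the field the entity model expects
            delta["reasoning_content"] = delta.pop("thinking")
    return line_data


raw = '{"data": {"delta": {"content": "42", "thinking": "double-checking the math"}}}'
print(normalize_line(raw)["data"]["delta"])
# {'content': '42', 'reasoning_content': 'double-checking the math'}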

View File

@@ -453,6 +453,7 @@ class LLMNode(Node):
 usage = LLMUsage.empty_usage()
 finish_reason = None
 full_text_buffer = io.StringIO()
+reasoning_content_buffer = io.StringIO()
 # Initialize streaming metrics tracking
 start_time = request_start_time if request_start_time is not None else time.perf_counter()
@@ -487,6 +488,15 @@
 is_final=False,
 )
+# Handle reasoning content from delta (e.g., Ollama's thinking field)
+if result.delta.reasoning_content:
+reasoning_content_buffer.write(result.delta.reasoning_content)
+yield StreamChunkEvent(
+selector=[node_id, "reasoning_content"],
+chunk=result.delta.reasoning_content,
+is_final=False,
+)
 # Update the whole metadata
 if not model and result.model:
 model = result.model
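
In the streaming hunk just above, reasoning tokens are both buffered and forwarded as their own chunk events on the [node_id, "reasoning_content"] selector, so consumers can render answer text and reasoning separately. An illustrative sketch of that dual-stream shape; StreamChunkEventSketch and the (content, reasoning) tuples are stand-ins for this example, not Dify's actual event or delta types:

import io
from dataclasses import dataclass


@dataclass
class StreamChunkEventSketch:
    selector: list[str]
    chunk: str
    is_final: bool = False


def forward(node_id: str, deltas):
    reasoning_buffer = io.StringIO()
    for content, reasoning in deltas:
        if content:
            yield StreamChunkEventSketch(selector=[node_id, "text"], chunk=content)
        if reasoning:
            reasoning_buffer.write(reasoning)  # kept for the final result
            yield StreamChunkEventSketch(selector=[node_id, "reasoning_content"], chunk=reasoning)


for event in forward("llm_node", [("", "pondering..."), ("The answer is 42.", "")]):
    print(event.selector, repr(event.chunk))
# ['llm_node', 'reasoning_content'] 'pondering...'
# ['llm_node', 'text'] 'The answer is 42.'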
@@ -503,6 +513,7 @@
 # Extract reasoning content from <think> tags in the main text
 full_text = full_text_buffer.getvalue()
+streamed_reasoning_content = reasoning_content_buffer.getvalue()
 if reasoning_format == "tagged":
 # Keep <think> tags in text for backward compatibility
@@ -510,7 +521,8 @@
 reasoning_content = ""
 else:
 # Extract clean text and reasoning from <think> tags
-clean_text, reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
+clean_text, extracted_reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
+reasoning_content = streamed_reasoning_content or extracted_reasoning_content
 # Calculate streaming metrics
 end_time = time.perf_counter()
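
The precedence added in this hunk is plain Python or: reasoning that arrived as explicit delta fields wins, and <think>-tag extraction from the main text stays as the fallback for providers that only embed reasoning inline. A tiny illustration with made-up values:

extracted_reasoning = "thought recovered from <think> tags"

print("thought streamed via delta.reasoning_content" or extracted_reasoning)
print("" or extracted_reasoning)  # empty streamed value falls back to tag extraction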
@@ -1120,7 +1132,8 @@
 reasoning_content = ""
 else:
 # Extract clean text and reasoning from <think> tags
-clean_text, reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
+clean_text, extracted_reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
+reasoning_content = invoke_result.reasoning_content or extracted_reasoning_content
 event = ModelInvokeCompletedEvent(
 # Use clean_text for separated mode, full_text for tagged mode
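
The blocking (non-streaming) path above applies the same precedence: a reasoning_content supplied on the invoke result takes priority, and <think>-tag extraction is the fallback. The extraction itself is not part of this diff; the sketch below uses a hypothetical split_think_tags helper purely to illustrate what separating reasoning from the answer text means, and is not the implementation of LLMNode._split_reasoning.

import re


def split_think_tags(full_text: str) -> tuple[str, str]:
    # Hypothetical helper for illustration: collect <think>...</think> blocks as
    # reasoning and return the remaining text as the clean answer.
    reasoning = "".join(re.findall(r"<think>(.*?)</think>", full_text, flags=re.DOTALL))
    clean = re.sub(r"<think>.*?</think>", "", full_text, flags=re.DOTALL).strip()
    return clean, reasoning


clean_text, extracted = split_think_tags("<think>2 * 21 = 42</think>The answer is 42.")
provider_reasoning = None  # e.g. what invoke_result.reasoning_content might hold
print(provider_reasoning or extracted)  # 2 * 21 = 42
print(clean_text)                       # The answer is 42.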