Guangdong Liu 2025-12-29 15:43:28 +08:00 committed by GitHub
commit e6cbca7367
4 changed files with 38 additions and 4 deletions

View File

@@ -247,6 +247,7 @@ class AppRunner:
         model: str = ""
         prompt_messages: list[PromptMessage] = []
         text = ""
+        reasoning_content = ""
         usage = None
         for result in invoke_result:
             if not agent:
@@ -266,6 +267,10 @@
                     else:
                         text += content  # failback to str

+            # Handle reasoning content from delta (e.g., Ollama's thinking field)
+            if result.delta.reasoning_content:
+                reasoning_content += result.delta.reasoning_content
+
             if not model:
                 model = result.model
@@ -279,7 +284,11 @@
             usage = LLMUsage.empty_usage()

         llm_result = LLMResult(
-            model=model, prompt_messages=prompt_messages, message=AssistantPromptMessage(content=text), usage=usage
+            model=model,
+            prompt_messages=prompt_messages,
+            message=AssistantPromptMessage(content=text),
+            usage=usage,
+            reasoning_content=reasoning_content or None,
         )

         queue_manager.publish(

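The AppRunner change above accumulates reasoning across the stream and only attaches it to the final LLMResult when something was actually produced. A minimal, runnable sketch of that accumulation pattern, using simplified stand-in dataclasses rather than Dify's real LLMResultChunk/LLMResultChunkDelta entities:

from dataclasses import dataclass

@dataclass
class Delta:  # simplified stand-in for LLMResultChunkDelta
    content: str = ""
    reasoning_content: str | None = None

@dataclass
class Chunk:  # simplified stand-in for LLMResultChunk
    delta: Delta

def collect(chunks: list[Chunk]) -> tuple[str, str | None]:
    """Accumulate answer text and optional reasoning across a streamed invocation."""
    text, reasoning = "", ""
    for chunk in chunks:
        text += chunk.delta.content
        if chunk.delta.reasoning_content:  # only reasoning-capable models populate this
            reasoning += chunk.delta.reasoning_content
    # Mirrors the `reasoning_content or None` normalization: empty string becomes None
    return text, reasoning or None

print(collect([Chunk(Delta("Hi", "planning a greeting")), Chunk(Delta(" there"))]))
# ('Hi there', 'planning a greeting')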
View File

@@ -192,6 +192,7 @@ class LLMResultChunkDelta(BaseModel):
     message: AssistantPromptMessage
     usage: LLMUsage | None = None
     finish_reason: str | None = None
+    reasoning_content: str | None = None


 class LLMResultChunk(BaseModel):

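Because the new LLMResultChunkDelta field defaults to None, payloads from plugins that never send reasoning still validate unchanged. A small pydantic sketch of that backward compatibility, using a trimmed-down stand-in model rather than the full entity:

from pydantic import BaseModel

class DeltaSketch(BaseModel):  # trimmed stand-in for LLMResultChunkDelta
    finish_reason: str | None = None
    reasoning_content: str | None = None  # new optional field; absent in older payloads

legacy = DeltaSketch.model_validate({"finish_reason": "stop"})
current = DeltaSketch.model_validate({"finish_reason": "stop", "reasoning_content": "step 1: ..."})
print(legacy.reasoning_content)   # None -> existing plugins keep working
print(current.reasoning_content)  # "step 1: ..."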
View File

@@ -9,6 +9,7 @@ from pydantic import BaseModel
 from yarl import URL

 from configs import dify_config
+from core.model_runtime.entities.llm_entities import LLMResultChunk
 from core.model_runtime.errors.invoke import (
     InvokeAuthorizationError,
     InvokeBadRequestError,
@@ -255,7 +256,17 @@ class BasePluginClient:
         """
         for line in self._stream_request(method, path, params, headers, data, files):
             try:
-                rep = PluginDaemonBasicResponse[type_].model_validate_json(line)  # type: ignore
+                line_data = json.loads(line)
+                if isinstance(line_data, dict) and type_ is LLMResultChunk:
+                    if "data" in line_data and isinstance(line_data["data"], dict):
+                        data_dict = line_data["data"]
+                        if "delta" in data_dict and isinstance(data_dict["delta"], dict):
+                            delta_dict = data_dict["delta"]
+                            if "thinking" in delta_dict and "reasoning_content" not in delta_dict:
+                                delta_dict["reasoning_content"] = delta_dict.pop("thinking")
+                rep = PluginDaemonBasicResponse[type_].model_validate(line_data)  # type: ignore
             except (ValueError, TypeError):
                 # TODO modify this when line_data has code and message
                 try:

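The BasePluginClient change pre-parses each streamed line and renames an Ollama-style `thinking` key to `reasoning_content` before pydantic validation, so plugins that still emit `thinking` keep working. A standalone sketch of that key rewrite; the sample line below is made up for illustration, not real plugin daemon output:

import json
from typing import Any

def normalize_delta_keys(line: str) -> dict[str, Any]:
    """Rename data.delta.thinking to data.delta.reasoning_content when only the former exists."""
    payload = json.loads(line)
    data = payload.get("data") if isinstance(payload, dict) else None
    delta = data.get("delta") if isinstance(data, dict) else None
    if isinstance(delta, dict) and "thinking" in delta and "reasoning_content" not in delta:
        delta["reasoning_content"] = delta.pop("thinking")
    return payload

sample = '{"data": {"delta": {"message": {"content": "4"}, "thinking": "2 + 2 is 4"}}}'
print(normalize_delta_keys(sample)["data"]["delta"]["reasoning_content"])  # "2 + 2 is 4"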
View File

@@ -444,6 +444,7 @@ class LLMNode(Node[LLMNodeData]):
         usage = LLMUsage.empty_usage()
         finish_reason = None
         full_text_buffer = io.StringIO()
+        reasoning_content_buffer = io.StringIO()

         # Initialize streaming metrics tracking
         start_time = request_start_time if request_start_time is not None else time.perf_counter()
@@ -478,6 +479,15 @@
                     is_final=False,
                 )

+            # Handle reasoning content from delta (e.g., Ollama's thinking field)
+            if result.delta.reasoning_content:
+                reasoning_content_buffer.write(result.delta.reasoning_content)
+                yield StreamChunkEvent(
+                    selector=[node_id, "reasoning_content"],
+                    chunk=result.delta.reasoning_content,
+                    is_final=False,
+                )
+
             # Update the whole metadata
             if not model and result.model:
                 model = result.model
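With the buffer in place, reasoning tokens stream under their own selector ([node_id, "reasoning_content"]) while answer tokens keep the text selector. A rough sketch of that fan-out, using plain (selector, chunk) tuples instead of Dify's StreamChunkEvent, just to show the routing:

import io
from collections.abc import Iterable, Iterator

def fan_out(
    node_id: str, deltas: Iterable[tuple[str, str | None]]
) -> Iterator[tuple[list[str], str]]:
    """Yield (selector, chunk) pairs, routing reasoning to a dedicated selector."""
    text_buf, reasoning_buf = io.StringIO(), io.StringIO()
    for content, reasoning in deltas:
        if content:
            text_buf.write(content)
            yield [node_id, "text"], content
        if reasoning:
            reasoning_buf.write(reasoning)
            yield [node_id, "reasoning_content"], reasoning

for selector, chunk in fan_out("llm_node", [("", "weighing options"), ("42", None)]):
    print(selector, repr(chunk))
# ['llm_node', 'reasoning_content'] 'weighing options'
# ['llm_node', 'text'] '42'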
@@ -494,6 +504,7 @@
         # Extract reasoning content from <think> tags in the main text
         full_text = full_text_buffer.getvalue()
+        streamed_reasoning_content = reasoning_content_buffer.getvalue()

         if reasoning_format == "tagged":
             # Keep <think> tags in text for backward compatibility
@@ -501,7 +512,8 @@
             reasoning_content = ""
         else:
             # Extract clean text and reasoning from <think> tags
-            clean_text, reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
+            clean_text, extracted_reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
+            reasoning_content = streamed_reasoning_content or extracted_reasoning_content

         # Calculate streaming metrics
         end_time = time.perf_counter()
@@ -1158,7 +1170,8 @@ class LLMNode(Node[LLMNodeData]):
             reasoning_content = ""
         else:
             # Extract clean text and reasoning from <think> tags
-            clean_text, reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
+            clean_text, extracted_reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
+            reasoning_content = invoke_result.reasoning_content or extracted_reasoning_content

         event = ModelInvokeCompletedEvent(
             # Use clean_text for separated mode, full_text for tagged mode
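In both the streaming and blocking paths above, provider-supplied reasoning takes precedence, and <think>-tag extraction only serves as a fallback for models that interleave reasoning with the answer text. A compact sketch of that precedence rule; the regex splitter here is an illustrative stand-in, not Dify's actual _split_reasoning:

import re

def split_think_tags(full_text: str) -> tuple[str, str]:
    """Illustrative splitter: lift <think>...</think> spans out of the answer text."""
    reasoning = "".join(re.findall(r"<think>(.*?)</think>", full_text, flags=re.DOTALL))
    clean = re.sub(r"<think>.*?</think>", "", full_text, flags=re.DOTALL).strip()
    return clean, reasoning

def resolve_reasoning(full_text: str, streamed_reasoning: str) -> tuple[str, str]:
    clean, extracted = split_think_tags(full_text)
    # Streamed delta.reasoning_content wins; tag extraction only fills the gap
    return clean, streamed_reasoning or extracted

print(resolve_reasoning("<think>plan the answer</think>42", ""))      # ('42', 'plan the answer')
print(resolve_reasoning("42", "native reasoning from the provider"))  # ('42', 'native reasoning from the provider')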