mirror of https://github.com/langgenius/dify.git
Merge 28ecd2e7f9 into 2c919efa69
This commit is contained in:
commit e6cbca7367
@@ -247,6 +247,7 @@ class AppRunner:
         model: str = ""
         prompt_messages: list[PromptMessage] = []
         text = ""
+        reasoning_content = ""
         usage = None
         for result in invoke_result:
             if not agent:

@@ -266,6 +267,10 @@ class AppRunner:
             else:
                 text += content  # failback to str

+            # Handle reasoning content from delta (e.g., Ollama's thinking field)
+            if result.delta.reasoning_content:
+                reasoning_content += result.delta.reasoning_content
+
             if not model:
                 model = result.model

@@ -279,7 +284,11 @@ class AppRunner:
             usage = LLMUsage.empty_usage()

         llm_result = LLMResult(
-            model=model, prompt_messages=prompt_messages, message=AssistantPromptMessage(content=text), usage=usage
+            model=model,
+            prompt_messages=prompt_messages,
+            message=AssistantPromptMessage(content=text),
+            usage=usage,
+            reasoning_content=reasoning_content or None,
         )

         queue_manager.publish(

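The three AppRunner hunks above initialize a reasoning buffer, grow it chunk by chunk alongside the answer text, and attach it to the final result (or None when nothing was produced). A minimal, self-contained sketch of that accumulation pattern, using stand-in names rather than dify's actual classes:

```python
# Illustrative sketch of the runner's accumulation pattern; Delta, collect, and
# stream are stand-ins, not dify's actual types.
from dataclasses import dataclass


@dataclass
class Delta:
    content: str = ""
    reasoning_content: str | None = None


def collect(stream: list[Delta]) -> tuple[str, str | None]:
    text = ""
    reasoning_content = ""
    for delta in stream:
        text += delta.content                      # answer text, chunk by chunk
        if delta.reasoning_content:                # optional reasoning, same pattern
            reasoning_content += delta.reasoning_content
    return text, (reasoning_content or None)      # None when no reasoning arrived


print(collect([Delta("4", "2 + 2"), Delta(" is even.", " = 4")]))
# ('4 is even.', '2 + 2 = 4')
```
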
@@ -192,6 +192,7 @@ class LLMResultChunkDelta(BaseModel):
     message: AssistantPromptMessage
     usage: LLMUsage | None = None
     finish_reason: str | None = None
+    reasoning_content: str | None = None


 class LLMResultChunk(BaseModel):

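The schema change is a single optional field on the chunk delta, so payloads that never mention reasoning still validate. A small pydantic sketch with stand-in class names (not the real dify entities) to illustrate:

```python
from pydantic import BaseModel


class AssistantMessage(BaseModel):        # stand-in for AssistantPromptMessage
    content: str = ""


class ChunkDelta(BaseModel):              # stand-in for LLMResultChunkDelta
    message: AssistantMessage
    finish_reason: str | None = None
    reasoning_content: str | None = None  # new optional field; absent -> None


old_style = ChunkDelta.model_validate({"message": {"content": "hi"}})
new_style = ChunkDelta.model_validate(
    {"message": {"content": "hi"}, "reasoning_content": "thinking about greetings"}
)
print(old_style.reasoning_content, "|", new_style.reasoning_content)
# None | thinking about greetings
```

Because the field defaults to None, existing producers and consumers that never set it are unaffected.
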
@@ -9,6 +9,7 @@ from pydantic import BaseModel
 from yarl import URL

 from configs import dify_config
+from core.model_runtime.entities.llm_entities import LLMResultChunk
 from core.model_runtime.errors.invoke import (
     InvokeAuthorizationError,
     InvokeBadRequestError,

@@ -255,7 +256,17 @@ class BasePluginClient:
         """
         for line in self._stream_request(method, path, params, headers, data, files):
             try:
-                rep = PluginDaemonBasicResponse[type_].model_validate_json(line)  # type: ignore
+                line_data = json.loads(line)
+
+                if isinstance(line_data, dict) and type_ is LLMResultChunk:
+                    if "data" in line_data and isinstance(line_data["data"], dict):
+                        data_dict = line_data["data"]
+                        if "delta" in data_dict and isinstance(data_dict["delta"], dict):
+                            delta_dict = data_dict["delta"]
+                            if "thinking" in delta_dict and "reasoning_content" not in delta_dict:
+                                delta_dict["reasoning_content"] = delta_dict.pop("thinking")
+
+                rep = PluginDaemonBasicResponse[type_].model_validate(line_data)  # type: ignore
             except (ValueError, TypeError):
                 # TODO modify this when line_data has code and message
                 try:

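This hunk normalizes the plugin-daemon payload before validation: when a streamed line's delta carries a provider-specific `thinking` key (as Ollama's responses do) and no `reasoning_content`, the key is renamed in place so the generic field is populated. A standalone sketch of the same remapping, with a hypothetical helper name:

```python
import json


def normalize_thinking(raw_line: str) -> dict:
    """Hypothetical helper illustrating the remap: expose a provider-specific
    "thinking" field under the generic "reasoning_content" key."""
    payload = json.loads(raw_line)
    data = payload.get("data")
    if isinstance(data, dict) and isinstance(data.get("delta"), dict):
        delta = data["delta"]
        if "thinking" in delta and "reasoning_content" not in delta:
            delta["reasoning_content"] = delta.pop("thinking")
    return payload


print(normalize_thinking('{"data": {"delta": {"message": {"content": ""}, "thinking": "step 1"}}}'))
# {'data': {'delta': {'message': {'content': ''}, 'reasoning_content': 'step 1'}}}
```
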
@@ -444,6 +444,7 @@ class LLMNode(Node[LLMNodeData]):
         usage = LLMUsage.empty_usage()
         finish_reason = None
         full_text_buffer = io.StringIO()
+        reasoning_content_buffer = io.StringIO()

         # Initialize streaming metrics tracking
         start_time = request_start_time if request_start_time is not None else time.perf_counter()

@@ -478,6 +479,15 @@ class LLMNode(Node[LLMNodeData]):
                     is_final=False,
                 )

+            # Handle reasoning content from delta (e.g., Ollama's thinking field)
+            if result.delta.reasoning_content:
+                reasoning_content_buffer.write(result.delta.reasoning_content)
+                yield StreamChunkEvent(
+                    selector=[node_id, "reasoning_content"],
+                    chunk=result.delta.reasoning_content,
+                    is_final=False,
+                )
+
             # Update the whole metadata
             if not model and result.model:
                 model = result.model

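Here the node mirrors reasoning into its own buffer and streams it under a separate `reasoning_content` selector, so the answer text and the reasoning can be rendered independently. A rough consumer-side sketch using hypothetical event dicts in place of `StreamChunkEvent`:

```python
import io

# Hypothetical event dicts standing in for StreamChunkEvent: each carries a
# selector (which output field it belongs to) and a chunk of text.
events = [
    {"selector": ["node-1", "reasoning_content"], "chunk": "User asked for parity. "},
    {"selector": ["node-1", "text"], "chunk": "4 is "},
    {"selector": ["node-1", "reasoning_content"], "chunk": "4 % 2 == 0."},
    {"selector": ["node-1", "text"], "chunk": "even."},
]

buffers: dict[str, io.StringIO] = {"text": io.StringIO(), "reasoning_content": io.StringIO()}
for event in events:
    buffers[event["selector"][-1]].write(event["chunk"])   # route by selector suffix

print(buffers["text"].getvalue())               # 4 is even.
print(buffers["reasoning_content"].getvalue())  # User asked for parity. 4 % 2 == 0.
```
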
@@ -494,6 +504,7 @@ class LLMNode(Node[LLMNodeData]):

         # Extract reasoning content from <think> tags in the main text
         full_text = full_text_buffer.getvalue()
+        streamed_reasoning_content = reasoning_content_buffer.getvalue()

         if reasoning_format == "tagged":
             # Keep <think> tags in text for backward compatibility

@@ -501,7 +512,8 @@
             reasoning_content = ""
         else:
             # Extract clean text and reasoning from <think> tags
-            clean_text, reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
+            clean_text, extracted_reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
+            reasoning_content = streamed_reasoning_content or extracted_reasoning_content

         # Calculate streaming metrics
         end_time = time.perf_counter()

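For non-`tagged` reasoning formats, the node now prefers reasoning that arrived via the delta and falls back to extracting `<think>` blocks from the full text. An illustrative stand-in for the `_split_reasoning` idea (the real helper also takes `reasoning_format` into account):

```python
import re


def split_reasoning(full_text: str) -> tuple[str, str]:
    """Illustrative stand-in for LLMNode._split_reasoning: pull <think>...</think>
    blocks out of the answer and return (clean_text, reasoning)."""
    reasoning = "".join(re.findall(r"<think>(.*?)</think>", full_text, flags=re.DOTALL))
    clean_text = re.sub(r"<think>.*?</think>", "", full_text, flags=re.DOTALL).strip()
    return clean_text, reasoning.strip()


clean_text, extracted = split_reasoning("<think>2 + 2 = 4</think>The answer is 4.")
streamed = ""  # whatever arrived via delta.reasoning_content during streaming
reasoning_content = streamed or extracted  # prefer streamed, fall back to extracted
print(clean_text, "|", reasoning_content)  # The answer is 4. | 2 + 2 = 4
```

The same precedence appears again in the non-streaming path below, where `invoke_result.reasoning_content` plays the role of the streamed value.
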
@@ -1158,7 +1170,8 @@ class LLMNode(Node[LLMNodeData]):
             reasoning_content = ""
         else:
             # Extract clean text and reasoning from <think> tags
-            clean_text, reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
+            clean_text, extracted_reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
+            reasoning_content = invoke_result.reasoning_content or extracted_reasoning_content

         event = ModelInvokeCompletedEvent(
             # Use clean_text for separated mode, full_text for tagged mode