diff --git a/api/core/sandbox/bash/bash_tool.py b/api/core/sandbox/bash/bash_tool.py
index 6001847c1b..0b4e3ec70a 100644
--- a/api/core/sandbox/bash/bash_tool.py
+++ b/api/core/sandbox/bash/bash_tool.py
@@ -68,7 +68,10 @@ class SandboxBashTool(Tool):
             ),
             llm="Execute bash commands in current working directory. "
             "Use this tool to run shell commands, scripts, or interact with the system. "
-            "The command will be executed in the current working directory.",
+            "The command will be executed in the current working directory. "
+            "IMPORTANT: If you generate any output files (images, documents, etc.) that need to be "
+            "returned or referenced later, you MUST save them to the 'output/' directory "
+            "(e.g., 'mkdir -p output && cp result.png output/'). Only files in output/ will be collected.",
         ),
     )
diff --git a/api/core/sandbox/bash/session.py b/api/core/sandbox/bash/session.py
index f29c8f2df4..57e790b2c2 100644
--- a/api/core/sandbox/bash/session.py
+++ b/api/core/sandbox/bash/session.py
@@ -2,13 +2,18 @@ from __future__ import annotations
 
 import json
 import logging
+import mimetypes
+import os
 from io import BytesIO
 from types import TracebackType
 
+from core.file import File, FileTransferMethod, FileType
 from core.sandbox.sandbox import Sandbox
 from core.session.cli_api import CliApiSession, CliApiSessionManager, CliContext
 from core.skill.entities import ToolAccessPolicy
 from core.skill.entities.tool_dependencies import ToolDependencies
+from core.tools.signature import sign_tool_file
+from core.tools.tool_file_manager import ToolFileManager
 from core.virtual_environment.__base.helpers import pipeline
 
 from ..bash.dify_cli import DifyCliConfig
@@ -19,6 +24,13 @@ logger = logging.getLogger(__name__)
 
 SANDBOX_READY_TIMEOUT = 60 * 10
 
+# Default output directory for sandbox-generated files
+SANDBOX_OUTPUT_DIR = "output"
+# Maximum number of files to collect from sandbox output
+MAX_OUTPUT_FILES = 50
+# Maximum file size to collect (10MB)
+MAX_OUTPUT_FILE_SIZE = 10 * 1024 * 1024
+
 
 class SandboxBashSession:
     def __init__(self, *, sandbox: Sandbox, node_id: str, tools: ToolDependencies | None) -> None:
@@ -105,3 +117,115 @@ class SandboxBashSession:
         if self._bash_tool is None:
             raise RuntimeError("SandboxSession is not initialized")
         return self._bash_tool
+
+    def collect_output_files(self, output_dir: str = SANDBOX_OUTPUT_DIR) -> list[File]:
+        """
+        Collect files from sandbox output directory and save them as ToolFiles.
+
+        Scans the specified output directory in sandbox, downloads each file,
+        saves it as a ToolFile, and returns a list of File objects. The File
+        objects will have valid tool_file_id that can be referenced by subsequent
+        nodes via structured output.
+
+        Args:
+            output_dir: Directory path in sandbox to scan for output files.
+                Defaults to "output" (relative to workspace).
+
+        Returns:
+            List of File objects representing the collected files.
+        """
+        vm = self._sandbox.vm
+        collected_files: list[File] = []
+
+        try:
+            file_states = vm.list_files(output_dir, limit=MAX_OUTPUT_FILES)
+        except Exception as exc:
+            # Output directory may not exist if no files were generated
+            logger.debug("Failed to list sandbox output files in %s: %s", output_dir, exc)
+            return collected_files
+
+        tool_file_manager = ToolFileManager()
+
+        for file_state in file_states:
+            # Skip files that are too large
+            if file_state.size > MAX_OUTPUT_FILE_SIZE:
+                logger.warning(
+                    "Skipping sandbox output file %s: size %d exceeds limit %d",
+                    file_state.path,
+                    file_state.size,
+                    MAX_OUTPUT_FILE_SIZE,
+                )
+                continue
+
+            try:
+                # file_state.path is already relative to working_path (e.g., "output/file.png")
+                file_content = vm.download_file(file_state.path)
+                file_binary = file_content.getvalue()
+
+                # Determine mime type from extension
+                filename = os.path.basename(file_state.path)
+                mime_type, _ = mimetypes.guess_type(filename)
+                if not mime_type:
+                    mime_type = "application/octet-stream"
+
+                # Save as ToolFile
+                tool_file = tool_file_manager.create_file_by_raw(
+                    user_id=self._user_id,
+                    tenant_id=self._tenant_id,
+                    conversation_id=None,
+                    file_binary=file_binary,
+                    mimetype=mime_type,
+                    filename=filename,
+                )
+
+                # Determine file type from mime type
+                file_type = _get_file_type_from_mime(mime_type)
+                extension = os.path.splitext(filename)[1] if "." in filename else ".bin"
+                url = sign_tool_file(tool_file.id, extension)
+
+                # Create File object with tool_file_id as related_id
+                file_obj = File(
+                    id=tool_file.id,  # Use tool_file_id as the File id for easy reference
+                    tenant_id=self._tenant_id,
+                    type=file_type,
+                    transfer_method=FileTransferMethod.TOOL_FILE,
+                    filename=filename,
+                    extension=extension,
+                    mime_type=mime_type,
+                    size=len(file_binary),
+                    related_id=tool_file.id,
+                    url=url,
+                    storage_key=tool_file.file_key,
+                )
+                collected_files.append(file_obj)
+
+                logger.info(
+                    "Collected sandbox output file: %s -> tool_file_id=%s",
+                    file_state.path,
+                    tool_file.id,
+                )
+
+            except Exception as exc:
+                logger.warning("Failed to collect sandbox output file %s: %s", file_state.path, exc)
+                continue
+
+        logger.info(
+            "Collected %d files from sandbox output directory %s",
+            len(collected_files),
+            output_dir,
+        )
+        return collected_files
+
+
+def _get_file_type_from_mime(mime_type: str) -> FileType:
+    """Determine FileType from mime type."""
+    if mime_type.startswith("image/"):
+        return FileType.IMAGE
+    elif mime_type.startswith("video/"):
+        return FileType.VIDEO
+    elif mime_type.startswith("audio/"):
+        return FileType.AUDIO
+    elif "text" in mime_type or "pdf" in mime_type:
+        return FileType.DOCUMENT
+    else:
+        return FileType.CUSTOM
diff --git a/api/core/workflow/nodes/llm/llm_utils.py b/api/core/workflow/nodes/llm/llm_utils.py
index b864d3b368..86bf8f473e 100644
--- a/api/core/workflow/nodes/llm/llm_utils.py
+++ b/api/core/workflow/nodes/llm/llm_utils.py
@@ -1,5 +1,5 @@
 from collections.abc import Sequence
-from typing import cast
+from typing import Any, cast
 
 from sqlalchemy import select, update
 from sqlalchemy.orm import Session
@@ -216,6 +216,7 @@ def build_context(
     prompt_messages: Sequence[PromptMessage],
     assistant_response: str,
     generation_data: LLMGenerationData | None = None,
+    files: Sequence[Any] | None = None,
 ) -> list[PromptMessage]:
     """
     Build context from prompt messages and assistant response.
@@ -229,24 +230,58 @@ def build_context(
         prompt_messages: Initial prompt messages (user query, etc.)
         assistant_response: Final assistant response text
         generation_data: Optional generation data containing trace for tool-enabled runs
+        files: Optional list of File objects generated during execution
     """
     context_messages: list[PromptMessage] = [
         _truncate_multimodal_content(m) for m in prompt_messages if m.role != PromptMessageRole.SYSTEM
     ]
 
+    # Build file description suffix if files were generated
+    file_suffix = ""
+    if files:
+        file_descriptions = _build_file_descriptions(files)
+        if file_descriptions:
+            file_suffix = f"\n\n{file_descriptions}"
+
     # For tool-enabled runs, reconstruct messages from trace
     if generation_data and generation_data.trace:
-        context_messages.extend(_build_messages_from_trace(generation_data, assistant_response))
+        context_messages.extend(_build_messages_from_trace(generation_data, assistant_response, file_suffix))
     else:
-        context_messages.append(AssistantPromptMessage(content=assistant_response))
+        context_messages.append(AssistantPromptMessage(content=assistant_response + file_suffix))
 
     return context_messages
 
 
+def _build_file_descriptions(files: Sequence[Any]) -> str:
+    """
+    Build a text description of generated files for inclusion in context.
+
+    The description includes file_id which can be used by subsequent nodes
+    to reference the files via structured output.
+    """
+    if not files:
+        return ""
+
+    descriptions: list[str] = ["[Generated Files]"]
+    for file in files:
+        # Get file attributes (File is a Pydantic model)
+        file_id = getattr(file, "id", None) or getattr(file, "related_id", None)
+        filename = getattr(file, "filename", "unknown")
+        file_type = getattr(file, "type", "unknown")
+        if hasattr(file_type, "value"):
+            file_type = file_type.value
+
+        if file_id:
+            descriptions.append(f"- {filename} (id: {file_id}, type: {file_type})")
+
+    return "\n".join(descriptions)
+
+
 def _build_messages_from_trace(
     generation_data: LLMGenerationData,
     assistant_response: str,
+    file_suffix: str = "",
 ) -> list[PromptMessage]:
     """
     Build assistant and tool messages from trace segments.
@@ -254,7 +289,7 @@ def _build_messages_from_trace(
     Processes trace in order to reconstruct the conversation flow:
     - Model segments with tool_calls -> AssistantPromptMessage with tool_calls
     - Tool segments -> ToolPromptMessage with result
-    - Final response -> AssistantPromptMessage with assistant_response
+    - Final response -> AssistantPromptMessage with assistant_response (with optional file_suffix)
     """
     from core.workflow.nodes.llm.entities import ModelTraceSegment, ToolTraceSegment
 
@@ -290,8 +325,8 @@ def _build_messages_from_trace(
             )
         )
 
-    # Add final assistant response as the authoritative text
-    messages.append(AssistantPromptMessage(content=assistant_response))
+    # Add final assistant response as the authoritative text (with file info if present)
+    messages.append(AssistantPromptMessage(content=assistant_response + file_suffix))
 
     return messages
diff --git a/api/core/workflow/nodes/llm/node.py b/api/core/workflow/nodes/llm/node.py
index 1f1eae80f7..38924c27a8 100644
--- a/api/core/workflow/nodes/llm/node.py
+++ b/api/core/workflow/nodes/llm/node.py
@@ -383,16 +383,7 @@ class LLMNode(Node[LLMNodeData]):
             if tool.enabled
         ]
 
-        # Unified outputs building
-        outputs = {
-            "text": clean_text,
-            "reasoning_content": reasoning_content,
-            "usage": jsonable_encoder(usage),
-            "finish_reason": finish_reason,
-            "context": llm_utils.build_context(prompt_messages, clean_text, generation_data),
-        }
-
-        # Build generation field
+        # Build generation field and determine files_to_output first
         if generation_data:
             # Use generation_data from tool invocation (supports multi-turn)
             generation = {
@@ -420,6 +411,15 @@ class LLMNode(Node[LLMNodeData]):
             }
             files_to_output = self._file_outputs
 
+        # Unified outputs building (files passed to context for subsequent node reference)
+        outputs = {
+            "text": clean_text,
+            "reasoning_content": reasoning_content,
+            "usage": jsonable_encoder(usage),
+            "finish_reason": finish_reason,
+            "context": llm_utils.build_context(prompt_messages, clean_text, generation_data, files=files_to_output),
+        }
+
         outputs["generation"] = generation
         if files_to_output:
             outputs["files"] = ArrayFileSegment(value=files_to_output)
@@ -1921,6 +1921,7 @@ class LLMNode(Node[LLMNodeData]):
         tool_dependencies: ToolDependencies | None,
     ) -> Generator[NodeEventBase, None, LLMGenerationData]:
         result: LLMGenerationData | None = None
+        sandbox_output_files: list[File] = []
 
         # FIXME(Mairuis): Async processing for bash session.
         with SandboxBashSession(sandbox=sandbox, node_id=self.id, tools=tool_dependencies) as session:
@@ -1961,9 +1962,26 @@ class LLMNode(Node[LLMNodeData]):
 
                 result = yield from self._process_tool_outputs(outputs)
 
+            # Collect output files from sandbox before session ends
+            # Files are saved as ToolFiles with valid tool_file_id for later reference
+            sandbox_output_files = session.collect_output_files()
+
         if result is None:
             raise LLMNodeError("SandboxSession exited unexpectedly")
 
+        # Merge sandbox output files into result
+        if sandbox_output_files:
+            result = LLMGenerationData(
+                text=result.text,
+                reasoning_contents=result.reasoning_contents,
+                tool_calls=result.tool_calls,
+                sequence=result.sequence,
+                usage=result.usage,
+                finish_reason=result.finish_reason,
+                files=result.files + sandbox_output_files,
+                trace=result.trace,
+            )
+
        return result
 
     def _get_model_features(self, model_instance: ModelInstance) -> list[ModelFeature]: