From d070074164df123df5774b117d00b2299b9aff2d Mon Sep 17 00:00:00 2001 From: zyssyz123 <916125788@qq.com> Date: Thu, 25 Jun 2026 15:30:30 +0800 Subject: [PATCH] fix(agent-v2): pass chat files to agent backend (#37926) --- api/clients/agent_backend/__init__.py | 2 + api/clients/agent_backend/request_builder.py | 12 +++- api/core/app/apps/agent_app/app_generator.py | 19 +++++- api/core/app/apps/agent_app/app_runner.py | 5 ++ .../apps/agent_app/runtime_request_builder.py | 53 ++++++++++++++- .../agent_backend/test_request_builder.py | 43 ++++++++++++ .../agent_app/test_runtime_request_builder.py | 50 ++++++++++++++ .../dify_agent/layers/user_prompt/__init__.py | 13 ++++ .../dify_agent/layers/user_prompt/configs.py | 37 ++++++++++ .../dify_agent/layers/user_prompt/layer.py | 67 +++++++++++++++++++ .../dify_agent/runtime/compositor_factory.py | 2 + .../layers/test_user_prompt_layer.py | 56 ++++++++++++++++ .../dify_agent/test_client_safe_exports.py | 2 + 13 files changed, 356 insertions(+), 5 deletions(-) create mode 100644 dify-agent/src/dify_agent/layers/user_prompt/__init__.py create mode 100644 dify-agent/src/dify_agent/layers/user_prompt/configs.py create mode 100644 dify-agent/src/dify_agent/layers/user_prompt/layer.py create mode 100644 dify-agent/tests/local/dify_agent/layers/test_user_prompt_layer.py diff --git a/api/clients/agent_backend/__init__.py b/api/clients/agent_backend/__init__.py index b9032c521eb..cf0b59d80ca 100644 --- a/api/clients/agent_backend/__init__.py +++ b/api/clients/agent_backend/__init__.py @@ -31,6 +31,7 @@ from clients.agent_backend.event_adapter import ( from clients.agent_backend.factory import create_agent_backend_run_client from clients.agent_backend.fake_client import FakeAgentBackendRunClient, FakeAgentBackendScenario from clients.agent_backend.request_builder import ( + AGENT_APP_USER_PROMPT_LAYER_ID, AGENT_SOUL_PROMPT_LAYER_ID, DIFY_EXECUTION_CONTEXT_LAYER_ID, DIFY_KNOWLEDGE_BASE_LAYER_ID, @@ -46,6 +47,7 @@ from clients.agent_backend.request_builder import ( ) __all__ = [ + "AGENT_APP_USER_PROMPT_LAYER_ID", "AGENT_SOUL_PROMPT_LAYER_ID", "DIFY_EXECUTION_CONTEXT_LAYER_ID", "DIFY_KNOWLEDGE_BASE_LAYER_ID", diff --git a/api/clients/agent_backend/request_builder.py b/api/clients/agent_backend/request_builder.py index 29cc28a3179..acf8edc7425 100644 --- a/api/clients/agent_backend/request_builder.py +++ b/api/clients/agent_backend/request_builder.py @@ -35,6 +35,11 @@ from dify_agent.layers.execution_context import ( from dify_agent.layers.knowledge import DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID, DifyKnowledgeBaseLayerConfig from dify_agent.layers.output import DIFY_OUTPUT_LAYER_TYPE_ID, DifyOutputLayerConfig from dify_agent.layers.shell import DIFY_SHELL_LAYER_TYPE_ID, DifyShellLayerConfig +from dify_agent.layers.user_prompt import ( + DIFY_USER_PROMPT_LAYER_TYPE_ID, + DifyUserPromptFileConfig, + DifyUserPromptLayerConfig, +) from dify_agent.protocol import ( DIFY_AGENT_HISTORY_LAYER_ID, DIFY_AGENT_MODEL_LAYER_ID, @@ -190,6 +195,7 @@ class AgentBackendAgentAppRunInput(BaseModel): model: AgentBackendModelConfig execution_context: DifyExecutionContextLayerConfig user_prompt: str + user_files: list[DifyUserPromptFileConfig] = Field(default_factory=list) agent_soul_prompt: str | None = None purpose: RunPurpose = "agent_app" idempotency_key: str | None = None @@ -250,9 +256,9 @@ class AgentBackendRunRequestBuilder: [ RunLayerSpec( name=AGENT_APP_USER_PROMPT_LAYER_ID, - type=PLAIN_PROMPT_LAYER_TYPE_ID, + type=DIFY_USER_PROMPT_LAYER_TYPE_ID, metadata={**run_input.metadata, "origin": "agent_app_user_prompt"}, - config=PromptLayerConfig(user=run_input.user_prompt), + config=DifyUserPromptLayerConfig(text=run_input.user_prompt, files=run_input.user_files), ), RunLayerSpec( name=DIFY_EXECUTION_CONTEXT_LAYER_ID, @@ -579,7 +585,7 @@ class AgentBackendRunRequestBuilder: ) -_SENSITIVE_KEY_PARTS = ("secret", "credential", "token", "password", "api_key") +_SENSITIVE_KEY_PARTS = ("secret", "credential", "token", "password", "api_key", "base64_data") def redact_for_agent_backend_log(value: object) -> object: diff --git a/api/core/app/apps/agent_app/app_generator.py b/api/core/app/apps/agent_app/app_generator.py index bd8012a0f24..a414434e148 100644 --- a/api/core/app/apps/agent_app/app_generator.py +++ b/api/core/app/apps/agent_app/app_generator.py @@ -23,6 +23,7 @@ from clients.agent_backend.factory import create_agent_backend_run_client from configs import dify_config from constants import UUID_NIL from core.app.app_config.easy_ui_based_app.model_config.converter import ModelConfigConverter +from core.app.app_config.features.file_upload.manager import FileUploadConfigManager from core.app.apps.agent_app.app_config_manager import AgentAppConfigManager from core.app.apps.agent_app.app_runner import AgentAppRunner from core.app.apps.agent_app.generate_response_converter import AgentAppGenerateResponseConverter @@ -41,6 +42,7 @@ from core.app.entities.app_invoke_entities import ( from core.app.llm.model_access import build_dify_model_access from core.ops.ops_trace_manager import TraceQueueManager from extensions.ext_database import db +from factories import file_factory from models import Account, App, EndUser, Message from models.agent import ( Agent, @@ -109,6 +111,18 @@ class AgentAppGenerator(MessageBasedAppGenerator): conversation=conversation, ) model_conf = ModelConfigConverter.convert(app_config) + with self._bind_file_access_scope(tenant_id=app_model.tenant_id, user=user, invoke_from=invoke_from): + raw_files = args.get("files") or [] + file_extra_config = FileUploadConfigManager.convert(app_config.app_model_config_dict, is_vision=True) + if raw_files: + file_objs = file_factory.build_from_mappings( + mappings=raw_files, + tenant_id=app_model.tenant_id, + config=file_extra_config, + access_controller=self._file_access_controller, + ) + else: + file_objs = [] trace_manager = TraceQueueManager(app_model.id, user.id if isinstance(user, Account) else user.session_id) @@ -116,12 +130,13 @@ class AgentAppGenerator(MessageBasedAppGenerator): task_id=str(uuid.uuid4()), app_config=app_config, model_conf=model_conf, + file_upload_config=file_extra_config, conversation_id=conversation.id if conversation else None, inputs=self._prepare_user_inputs( user_inputs=inputs, variables=app_config.variables, tenant_id=app_model.tenant_id ), query=query, - files=[], + files=list(file_objs), parent_message_id=( args.get("parent_message_id") if invoke_from not in {InvokeFrom.SERVICE_API, InvokeFrom.OPENAPI} @@ -364,6 +379,8 @@ class AgentAppGenerator(MessageBasedAppGenerator): message_id=message.id, model_name=application_generate_entity.model_conf.model, queue_manager=queue_manager, + files=list(application_generate_entity.files), + file_upload_config=application_generate_entity.file_upload_config, session_scope_snapshot_id=application_generate_entity.agent_runtime_session_snapshot_id, ) except GenerateTaskStoppedError: diff --git a/api/core/app/apps/agent_app/app_runner.py b/api/core/app/apps/agent_app/app_runner.py index b482afcdf43..5cda1228610 100644 --- a/api/core/app/apps/agent_app/app_runner.py +++ b/api/core/app/apps/agent_app/app_runner.py @@ -45,6 +45,7 @@ from core.app.entities.queue_entities import QueueLLMChunkEvent, QueueMessageEnd from core.repositories.human_input_repository import HumanInputFormRepository, HumanInputFormRepositoryImpl from core.workflow.nodes.agent_v2.ask_human_hitl import AskHumanFormBuildError, create_ask_human_form from core.workflow.nodes.agent_v2.ask_human_resume import build_deferred_tool_results, resolve_ask_human_form +from graphon.file import File, FileUploadConfig from graphon.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage from graphon.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage, UserPromptMessage from models.agent_config_entities import AgentSoulConfig @@ -162,6 +163,8 @@ class AgentAppRunner: message_id: str, model_name: str, queue_manager: AppQueueManager, + files: list[File] | None = None, + file_upload_config: FileUploadConfig | None = None, session_scope_snapshot_id: str | None | _DefaultSessionScopeSnapshotId = _DEFAULT_SESSION_SCOPE_SNAPSHOT_ID, ) -> None: if isinstance(session_scope_snapshot_id, _DefaultSessionScopeSnapshotId): @@ -192,6 +195,8 @@ class AgentAppRunner: conversation_id=conversation_id, user_query=query, idempotency_key=message_id, + files=tuple(files or ()), + file_upload_config=file_upload_config, session_snapshot=session_snapshot, deferred_tool_results=deferred_tool_results, ) diff --git a/api/core/app/apps/agent_app/runtime_request_builder.py b/api/core/app/apps/agent_app/runtime_request_builder.py index 9790f2fbca0..75183eaa061 100644 --- a/api/core/app/apps/agent_app/runtime_request_builder.py +++ b/api/core/app/apps/agent_app/runtime_request_builder.py @@ -10,9 +10,10 @@ used by workflow runs. from __future__ import annotations +import base64 from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, Protocol, cast +from typing import Any, Literal, Protocol, cast from agenton.compositor import CompositorSessionSnapshot from dify_agent.layers.execution_context import ( @@ -20,6 +21,7 @@ from dify_agent.layers.execution_context import ( DifyExecutionContextLayerConfig, DifyExecutionContextUserFrom, ) +from dify_agent.layers.user_prompt import DifyUserPromptFileConfig from dify_agent.protocol import CreateRunRequest, DeferredToolResultsPayload from clients.agent_backend import ( @@ -42,6 +44,7 @@ from core.workflow.nodes.agent_v2.runtime_request_builder import ( build_knowledge_layer_config, build_shell_layer_config, ) +from graphon.file import File, FileType, FileUploadConfig, file_manager from models.agent_config_entities import AgentSoulConfig from models.provider_ids import ModelProviderID from services.agent.prompt_mentions import build_soul_mention_resolver, expand_prompt_mentions @@ -68,6 +71,8 @@ class AgentAppRuntimeBuildContext: conversation_id: str user_query: str idempotency_key: str + files: tuple[File, ...] = () + file_upload_config: FileUploadConfig | None = None session_snapshot: CompositorSessionSnapshot | None = None # ENG-638: set when resuming a chat turn after a submitted ask_human form. deferred_tool_results: DeferredToolResultsPayload | None = None @@ -168,6 +173,7 @@ class AgentAppRuntimeRequestBuilder: agent_soul_prompt=expand_prompt_mentions(agent_soul.prompt.system_prompt, soul_prompt_resolver).strip() or None, user_prompt=context.user_query, + user_files=self._build_user_files(context.files, context.file_upload_config), tools=tools_layer, knowledge=knowledge_config, drive_config=drive_config, @@ -217,6 +223,51 @@ class AgentAppRuntimeRequestBuilder: normalized[key] = str(value) return normalized + @staticmethod + def _build_user_files( + files: tuple[File, ...], + file_upload_config: FileUploadConfig | None, + ) -> list[DifyUserPromptFileConfig]: + detail = _image_detail(file_upload_config) + return [_build_user_file(file, detail=detail) for file in files] + + +def _build_user_file(file: File, *, detail: Literal["low", "high"] | None) -> DifyUserPromptFileConfig: + file_type = file.type.value if isinstance(file.type, FileType) else str(file.type) + if file_type not in {"image", "document", "audio", "video"}: + raise AgentAppRuntimeRequestBuildError( + "agent_user_file_unsupported", + f"Agent App does not support file type '{file_type}' in user prompt.", + ) + mime_type = file.mime_type or "application/octet-stream" + return DifyUserPromptFileConfig( + filename=file.filename or "file", + mime_type=mime_type, + format=_file_format(file), + type=cast(Any, file_type), + base64_data=base64.b64encode(file_manager.download(file)).decode(), + detail=detail if file_type == "image" else None, + ) + + +def _file_format(file: File) -> str: + extension = (file.extension or "").lstrip(".").lower() + if extension: + return extension + mime_type = file.mime_type or "" + if "/" in mime_type: + return mime_type.rsplit("/", 1)[-1].lower() + return "bin" + + +def _image_detail(file_upload_config: FileUploadConfig | None) -> Literal["low", "high"] | None: + image_config = file_upload_config.image_config if file_upload_config is not None else None + detail = image_config.detail if image_config is not None else None + if detail is None: + return None + detail_value = getattr(detail, "value", detail) + return cast(Literal["low", "high"], detail_value) if detail_value in {"low", "high"} else None + __all__ = [ "AgentAppRuntimeBuildContext", diff --git a/api/tests/unit_tests/clients/agent_backend/test_request_builder.py b/api/tests/unit_tests/clients/agent_backend/test_request_builder.py index 3bb73289580..103a5b34e5f 100644 --- a/api/tests/unit_tests/clients/agent_backend/test_request_builder.py +++ b/api/tests/unit_tests/clients/agent_backend/test_request_builder.py @@ -19,6 +19,7 @@ from dify_agent.layers.execution_context import DIFY_EXECUTION_CONTEXT_LAYER_TYP from dify_agent.layers.knowledge import DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID, DifyKnowledgeBaseLayerConfig from dify_agent.layers.output import DIFY_OUTPUT_LAYER_TYPE_ID from dify_agent.layers.shell import DIFY_SHELL_LAYER_TYPE_ID, DifyShellEnvVarConfig, DifyShellLayerConfig +from dify_agent.layers.user_prompt import DIFY_USER_PROMPT_LAYER_TYPE_ID, DifyUserPromptFileConfig from dify_agent.protocol import ( DIFY_AGENT_HISTORY_LAYER_ID, DIFY_AGENT_MODEL_LAYER_ID, @@ -28,6 +29,7 @@ from dify_agent.protocol import ( from pydantic import ValidationError from clients.agent_backend import ( + AGENT_APP_USER_PROMPT_LAYER_ID, AGENT_SOUL_PROMPT_LAYER_ID, DIFY_EXECUTION_CONTEXT_LAYER_ID, DIFY_KNOWLEDGE_BASE_LAYER_ID, @@ -360,6 +362,47 @@ def test_agent_app_request_builder_omits_shell_layer_by_default(): assert DIFY_SHELL_LAYER_ID not in {layer.name for layer in request.composition.layers} +def test_agent_app_request_builder_uses_multimodal_user_prompt_layer(): + run_input = _agent_app_input() + run_input.user_files = [ + DifyUserPromptFileConfig( + filename="red.png", + mime_type="image/png", + format="png", + type="image", + base64_data="cmVk", + detail="high", + ) + ] + + request = AgentBackendRunRequestBuilder().build_for_agent_app(run_input) + layers = {layer.name: layer for layer in request.composition.layers} + user_prompt_layer = layers[AGENT_APP_USER_PROMPT_LAYER_ID] + + assert user_prompt_layer.type == DIFY_USER_PROMPT_LAYER_TYPE_ID + assert user_prompt_layer.config.text == "List files." + assert user_prompt_layer.config.files[0].filename == "red.png" + + +def test_redact_for_agent_backend_log_hides_user_file_base64_data(): + run_input = _agent_app_input() + run_input.user_files = [ + DifyUserPromptFileConfig( + filename="red.png", + mime_type="image/png", + format="png", + type="image", + base64_data="cmVk", + ) + ] + request = AgentBackendRunRequestBuilder().build_for_agent_app(run_input) + + redacted = cast(dict[str, Any], redact_for_agent_backend_log(request)) + layers = {layer["name"]: layer for layer in redacted["composition"]["layers"]} + + assert layers[AGENT_APP_USER_PROMPT_LAYER_ID]["config"]["files"][0]["base64_data"] == "[REDACTED]" + + def test_agent_app_request_builder_adds_shell_layer_when_include_shell(): run_input = _agent_app_input(include_shell=True) run_input.shell_config = DifyShellLayerConfig(env=[DifyShellEnvVarConfig(name="APP_ENV", value="enabled")]) diff --git a/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py b/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py index ef5aff1dd41..660321a78ad 100644 --- a/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py +++ b/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py @@ -3,12 +3,14 @@ from __future__ import annotations +from dataclasses import replace from types import SimpleNamespace from typing import Any import pytest from dify_agent.layers.execution_context import DifyExecutionContextLayerConfig +import core.app.apps.agent_app.runtime_request_builder as runtime_request_builder_module from clients.agent_backend import ( AgentBackendAgentAppRunInput, AgentBackendModelConfig, @@ -21,6 +23,7 @@ from core.app.apps.agent_app.runtime_request_builder import ( AgentAppRuntimeRequestBuildError, ) from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom +from graphon.file import File, FileTransferMethod, FileType, FileUploadConfig, ImageConfig from models.agent_config_entities import AgentSoulConfig @@ -104,6 +107,19 @@ def _ctx(soul: AgentSoulConfig, *, query: str = "hello") -> AgentAppRuntimeBuild ) +def _image_file() -> File: + return File( + transfer_method=FileTransferMethod.LOCAL_FILE, + id="file-1", + type=FileType.IMAGE, + filename="red.png", + extension=".png", + mime_type="image/png", + size=3, + reference="upload-file-id", + ) + + def _soul_with_model() -> AgentSoulConfig: return AgentSoulConfig.model_validate( { @@ -144,6 +160,40 @@ class TestAgentAppRuntimeRequestBuilder: assert result.redacted_request["composition"]["layers"][-1]["config"]["credentials"] == "[REDACTED]" assert result.metadata["conversation_id"] == "conv-1" + def test_build_maps_uploaded_files_to_user_prompt_layer(self, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(runtime_request_builder_module.file_manager, "download", lambda file: b"red") + builder = AgentAppRuntimeRequestBuilder( + credentials_provider=_FakeCredentialsProvider(), + plugin_tools_builder=_NoToolsBuilder(), # type: ignore[arg-type] + ) + ctx = replace( + _ctx(_soul_with_model()), + files=(_image_file(),), + file_upload_config=FileUploadConfig(image_config=ImageConfig(detail="high")), + ) + + result = builder.build(ctx) + + user_prompt = next( + layer for layer in result.request.composition.layers if layer.name == "agent_app_user_prompt" + ) + dumped = user_prompt.config.model_dump(mode="json") + assert dumped["text"] == "hello" + assert dumped["files"][0] == { + "filename": "red.png", + "mime_type": "image/png", + "format": "png", + "type": "image", + "base64_data": "cmVk", + "detail": "high", + } + redacted_user_prompt = next( + layer + for layer in result.redacted_request["composition"]["layers"] + if layer["name"] == "agent_app_user_prompt" + ) + assert redacted_user_prompt["config"]["files"][0]["base64_data"] == "[REDACTED]" + def test_build_normalizes_marketplace_model_plugin_id(self): soul = _soul_with_model() soul.model.plugin_id = ( diff --git a/dify-agent/src/dify_agent/layers/user_prompt/__init__.py b/dify-agent/src/dify_agent/layers/user_prompt/__init__.py new file mode 100644 index 00000000000..3fa5f38cccd --- /dev/null +++ b/dify-agent/src/dify_agent/layers/user_prompt/__init__.py @@ -0,0 +1,13 @@ +"""Client-safe exports for Agent App user prompt layer DTOs.""" + +from dify_agent.layers.user_prompt.configs import ( + DIFY_USER_PROMPT_LAYER_TYPE_ID, + DifyUserPromptFileConfig, + DifyUserPromptLayerConfig, +) + +__all__ = [ + "DIFY_USER_PROMPT_LAYER_TYPE_ID", + "DifyUserPromptFileConfig", + "DifyUserPromptLayerConfig", +] diff --git a/dify-agent/src/dify_agent/layers/user_prompt/configs.py b/dify-agent/src/dify_agent/layers/user_prompt/configs.py new file mode 100644 index 00000000000..bc68884c0c0 --- /dev/null +++ b/dify-agent/src/dify_agent/layers/user_prompt/configs.py @@ -0,0 +1,37 @@ +"""Serializable user prompt layer DTOs for Agent App chat turns.""" + +from typing import ClassVar, Final, Literal + +from pydantic import BaseModel, ConfigDict, Field + +from agenton.layers import LayerConfig + + +DIFY_USER_PROMPT_LAYER_TYPE_ID: Final[str] = "dify.user_prompt" + + +class DifyUserPromptFileConfig(BaseModel): + """One user-uploaded file carried inline to the agent backend.""" + + model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid") + + filename: str + mime_type: str + format: str + type: Literal["image", "document", "audio", "video", "custom"] + base64_data: str + detail: Literal["low", "high"] | None = None + + +class DifyUserPromptLayerConfig(LayerConfig): + """User prompt text plus optional multimodal files.""" + + text: str + files: list[DifyUserPromptFileConfig] = Field(default_factory=list) + + +__all__ = [ + "DIFY_USER_PROMPT_LAYER_TYPE_ID", + "DifyUserPromptFileConfig", + "DifyUserPromptLayerConfig", +] diff --git a/dify-agent/src/dify_agent/layers/user_prompt/layer.py b/dify-agent/src/dify_agent/layers/user_prompt/layer.py new file mode 100644 index 00000000000..33c54226d20 --- /dev/null +++ b/dify-agent/src/dify_agent/layers/user_prompt/layer.py @@ -0,0 +1,67 @@ +"""Runtime layer that turns serialized Agent App user files into pydantic-ai content.""" + +from __future__ import annotations + +import base64 +from dataclasses import dataclass +from typing import ClassVar + +from pydantic_ai.messages import BinaryContent, UserContent +from typing_extensions import Self, override + +from agenton.layers import EmptyRuntimeState, NoLayerDeps, PydanticAILayer +from dify_agent.layers.user_prompt.configs import ( + DIFY_USER_PROMPT_LAYER_TYPE_ID, + DifyUserPromptFileConfig, + DifyUserPromptLayerConfig, +) + + +@dataclass(slots=True) +class DifyUserPromptLayer(PydanticAILayer[NoLayerDeps, object, DifyUserPromptLayerConfig, EmptyRuntimeState]): + """State-free pydantic-ai layer for text and uploaded user files.""" + + type_id: ClassVar[str | None] = DIFY_USER_PROMPT_LAYER_TYPE_ID + + config: DifyUserPromptLayerConfig + + @classmethod + @override + def from_config(cls, config: DifyUserPromptLayerConfig) -> Self: + """Create the layer from validated public config.""" + return cls(config=DifyUserPromptLayerConfig.model_validate(config)) + + @property + @override + def user_prompts(self) -> list[UserContent]: + if not self.config.files: + return [self.config.text] + + parts: list[UserContent] = [] + for file in self.config.files: + parts.append(_file_to_binary_content(file)) + if self.config.text: + parts.append(self.config.text) + return parts + + +def _file_to_binary_content(file: DifyUserPromptFileConfig) -> BinaryContent: + metadata: dict[str, str] = {"filename": file.filename} + if file.detail: + metadata["detail"] = file.detail + return BinaryContent( + data=base64.b64decode(file.base64_data), + media_type=file.mime_type, + identifier=_identifier_from_filename(file.filename, file.format), + vendor_metadata=metadata, + ) + + +def _identifier_from_filename(filename: str, file_format: str) -> str: + suffix = f".{file_format}" if file_format else "" + if suffix and filename.lower().endswith(suffix.lower()): + return filename[: -len(suffix)] or "file" + return filename or "file" + + +__all__ = ["DifyUserPromptLayer"] diff --git a/dify-agent/src/dify_agent/runtime/compositor_factory.py b/dify-agent/src/dify_agent/runtime/compositor_factory.py index 0cfab33ad0d..09c969aa963 100644 --- a/dify-agent/src/dify_agent/runtime/compositor_factory.py +++ b/dify-agent/src/dify_agent/runtime/compositor_factory.py @@ -47,6 +47,7 @@ from dify_agent.layers.knowledge.layer import DifyKnowledgeBaseLayer from dify_agent.layers.output.output_layer import DifyOutputLayer from dify_agent.layers.shell.configs import DifyShellLayerConfig from dify_agent.layers.shell.layer import DifyShellLayer, create_shellctl_client_factory +from dify_agent.layers.user_prompt.layer import DifyUserPromptLayer type DifyAgentLayerProvider = LayerProvider[Any] @@ -87,6 +88,7 @@ def create_default_layer_providers( agent_stub_token_factory = build_agent_stub_token return ( LayerProvider.from_layer_type(PromptLayer), + LayerProvider.from_layer_type(DifyUserPromptLayer), LayerProvider.from_layer_type(PydanticAIHistoryLayer), LayerProvider.from_layer_type(DifyOutputLayer), LayerProvider.from_layer_type(DifyAskHumanLayer), diff --git a/dify-agent/tests/local/dify_agent/layers/test_user_prompt_layer.py b/dify-agent/tests/local/dify_agent/layers/test_user_prompt_layer.py new file mode 100644 index 00000000000..bcba79200f1 --- /dev/null +++ b/dify-agent/tests/local/dify_agent/layers/test_user_prompt_layer.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from typing import cast + +from pydantic_ai.messages import BinaryContent + +from dify_agent.layers.user_prompt.configs import DIFY_USER_PROMPT_LAYER_TYPE_ID, DifyUserPromptLayerConfig +from dify_agent.layers.user_prompt.layer import DifyUserPromptLayer +from dify_agent.runtime.compositor_factory import create_default_layer_providers + + +def test_user_prompt_layer_restores_text_only_prompt() -> None: + layer = DifyUserPromptLayer.from_config(DifyUserPromptLayerConfig(text="hello")) + + assert layer.type_id == DIFY_USER_PROMPT_LAYER_TYPE_ID + assert layer.user_prompts == ["hello"] + + +def test_user_prompt_layer_restores_binary_file_prompt() -> None: + layer = DifyUserPromptLayer.from_config( + DifyUserPromptLayerConfig.model_validate( + { + "text": "what is in this image?", + "files": [ + { + "filename": "red.png", + "mime_type": "image/png", + "format": "png", + "type": "image", + "base64_data": "cmVk", + "detail": "high", + } + ], + } + ) + ) + + prompts = layer.user_prompts + + assert len(prompts) == 2 + file_part = prompts[0] + assert isinstance(file_part, BinaryContent) + assert file_part.data == b"red" + assert file_part.media_type == "image/png" + assert file_part.vendor_metadata == {"filename": "red.png", "detail": "high"} + assert prompts[1] == "what is in this image?" + + +def test_default_layer_providers_register_user_prompt_layer() -> None: + provider = next( + provider for provider in create_default_layer_providers() if provider.type_id == DIFY_USER_PROMPT_LAYER_TYPE_ID + ) + + layer = cast(DifyUserPromptLayer, provider.create_layer({"text": "hello"})) + + assert isinstance(layer, DifyUserPromptLayer) diff --git a/dify-agent/tests/local/dify_agent/test_client_safe_exports.py b/dify-agent/tests/local/dify_agent/test_client_safe_exports.py index 30f430521ad..72d60901cdc 100644 --- a/dify-agent/tests/local/dify_agent/test_client_safe_exports.py +++ b/dify-agent/tests/local/dify_agent/test_client_safe_exports.py @@ -77,6 +77,7 @@ def test_client_public_exports_work_with_default_dependencies_only(tmp_path: Pat plugin_module = importlib.import_module("dify_agent.layers.dify_plugin") ask_human_module = importlib.import_module("dify_agent.layers.ask_human") output_module = importlib.import_module("dify_agent.layers.output") + user_prompt_module = importlib.import_module("dify_agent.layers.user_prompt") assert agenton_layers.ExitIntent is not None assert agenton_layers.LayerConfig is not None @@ -97,6 +98,7 @@ def test_client_public_exports_work_with_default_dependencies_only(tmp_path: Pat assert plugin_module.DifyPluginLLMLayerConfig is not None assert ask_human_module.DifyAskHumanLayerConfig is not None assert output_module.DifyOutputLayerConfig is not None + assert user_prompt_module.DifyUserPromptLayerConfig is not None grpc_error = importlib.import_module("dify_agent.agent_stub.client._errors").AgentStubMissingGRPCDependencyError try: