Revert "fix(agent-v2): pass chat files to agent backend (#37926)"

This reverts commit d070074164.
This commit is contained in:
Yanli 盐粒 2026-06-25 16:09:13 +08:00
parent 8eb925201d
commit 3f653b2dfb
13 changed files with 5 additions and 356 deletions

View File

@ -31,7 +31,6 @@ from clients.agent_backend.event_adapter import (
from clients.agent_backend.factory import create_agent_backend_run_client
from clients.agent_backend.fake_client import FakeAgentBackendRunClient, FakeAgentBackendScenario
from clients.agent_backend.request_builder import (
AGENT_APP_USER_PROMPT_LAYER_ID,
AGENT_SOUL_PROMPT_LAYER_ID,
DIFY_EXECUTION_CONTEXT_LAYER_ID,
DIFY_KNOWLEDGE_BASE_LAYER_ID,
@ -47,7 +46,6 @@ from clients.agent_backend.request_builder import (
)
__all__ = [
"AGENT_APP_USER_PROMPT_LAYER_ID",
"AGENT_SOUL_PROMPT_LAYER_ID",
"DIFY_EXECUTION_CONTEXT_LAYER_ID",
"DIFY_KNOWLEDGE_BASE_LAYER_ID",

View File

@ -35,11 +35,6 @@ from dify_agent.layers.execution_context import (
from dify_agent.layers.knowledge import DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID, DifyKnowledgeBaseLayerConfig
from dify_agent.layers.output import DIFY_OUTPUT_LAYER_TYPE_ID, DifyOutputLayerConfig
from dify_agent.layers.shell import DIFY_SHELL_LAYER_TYPE_ID, DifyShellLayerConfig
from dify_agent.layers.user_prompt import (
DIFY_USER_PROMPT_LAYER_TYPE_ID,
DifyUserPromptFileConfig,
DifyUserPromptLayerConfig,
)
from dify_agent.protocol import (
DIFY_AGENT_HISTORY_LAYER_ID,
DIFY_AGENT_MODEL_LAYER_ID,
@ -195,7 +190,6 @@ class AgentBackendAgentAppRunInput(BaseModel):
model: AgentBackendModelConfig
execution_context: DifyExecutionContextLayerConfig
user_prompt: str
user_files: list[DifyUserPromptFileConfig] = Field(default_factory=list)
agent_soul_prompt: str | None = None
purpose: RunPurpose = "agent_app"
idempotency_key: str | None = None
@ -256,9 +250,9 @@ class AgentBackendRunRequestBuilder:
[
RunLayerSpec(
name=AGENT_APP_USER_PROMPT_LAYER_ID,
type=DIFY_USER_PROMPT_LAYER_TYPE_ID,
type=PLAIN_PROMPT_LAYER_TYPE_ID,
metadata={**run_input.metadata, "origin": "agent_app_user_prompt"},
config=DifyUserPromptLayerConfig(text=run_input.user_prompt, files=run_input.user_files),
config=PromptLayerConfig(user=run_input.user_prompt),
),
RunLayerSpec(
name=DIFY_EXECUTION_CONTEXT_LAYER_ID,
@ -585,7 +579,7 @@ class AgentBackendRunRequestBuilder:
)
_SENSITIVE_KEY_PARTS = ("secret", "credential", "token", "password", "api_key", "base64_data")
_SENSITIVE_KEY_PARTS = ("secret", "credential", "token", "password", "api_key")
def redact_for_agent_backend_log(value: object) -> object:

View File

@ -23,7 +23,6 @@ from clients.agent_backend.factory import create_agent_backend_run_client
from configs import dify_config
from constants import UUID_NIL
from core.app.app_config.easy_ui_based_app.model_config.converter import ModelConfigConverter
from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
from core.app.apps.agent_app.app_config_manager import AgentAppConfigManager
from core.app.apps.agent_app.app_runner import AgentAppRunner
from core.app.apps.agent_app.generate_response_converter import AgentAppGenerateResponseConverter
@ -42,7 +41,6 @@ from core.app.entities.app_invoke_entities import (
from core.app.llm.model_access import build_dify_model_access
from core.ops.ops_trace_manager import TraceQueueManager
from extensions.ext_database import db
from factories import file_factory
from models import Account, App, EndUser, Message
from models.agent import (
Agent,
@ -111,18 +109,6 @@ class AgentAppGenerator(MessageBasedAppGenerator):
conversation=conversation,
)
model_conf = ModelConfigConverter.convert(app_config)
with self._bind_file_access_scope(tenant_id=app_model.tenant_id, user=user, invoke_from=invoke_from):
raw_files = args.get("files") or []
file_extra_config = FileUploadConfigManager.convert(app_config.app_model_config_dict, is_vision=True)
if raw_files:
file_objs = file_factory.build_from_mappings(
mappings=raw_files,
tenant_id=app_model.tenant_id,
config=file_extra_config,
access_controller=self._file_access_controller,
)
else:
file_objs = []
trace_manager = TraceQueueManager(app_model.id, user.id if isinstance(user, Account) else user.session_id)
@ -130,13 +116,12 @@ class AgentAppGenerator(MessageBasedAppGenerator):
task_id=str(uuid.uuid4()),
app_config=app_config,
model_conf=model_conf,
file_upload_config=file_extra_config,
conversation_id=conversation.id if conversation else None,
inputs=self._prepare_user_inputs(
user_inputs=inputs, variables=app_config.variables, tenant_id=app_model.tenant_id
),
query=query,
files=list(file_objs),
files=[],
parent_message_id=(
args.get("parent_message_id")
if invoke_from not in {InvokeFrom.SERVICE_API, InvokeFrom.OPENAPI}
@ -379,8 +364,6 @@ class AgentAppGenerator(MessageBasedAppGenerator):
message_id=message.id,
model_name=application_generate_entity.model_conf.model,
queue_manager=queue_manager,
files=list(application_generate_entity.files),
file_upload_config=application_generate_entity.file_upload_config,
session_scope_snapshot_id=application_generate_entity.agent_runtime_session_snapshot_id,
)
except GenerateTaskStoppedError:

View File

@ -45,7 +45,6 @@ from core.app.entities.queue_entities import QueueLLMChunkEvent, QueueMessageEnd
from core.repositories.human_input_repository import HumanInputFormRepository, HumanInputFormRepositoryImpl
from core.workflow.nodes.agent_v2.ask_human_hitl import AskHumanFormBuildError, create_ask_human_form
from core.workflow.nodes.agent_v2.ask_human_resume import build_deferred_tool_results, resolve_ask_human_form
from graphon.file import File, FileUploadConfig
from graphon.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from graphon.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage, UserPromptMessage
from models.agent_config_entities import AgentSoulConfig
@ -163,8 +162,6 @@ class AgentAppRunner:
message_id: str,
model_name: str,
queue_manager: AppQueueManager,
files: list[File] | None = None,
file_upload_config: FileUploadConfig | None = None,
session_scope_snapshot_id: str | None | _DefaultSessionScopeSnapshotId = _DEFAULT_SESSION_SCOPE_SNAPSHOT_ID,
) -> None:
if isinstance(session_scope_snapshot_id, _DefaultSessionScopeSnapshotId):
@ -195,8 +192,6 @@ class AgentAppRunner:
conversation_id=conversation_id,
user_query=query,
idempotency_key=message_id,
files=tuple(files or ()),
file_upload_config=file_upload_config,
session_snapshot=session_snapshot,
deferred_tool_results=deferred_tool_results,
)

View File

@ -10,10 +10,9 @@ used by workflow runs.
from __future__ import annotations
import base64
from collections.abc import Mapping
from dataclasses import dataclass
from typing import Any, Literal, Protocol, cast
from typing import Any, Protocol, cast
from agenton.compositor import CompositorSessionSnapshot
from dify_agent.layers.execution_context import (
@ -21,7 +20,6 @@ from dify_agent.layers.execution_context import (
DifyExecutionContextLayerConfig,
DifyExecutionContextUserFrom,
)
from dify_agent.layers.user_prompt import DifyUserPromptFileConfig
from dify_agent.protocol import CreateRunRequest, DeferredToolResultsPayload
from clients.agent_backend import (
@ -44,7 +42,6 @@ from core.workflow.nodes.agent_v2.runtime_request_builder import (
build_knowledge_layer_config,
build_shell_layer_config,
)
from graphon.file import File, FileType, FileUploadConfig, file_manager
from models.agent_config_entities import AgentSoulConfig
from models.provider_ids import ModelProviderID
from services.agent.prompt_mentions import build_soul_mention_resolver, expand_prompt_mentions
@ -71,8 +68,6 @@ class AgentAppRuntimeBuildContext:
conversation_id: str
user_query: str
idempotency_key: str
files: tuple[File, ...] = ()
file_upload_config: FileUploadConfig | None = None
session_snapshot: CompositorSessionSnapshot | None = None
# ENG-638: set when resuming a chat turn after a submitted ask_human form.
deferred_tool_results: DeferredToolResultsPayload | None = None
@ -173,7 +168,6 @@ class AgentAppRuntimeRequestBuilder:
agent_soul_prompt=expand_prompt_mentions(agent_soul.prompt.system_prompt, soul_prompt_resolver).strip()
or None,
user_prompt=context.user_query,
user_files=self._build_user_files(context.files, context.file_upload_config),
tools=tools_layer,
knowledge=knowledge_config,
drive_config=drive_config,
@ -223,51 +217,6 @@ class AgentAppRuntimeRequestBuilder:
normalized[key] = str(value)
return normalized
@staticmethod
def _build_user_files(
    files: tuple[File, ...],
    file_upload_config: FileUploadConfig | None,
) -> list[DifyUserPromptFileConfig]:
    """Convert uploaded chat files into inline user-prompt file configs.

    The detail level is resolved once from ``file_upload_config`` and then
    handed to every per-file conversion.
    """
    resolved_detail = _image_detail(file_upload_config)
    user_files: list[DifyUserPromptFileConfig] = []
    for uploaded in files:
        user_files.append(_build_user_file(uploaded, detail=resolved_detail))
    return user_files
def _build_user_file(file: File, *, detail: Literal["low", "high"] | None) -> DifyUserPromptFileConfig:
    """Serialize one uploaded file into an inline user-prompt file config.

    The file content is fetched with ``file_manager.download`` and embedded as
    base64 text. ``detail`` is only attached when the file type is ``image``.

    Raises:
        AgentAppRuntimeRequestBuildError: when the file type is not one of
            image, document, audio or video.
    """
    supported_types = {"image", "document", "audio", "video"}
    if isinstance(file.type, FileType):
        file_type = file.type.value
    else:
        file_type = str(file.type)

    if file_type not in supported_types:
        raise AgentAppRuntimeRequestBuildError(
            "agent_user_file_unsupported",
            f"Agent App does not support file type '{file_type}' in user prompt.",
        )

    image_detail = detail if file_type == "image" else None
    return DifyUserPromptFileConfig(
        filename=file.filename or "file",
        mime_type=file.mime_type or "application/octet-stream",
        format=_file_format(file),
        type=cast(Any, file_type),
        base64_data=base64.b64encode(file_manager.download(file)).decode(),
        detail=image_detail,
    )
def _file_format(file: File) -> str:
    """Return a short lowercase format label for ``file``.

    The file extension wins when present (leading dots removed). Otherwise the
    MIME subtype is used, ignoring any parameters such as ``; charset=utf-8``.
    Falls back to ``"bin"`` when neither source yields a non-empty value.
    """
    extension = (file.extension or "").lstrip(".").lower()
    if extension:
        return extension
    # Drop MIME parameters ("text/plain; charset=utf-8" -> "text/plain") so
    # they do not leak into the format label.
    essence = (file.mime_type or "").split(";", 1)[0].strip()
    _, separator, subtype = essence.rpartition("/")
    # An empty subtype ("image/") carries no information; use the fallback.
    if separator and subtype:
        return subtype.lower()
    return "bin"
def _image_detail(file_upload_config: FileUploadConfig | None) -> Literal["low", "high"] | None:
    """Resolve the configured image detail level when it is a supported one.

    Returns ``"low"`` or ``"high"`` when the upload config carries an image
    config whose detail (or the detail's ``value`` attribute, when it has one)
    is one of those two strings; returns ``None`` in every other case.
    """
    if file_upload_config is None:
        return None
    image_settings = file_upload_config.image_config
    if image_settings is None:
        return None
    configured = image_settings.detail
    if configured is None:
        return None
    # Enum-like values expose the raw string through ``value``.
    normalized = getattr(configured, "value", configured)
    if normalized not in {"low", "high"}:
        return None
    return cast(Literal["low", "high"], normalized)
__all__ = [
"AgentAppRuntimeBuildContext",

View File

@ -19,7 +19,6 @@ from dify_agent.layers.execution_context import DIFY_EXECUTION_CONTEXT_LAYER_TYP
from dify_agent.layers.knowledge import DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID, DifyKnowledgeBaseLayerConfig
from dify_agent.layers.output import DIFY_OUTPUT_LAYER_TYPE_ID
from dify_agent.layers.shell import DIFY_SHELL_LAYER_TYPE_ID, DifyShellEnvVarConfig, DifyShellLayerConfig
from dify_agent.layers.user_prompt import DIFY_USER_PROMPT_LAYER_TYPE_ID, DifyUserPromptFileConfig
from dify_agent.protocol import (
DIFY_AGENT_HISTORY_LAYER_ID,
DIFY_AGENT_MODEL_LAYER_ID,
@ -29,7 +28,6 @@ from dify_agent.protocol import (
from pydantic import ValidationError
from clients.agent_backend import (
AGENT_APP_USER_PROMPT_LAYER_ID,
AGENT_SOUL_PROMPT_LAYER_ID,
DIFY_EXECUTION_CONTEXT_LAYER_ID,
DIFY_KNOWLEDGE_BASE_LAYER_ID,
@ -362,47 +360,6 @@ def test_agent_app_request_builder_omits_shell_layer_by_default():
assert DIFY_SHELL_LAYER_ID not in {layer.name for layer in request.composition.layers}
def test_agent_app_request_builder_uses_multimodal_user_prompt_layer():
    """The agent-app user prompt layer carries the prompt text and the uploaded files."""
    uploaded_image = DifyUserPromptFileConfig(
        filename="red.png",
        mime_type="image/png",
        format="png",
        type="image",
        base64_data="cmVk",
        detail="high",
    )
    run_input = _agent_app_input()
    run_input.user_files = [uploaded_image]

    request = AgentBackendRunRequestBuilder().build_for_agent_app(run_input)

    layers_by_name = {spec.name: spec for spec in request.composition.layers}
    prompt_layer = layers_by_name[AGENT_APP_USER_PROMPT_LAYER_ID]
    assert prompt_layer.type == DIFY_USER_PROMPT_LAYER_TYPE_ID
    assert prompt_layer.config.text == "List files."
    first_file = prompt_layer.config.files[0]
    assert first_file.filename == "red.png"
def test_redact_for_agent_backend_log_hides_user_file_base64_data():
    """Inline file payloads are replaced by the redaction marker in loggable requests."""
    uploaded_image = DifyUserPromptFileConfig(
        filename="red.png",
        mime_type="image/png",
        format="png",
        type="image",
        base64_data="cmVk",
    )
    run_input = _agent_app_input()
    run_input.user_files = [uploaded_image]

    request = AgentBackendRunRequestBuilder().build_for_agent_app(run_input)
    redacted = cast(dict[str, Any], redact_for_agent_backend_log(request))

    redacted_layers = {}
    for layer in redacted["composition"]["layers"]:
        redacted_layers[layer["name"]] = layer
    prompt_config = redacted_layers[AGENT_APP_USER_PROMPT_LAYER_ID]["config"]
    assert prompt_config["files"][0]["base64_data"] == "[REDACTED]"
def test_agent_app_request_builder_adds_shell_layer_when_include_shell():
run_input = _agent_app_input(include_shell=True)
run_input.shell_config = DifyShellLayerConfig(env=[DifyShellEnvVarConfig(name="APP_ENV", value="enabled")])

View File

@ -3,14 +3,12 @@
from __future__ import annotations
from dataclasses import replace
from types import SimpleNamespace
from typing import Any
import pytest
from dify_agent.layers.execution_context import DifyExecutionContextLayerConfig
import core.app.apps.agent_app.runtime_request_builder as runtime_request_builder_module
from clients.agent_backend import (
AgentBackendAgentAppRunInput,
AgentBackendModelConfig,
@ -23,7 +21,6 @@ from core.app.apps.agent_app.runtime_request_builder import (
AgentAppRuntimeRequestBuildError,
)
from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
from graphon.file import File, FileTransferMethod, FileType, FileUploadConfig, ImageConfig
from models.agent_config_entities import AgentSoulConfig
@ -107,19 +104,6 @@ def _ctx(soul: AgentSoulConfig, *, query: str = "hello") -> AgentAppRuntimeBuild
)
def _image_file() -> File:
    """Build the small local PNG ``File`` fixture used by the upload tests."""
    image = File(
        transfer_method=FileTransferMethod.LOCAL_FILE,
        id="file-1",
        type=FileType.IMAGE,
        filename="red.png",
        extension=".png",
        mime_type="image/png",
        size=3,
        reference="upload-file-id",
    )
    return image
def _soul_with_model() -> AgentSoulConfig:
return AgentSoulConfig.model_validate(
{
@ -160,40 +144,6 @@ class TestAgentAppRuntimeRequestBuilder:
assert result.redacted_request["composition"]["layers"][-1]["config"]["credentials"] == "[REDACTED]"
assert result.metadata["conversation_id"] == "conv-1"
def test_build_maps_uploaded_files_to_user_prompt_layer(self, monkeypatch: pytest.MonkeyPatch):
    """Uploaded files are inlined as base64 in the user prompt layer and redacted for logs."""
    # Replace the real download with fixed bytes so the expected base64 is known.
    monkeypatch.setattr(runtime_request_builder_module.file_manager, "download", lambda file: b"red")
    builder = AgentAppRuntimeRequestBuilder(
        credentials_provider=_FakeCredentialsProvider(),
        plugin_tools_builder=_NoToolsBuilder(),  # type: ignore[arg-type]
    )
    upload_config = FileUploadConfig(image_config=ImageConfig(detail="high"))
    ctx = replace(_ctx(_soul_with_model()), files=(_image_file(),), file_upload_config=upload_config)

    result = builder.build(ctx)

    user_prompt = next(
        spec for spec in result.request.composition.layers if spec.name == "agent_app_user_prompt"
    )
    dumped = user_prompt.config.model_dump(mode="json")
    expected_file = {
        "filename": "red.png",
        "mime_type": "image/png",
        "format": "png",
        "type": "image",
        "base64_data": "cmVk",
        "detail": "high",
    }
    assert dumped["text"] == "hello"
    assert dumped["files"][0] == expected_file

    redacted_user_prompt = next(
        spec for spec in result.redacted_request["composition"]["layers"] if spec["name"] == "agent_app_user_prompt"
    )
    redacted_file = redacted_user_prompt["config"]["files"][0]
    assert redacted_file["base64_data"] == "[REDACTED]"
def test_build_normalizes_marketplace_model_plugin_id(self):
soul = _soul_with_model()
soul.model.plugin_id = (

View File

@ -1,13 +0,0 @@
"""Client-safe exports for Agent App user prompt layer DTOs."""
from dify_agent.layers.user_prompt.configs import (
DIFY_USER_PROMPT_LAYER_TYPE_ID,
DifyUserPromptFileConfig,
DifyUserPromptLayerConfig,
)
__all__ = [
"DIFY_USER_PROMPT_LAYER_TYPE_ID",
"DifyUserPromptFileConfig",
"DifyUserPromptLayerConfig",
]

View File

@ -1,37 +0,0 @@
"""Serializable user prompt layer DTOs for Agent App chat turns."""
from typing import ClassVar, Final, Literal
from pydantic import BaseModel, ConfigDict, Field
from agenton.layers import LayerConfig
DIFY_USER_PROMPT_LAYER_TYPE_ID: Final[str] = "dify.user_prompt"
class DifyUserPromptFileConfig(BaseModel):
    """A single user-uploaded file sent inline to the agent backend."""

    # Unknown keys are rejected instead of being silently ignored.
    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")

    # Display name of the uploaded file.
    filename: str
    # Media type of the content, e.g. "image/png".
    mime_type: str
    # Short format label, e.g. "png".
    format: str
    # File category.
    type: Literal["image", "document", "audio", "video", "custom"]
    # File content as base64 text.
    base64_data: str
    # Optional detail level.
    detail: Literal["low", "high"] | None = None
class DifyUserPromptLayerConfig(LayerConfig):
    """Config for a user prompt layer: the prompt text and any attached files."""

    # The user's prompt text.
    text: str
    # Attached files; defaults to a fresh empty list per instance.
    files: list[DifyUserPromptFileConfig] = Field(default_factory=list)
__all__ = [
"DIFY_USER_PROMPT_LAYER_TYPE_ID",
"DifyUserPromptFileConfig",
"DifyUserPromptLayerConfig",
]

View File

@ -1,67 +0,0 @@
"""Runtime layer that turns serialized Agent App user files into pydantic-ai content."""
from __future__ import annotations
import base64
from dataclasses import dataclass
from typing import ClassVar
from pydantic_ai.messages import BinaryContent, UserContent
from typing_extensions import Self, override
from agenton.layers import EmptyRuntimeState, NoLayerDeps, PydanticAILayer
from dify_agent.layers.user_prompt.configs import (
DIFY_USER_PROMPT_LAYER_TYPE_ID,
DifyUserPromptFileConfig,
DifyUserPromptLayerConfig,
)
@dataclass(slots=True)
class DifyUserPromptLayer(PydanticAILayer[NoLayerDeps, object, DifyUserPromptLayerConfig, EmptyRuntimeState]):
    """Pydantic-ai layer that exposes the user's text and uploaded files as prompt content."""

    type_id: ClassVar[str | None] = DIFY_USER_PROMPT_LAYER_TYPE_ID

    config: DifyUserPromptLayerConfig

    @classmethod
    @override
    def from_config(cls, config: DifyUserPromptLayerConfig) -> Self:
        """Build the layer, re-validating ``config`` through the config model."""
        validated = DifyUserPromptLayerConfig.model_validate(config)
        return cls(config=validated)

    @property
    @override
    def user_prompts(self) -> list[UserContent]:
        """Return the prompt content: files first, then the text when non-empty.

        Without files, the result is a single-element list holding the text.
        """
        attachments = self.config.files
        if not attachments:
            return [self.config.text]

        contents: list[UserContent] = [_file_to_binary_content(item) for item in attachments]
        if self.config.text:
            contents.append(self.config.text)
        return contents
def _file_to_binary_content(file: DifyUserPromptFileConfig) -> BinaryContent:
    """Decode one inline file config into pydantic-ai ``BinaryContent``.

    The filename, and the detail when set, are forwarded via ``vendor_metadata``.
    """
    if file.detail:
        vendor_metadata: dict[str, str] = {"filename": file.filename, "detail": file.detail}
    else:
        vendor_metadata = {"filename": file.filename}

    raw_bytes = base64.b64decode(file.base64_data)
    return BinaryContent(
        data=raw_bytes,
        media_type=file.mime_type,
        identifier=_identifier_from_filename(file.filename, file.format),
        vendor_metadata=vendor_metadata,
    )
def _identifier_from_filename(filename: str, file_format: str) -> str:
    """Derive a content identifier from ``filename``.

    When ``filename`` ends (case-insensitively) with ``.<file_format>``, that
    suffix is removed. An empty result falls back to ``"file"``.
    """
    if file_format:
        dotted = "." + file_format
        if filename.lower().endswith(dotted.lower()):
            stem = filename[: -len(dotted)]
            return stem if stem else "file"
    return filename if filename else "file"
__all__ = ["DifyUserPromptLayer"]

View File

@ -47,7 +47,6 @@ from dify_agent.layers.knowledge.layer import DifyKnowledgeBaseLayer
from dify_agent.layers.output.output_layer import DifyOutputLayer
from dify_agent.layers.shell.configs import DifyShellLayerConfig
from dify_agent.layers.shell.layer import DifyShellLayer, create_shellctl_client_factory
from dify_agent.layers.user_prompt.layer import DifyUserPromptLayer
type DifyAgentLayerProvider = LayerProvider[Any]
@ -88,7 +87,6 @@ def create_default_layer_providers(
agent_stub_token_factory = build_agent_stub_token
return (
LayerProvider.from_layer_type(PromptLayer),
LayerProvider.from_layer_type(DifyUserPromptLayer),
LayerProvider.from_layer_type(PydanticAIHistoryLayer),
LayerProvider.from_layer_type(DifyOutputLayer),
LayerProvider.from_layer_type(DifyAskHumanLayer),

View File

@ -1,56 +0,0 @@
from __future__ import annotations
from typing import cast
from pydantic_ai.messages import BinaryContent
from dify_agent.layers.user_prompt.configs import DIFY_USER_PROMPT_LAYER_TYPE_ID, DifyUserPromptLayerConfig
from dify_agent.layers.user_prompt.layer import DifyUserPromptLayer
from dify_agent.runtime.compositor_factory import create_default_layer_providers
def test_user_prompt_layer_restores_text_only_prompt() -> None:
    """A config without files yields the text as the only user prompt."""
    text_only_config = DifyUserPromptLayerConfig(text="hello")

    layer = DifyUserPromptLayer.from_config(text_only_config)

    assert layer.type_id == DIFY_USER_PROMPT_LAYER_TYPE_ID
    assert layer.user_prompts == ["hello"]
def test_user_prompt_layer_restores_binary_file_prompt() -> None:
    """A config with one file yields decoded binary content followed by the text."""
    raw_file = {
        "filename": "red.png",
        "mime_type": "image/png",
        "format": "png",
        "type": "image",
        "base64_data": "cmVk",
        "detail": "high",
    }
    raw_config = {"text": "what is in this image?", "files": [raw_file]}
    layer = DifyUserPromptLayer.from_config(DifyUserPromptLayerConfig.model_validate(raw_config))

    prompts = layer.user_prompts

    assert len(prompts) == 2
    binary_part, text_part = prompts[0], prompts[1]
    assert isinstance(binary_part, BinaryContent)
    assert binary_part.data == b"red"
    assert binary_part.media_type == "image/png"
    assert binary_part.vendor_metadata == {"filename": "red.png", "detail": "high"}
    assert text_part == "what is in this image?"
def test_default_layer_providers_register_user_prompt_layer() -> None:
    """The default provider set includes one that builds a user prompt layer from raw config."""
    matching_providers = (
        candidate
        for candidate in create_default_layer_providers()
        if candidate.type_id == DIFY_USER_PROMPT_LAYER_TYPE_ID
    )
    provider = next(matching_providers)

    layer = cast(DifyUserPromptLayer, provider.create_layer({"text": "hello"}))

    assert isinstance(layer, DifyUserPromptLayer)

View File

@ -77,7 +77,6 @@ def test_client_public_exports_work_with_default_dependencies_only(tmp_path: Pat
plugin_module = importlib.import_module("dify_agent.layers.dify_plugin")
ask_human_module = importlib.import_module("dify_agent.layers.ask_human")
output_module = importlib.import_module("dify_agent.layers.output")
user_prompt_module = importlib.import_module("dify_agent.layers.user_prompt")
assert agenton_layers.ExitIntent is not None
assert agenton_layers.LayerConfig is not None
@ -98,7 +97,6 @@ def test_client_public_exports_work_with_default_dependencies_only(tmp_path: Pat
assert plugin_module.DifyPluginLLMLayerConfig is not None
assert ask_human_module.DifyAskHumanLayerConfig is not None
assert output_module.DifyOutputLayerConfig is not None
assert user_prompt_module.DifyUserPromptLayerConfig is not None
grpc_error = importlib.import_module("dify_agent.agent_stub.client._errors").AgentStubMissingGRPCDependencyError
try: