mirror of
https://github.com/langgenius/dify.git
synced 2026-06-26 14:51:13 +08:00
fix(agent-v2): pass chat files to agent backend (#37926)
This commit is contained in:
parent
aa5c10af55
commit
d070074164
@ -31,6 +31,7 @@ from clients.agent_backend.event_adapter import (
|
||||
from clients.agent_backend.factory import create_agent_backend_run_client
|
||||
from clients.agent_backend.fake_client import FakeAgentBackendRunClient, FakeAgentBackendScenario
|
||||
from clients.agent_backend.request_builder import (
|
||||
AGENT_APP_USER_PROMPT_LAYER_ID,
|
||||
AGENT_SOUL_PROMPT_LAYER_ID,
|
||||
DIFY_EXECUTION_CONTEXT_LAYER_ID,
|
||||
DIFY_KNOWLEDGE_BASE_LAYER_ID,
|
||||
@ -46,6 +47,7 @@ from clients.agent_backend.request_builder import (
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"AGENT_APP_USER_PROMPT_LAYER_ID",
|
||||
"AGENT_SOUL_PROMPT_LAYER_ID",
|
||||
"DIFY_EXECUTION_CONTEXT_LAYER_ID",
|
||||
"DIFY_KNOWLEDGE_BASE_LAYER_ID",
|
||||
|
||||
@ -35,6 +35,11 @@ from dify_agent.layers.execution_context import (
|
||||
from dify_agent.layers.knowledge import DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID, DifyKnowledgeBaseLayerConfig
|
||||
from dify_agent.layers.output import DIFY_OUTPUT_LAYER_TYPE_ID, DifyOutputLayerConfig
|
||||
from dify_agent.layers.shell import DIFY_SHELL_LAYER_TYPE_ID, DifyShellLayerConfig
|
||||
from dify_agent.layers.user_prompt import (
|
||||
DIFY_USER_PROMPT_LAYER_TYPE_ID,
|
||||
DifyUserPromptFileConfig,
|
||||
DifyUserPromptLayerConfig,
|
||||
)
|
||||
from dify_agent.protocol import (
|
||||
DIFY_AGENT_HISTORY_LAYER_ID,
|
||||
DIFY_AGENT_MODEL_LAYER_ID,
|
||||
@ -190,6 +195,7 @@ class AgentBackendAgentAppRunInput(BaseModel):
|
||||
model: AgentBackendModelConfig
|
||||
execution_context: DifyExecutionContextLayerConfig
|
||||
user_prompt: str
|
||||
user_files: list[DifyUserPromptFileConfig] = Field(default_factory=list)
|
||||
agent_soul_prompt: str | None = None
|
||||
purpose: RunPurpose = "agent_app"
|
||||
idempotency_key: str | None = None
|
||||
@ -250,9 +256,9 @@ class AgentBackendRunRequestBuilder:
|
||||
[
|
||||
RunLayerSpec(
|
||||
name=AGENT_APP_USER_PROMPT_LAYER_ID,
|
||||
type=PLAIN_PROMPT_LAYER_TYPE_ID,
|
||||
type=DIFY_USER_PROMPT_LAYER_TYPE_ID,
|
||||
metadata={**run_input.metadata, "origin": "agent_app_user_prompt"},
|
||||
config=PromptLayerConfig(user=run_input.user_prompt),
|
||||
config=DifyUserPromptLayerConfig(text=run_input.user_prompt, files=run_input.user_files),
|
||||
),
|
||||
RunLayerSpec(
|
||||
name=DIFY_EXECUTION_CONTEXT_LAYER_ID,
|
||||
@ -579,7 +585,7 @@ class AgentBackendRunRequestBuilder:
|
||||
)
|
||||
|
||||
|
||||
_SENSITIVE_KEY_PARTS = ("secret", "credential", "token", "password", "api_key")
|
||||
_SENSITIVE_KEY_PARTS = ("secret", "credential", "token", "password", "api_key", "base64_data")
|
||||
|
||||
|
||||
def redact_for_agent_backend_log(value: object) -> object:
|
||||
|
||||
@ -23,6 +23,7 @@ from clients.agent_backend.factory import create_agent_backend_run_client
|
||||
from configs import dify_config
|
||||
from constants import UUID_NIL
|
||||
from core.app.app_config.easy_ui_based_app.model_config.converter import ModelConfigConverter
|
||||
from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
|
||||
from core.app.apps.agent_app.app_config_manager import AgentAppConfigManager
|
||||
from core.app.apps.agent_app.app_runner import AgentAppRunner
|
||||
from core.app.apps.agent_app.generate_response_converter import AgentAppGenerateResponseConverter
|
||||
@ -41,6 +42,7 @@ from core.app.entities.app_invoke_entities import (
|
||||
from core.app.llm.model_access import build_dify_model_access
|
||||
from core.ops.ops_trace_manager import TraceQueueManager
|
||||
from extensions.ext_database import db
|
||||
from factories import file_factory
|
||||
from models import Account, App, EndUser, Message
|
||||
from models.agent import (
|
||||
Agent,
|
||||
@ -109,6 +111,18 @@ class AgentAppGenerator(MessageBasedAppGenerator):
|
||||
conversation=conversation,
|
||||
)
|
||||
model_conf = ModelConfigConverter.convert(app_config)
|
||||
with self._bind_file_access_scope(tenant_id=app_model.tenant_id, user=user, invoke_from=invoke_from):
|
||||
raw_files = args.get("files") or []
|
||||
file_extra_config = FileUploadConfigManager.convert(app_config.app_model_config_dict, is_vision=True)
|
||||
if raw_files:
|
||||
file_objs = file_factory.build_from_mappings(
|
||||
mappings=raw_files,
|
||||
tenant_id=app_model.tenant_id,
|
||||
config=file_extra_config,
|
||||
access_controller=self._file_access_controller,
|
||||
)
|
||||
else:
|
||||
file_objs = []
|
||||
|
||||
trace_manager = TraceQueueManager(app_model.id, user.id if isinstance(user, Account) else user.session_id)
|
||||
|
||||
@ -116,12 +130,13 @@ class AgentAppGenerator(MessageBasedAppGenerator):
|
||||
task_id=str(uuid.uuid4()),
|
||||
app_config=app_config,
|
||||
model_conf=model_conf,
|
||||
file_upload_config=file_extra_config,
|
||||
conversation_id=conversation.id if conversation else None,
|
||||
inputs=self._prepare_user_inputs(
|
||||
user_inputs=inputs, variables=app_config.variables, tenant_id=app_model.tenant_id
|
||||
),
|
||||
query=query,
|
||||
files=[],
|
||||
files=list(file_objs),
|
||||
parent_message_id=(
|
||||
args.get("parent_message_id")
|
||||
if invoke_from not in {InvokeFrom.SERVICE_API, InvokeFrom.OPENAPI}
|
||||
@ -364,6 +379,8 @@ class AgentAppGenerator(MessageBasedAppGenerator):
|
||||
message_id=message.id,
|
||||
model_name=application_generate_entity.model_conf.model,
|
||||
queue_manager=queue_manager,
|
||||
files=list(application_generate_entity.files),
|
||||
file_upload_config=application_generate_entity.file_upload_config,
|
||||
session_scope_snapshot_id=application_generate_entity.agent_runtime_session_snapshot_id,
|
||||
)
|
||||
except GenerateTaskStoppedError:
|
||||
|
||||
@ -45,6 +45,7 @@ from core.app.entities.queue_entities import QueueLLMChunkEvent, QueueMessageEnd
|
||||
from core.repositories.human_input_repository import HumanInputFormRepository, HumanInputFormRepositoryImpl
|
||||
from core.workflow.nodes.agent_v2.ask_human_hitl import AskHumanFormBuildError, create_ask_human_form
|
||||
from core.workflow.nodes.agent_v2.ask_human_resume import build_deferred_tool_results, resolve_ask_human_form
|
||||
from graphon.file import File, FileUploadConfig
|
||||
from graphon.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
|
||||
from graphon.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage, UserPromptMessage
|
||||
from models.agent_config_entities import AgentSoulConfig
|
||||
@ -162,6 +163,8 @@ class AgentAppRunner:
|
||||
message_id: str,
|
||||
model_name: str,
|
||||
queue_manager: AppQueueManager,
|
||||
files: list[File] | None = None,
|
||||
file_upload_config: FileUploadConfig | None = None,
|
||||
session_scope_snapshot_id: str | None | _DefaultSessionScopeSnapshotId = _DEFAULT_SESSION_SCOPE_SNAPSHOT_ID,
|
||||
) -> None:
|
||||
if isinstance(session_scope_snapshot_id, _DefaultSessionScopeSnapshotId):
|
||||
@ -192,6 +195,8 @@ class AgentAppRunner:
|
||||
conversation_id=conversation_id,
|
||||
user_query=query,
|
||||
idempotency_key=message_id,
|
||||
files=tuple(files or ()),
|
||||
file_upload_config=file_upload_config,
|
||||
session_snapshot=session_snapshot,
|
||||
deferred_tool_results=deferred_tool_results,
|
||||
)
|
||||
|
||||
@ -10,9 +10,10 @@ used by workflow runs.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from collections.abc import Mapping
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Protocol, cast
|
||||
from typing import Any, Literal, Protocol, cast
|
||||
|
||||
from agenton.compositor import CompositorSessionSnapshot
|
||||
from dify_agent.layers.execution_context import (
|
||||
@ -20,6 +21,7 @@ from dify_agent.layers.execution_context import (
|
||||
DifyExecutionContextLayerConfig,
|
||||
DifyExecutionContextUserFrom,
|
||||
)
|
||||
from dify_agent.layers.user_prompt import DifyUserPromptFileConfig
|
||||
from dify_agent.protocol import CreateRunRequest, DeferredToolResultsPayload
|
||||
|
||||
from clients.agent_backend import (
|
||||
@ -42,6 +44,7 @@ from core.workflow.nodes.agent_v2.runtime_request_builder import (
|
||||
build_knowledge_layer_config,
|
||||
build_shell_layer_config,
|
||||
)
|
||||
from graphon.file import File, FileType, FileUploadConfig, file_manager
|
||||
from models.agent_config_entities import AgentSoulConfig
|
||||
from models.provider_ids import ModelProviderID
|
||||
from services.agent.prompt_mentions import build_soul_mention_resolver, expand_prompt_mentions
|
||||
@ -68,6 +71,8 @@ class AgentAppRuntimeBuildContext:
|
||||
conversation_id: str
|
||||
user_query: str
|
||||
idempotency_key: str
|
||||
files: tuple[File, ...] = ()
|
||||
file_upload_config: FileUploadConfig | None = None
|
||||
session_snapshot: CompositorSessionSnapshot | None = None
|
||||
# ENG-638: set when resuming a chat turn after a submitted ask_human form.
|
||||
deferred_tool_results: DeferredToolResultsPayload | None = None
|
||||
@ -168,6 +173,7 @@ class AgentAppRuntimeRequestBuilder:
|
||||
agent_soul_prompt=expand_prompt_mentions(agent_soul.prompt.system_prompt, soul_prompt_resolver).strip()
|
||||
or None,
|
||||
user_prompt=context.user_query,
|
||||
user_files=self._build_user_files(context.files, context.file_upload_config),
|
||||
tools=tools_layer,
|
||||
knowledge=knowledge_config,
|
||||
drive_config=drive_config,
|
||||
@ -217,6 +223,51 @@ class AgentAppRuntimeRequestBuilder:
|
||||
normalized[key] = str(value)
|
||||
return normalized
|
||||
|
||||
@staticmethod
def _build_user_files(
    files: tuple[File, ...],
    file_upload_config: FileUploadConfig | None,
) -> list[DifyUserPromptFileConfig]:
    """Serialize the chat turn's files into user prompt file configs.

    The image detail level is resolved once from ``file_upload_config`` and
    passed to every per-file conversion.
    """
    image_detail = _image_detail(file_upload_config)
    user_files: list[DifyUserPromptFileConfig] = []
    for uploaded in files:
        user_files.append(_build_user_file(uploaded, detail=image_detail))
    return user_files
|
||||
|
||||
|
||||
def _build_user_file(file: File, *, detail: Literal["low", "high"] | None) -> DifyUserPromptFileConfig:
    """Serialize one uploaded file into an inline user prompt file config.

    The file content is fetched through ``file_manager.download`` and carried
    as base64 text. ``detail`` is forwarded only for image files; every other
    type gets ``None``.

    Raises:
        AgentAppRuntimeRequestBuildError: when the file type is not one of
            image, document, audio or video.
    """
    file_type = file.type.value if isinstance(file.type, FileType) else str(file.type)
    supported_types = {"image", "document", "audio", "video"}
    if file_type not in supported_types:
        raise AgentAppRuntimeRequestBuildError(
            "agent_user_file_unsupported",
            f"Agent App does not support file type '{file_type}' in user prompt.",
        )

    media_type = file.mime_type or "application/octet-stream"
    display_name = file.filename or "file"
    format_token = _file_format(file)
    encoded_content = base64.b64encode(file_manager.download(file)).decode()
    image_detail = detail if file_type == "image" else None

    return DifyUserPromptFileConfig(
        filename=display_name,
        mime_type=media_type,
        format=format_token,
        # The config declares a Literal type; the membership check above has
        # already narrowed the value, so the cast only silences the checker.
        type=cast(Any, file_type),
        base64_data=encoded_content,
        detail=image_detail,
    )
|
||||
|
||||
|
||||
def _file_format(file: File) -> str:
    """Return a short lowercase format token for ``file``.

    Preference order: the file extension (leading dots removed), then the
    MIME subtype, then ``"bin"``.

    MIME parameters such as ``"; charset=utf-8"`` are dropped before the
    subtype is read, and a type without a subtype (for example ``"image/"``)
    falls back to ``"bin"`` instead of yielding an empty format.
    """
    extension = (file.extension or "").lstrip(".").lower()
    if extension:
        return extension

    # Keep only the "type/subtype" part; parameters must not leak into the format.
    essence = (file.mime_type or "").split(";", 1)[0].strip()
    _, separator, subtype = essence.rpartition("/")
    if separator and subtype:
        return subtype.lower()
    return "bin"
|
||||
|
||||
|
||||
def _image_detail(file_upload_config: FileUploadConfig | None) -> Literal["low", "high"] | None:
    """Extract the image detail level from the upload config.

    Returns ``"low"`` or ``"high"`` when the config carries one of those
    values, and ``None`` when the config, its image section, or the detail is
    missing or holds any other value.
    """
    if file_upload_config is None:
        return None
    image_config = file_upload_config.image_config
    if image_config is None:
        return None
    raw_detail = image_config.detail
    if raw_detail is None:
        return None

    # Objects exposing ``.value`` contribute that value; anything else is used as-is.
    level = getattr(raw_detail, "value", raw_detail)
    if level not in {"low", "high"}:
        return None
    return cast(Literal["low", "high"], level)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"AgentAppRuntimeBuildContext",
|
||||
|
||||
@ -19,6 +19,7 @@ from dify_agent.layers.execution_context import DIFY_EXECUTION_CONTEXT_LAYER_TYP
|
||||
from dify_agent.layers.knowledge import DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID, DifyKnowledgeBaseLayerConfig
|
||||
from dify_agent.layers.output import DIFY_OUTPUT_LAYER_TYPE_ID
|
||||
from dify_agent.layers.shell import DIFY_SHELL_LAYER_TYPE_ID, DifyShellEnvVarConfig, DifyShellLayerConfig
|
||||
from dify_agent.layers.user_prompt import DIFY_USER_PROMPT_LAYER_TYPE_ID, DifyUserPromptFileConfig
|
||||
from dify_agent.protocol import (
|
||||
DIFY_AGENT_HISTORY_LAYER_ID,
|
||||
DIFY_AGENT_MODEL_LAYER_ID,
|
||||
@ -28,6 +29,7 @@ from dify_agent.protocol import (
|
||||
from pydantic import ValidationError
|
||||
|
||||
from clients.agent_backend import (
|
||||
AGENT_APP_USER_PROMPT_LAYER_ID,
|
||||
AGENT_SOUL_PROMPT_LAYER_ID,
|
||||
DIFY_EXECUTION_CONTEXT_LAYER_ID,
|
||||
DIFY_KNOWLEDGE_BASE_LAYER_ID,
|
||||
@ -360,6 +362,47 @@ def test_agent_app_request_builder_omits_shell_layer_by_default():
|
||||
assert DIFY_SHELL_LAYER_ID not in {layer.name for layer in request.composition.layers}
|
||||
|
||||
|
||||
def test_agent_app_request_builder_uses_multimodal_user_prompt_layer():
    """The agent-app user prompt layer uses the Dify user prompt type and carries files."""
    attachment = DifyUserPromptFileConfig(
        filename="red.png",
        mime_type="image/png",
        format="png",
        type="image",
        base64_data="cmVk",
        detail="high",
    )
    run_input = _agent_app_input()
    run_input.user_files = [attachment]

    request = AgentBackendRunRequestBuilder().build_for_agent_app(run_input)

    layers_by_name = {layer.name: layer for layer in request.composition.layers}
    user_prompt_layer = layers_by_name[AGENT_APP_USER_PROMPT_LAYER_ID]
    assert user_prompt_layer.type == DIFY_USER_PROMPT_LAYER_TYPE_ID
    assert user_prompt_layer.config.text == "List files."
    assert user_prompt_layer.config.files[0].filename == "red.png"
|
||||
|
||||
|
||||
def test_redact_for_agent_backend_log_hides_user_file_base64_data():
    """Redaction replaces the inline file payload of the user prompt layer."""
    attachment = DifyUserPromptFileConfig(
        filename="red.png",
        mime_type="image/png",
        format="png",
        type="image",
        base64_data="cmVk",
    )
    run_input = _agent_app_input()
    run_input.user_files = [attachment]
    request = AgentBackendRunRequestBuilder().build_for_agent_app(run_input)

    redacted = cast(dict[str, Any], redact_for_agent_backend_log(request))

    layers_by_name = {layer["name"]: layer for layer in redacted["composition"]["layers"]}
    first_file = layers_by_name[AGENT_APP_USER_PROMPT_LAYER_ID]["config"]["files"][0]
    assert first_file["base64_data"] == "[REDACTED]"
|
||||
|
||||
|
||||
def test_agent_app_request_builder_adds_shell_layer_when_include_shell():
|
||||
run_input = _agent_app_input(include_shell=True)
|
||||
run_input.shell_config = DifyShellLayerConfig(env=[DifyShellEnvVarConfig(name="APP_ENV", value="enabled")])
|
||||
|
||||
@ -3,12 +3,14 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import replace
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from dify_agent.layers.execution_context import DifyExecutionContextLayerConfig
|
||||
|
||||
import core.app.apps.agent_app.runtime_request_builder as runtime_request_builder_module
|
||||
from clients.agent_backend import (
|
||||
AgentBackendAgentAppRunInput,
|
||||
AgentBackendModelConfig,
|
||||
@ -21,6 +23,7 @@ from core.app.apps.agent_app.runtime_request_builder import (
|
||||
AgentAppRuntimeRequestBuildError,
|
||||
)
|
||||
from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
|
||||
from graphon.file import File, FileTransferMethod, FileType, FileUploadConfig, ImageConfig
|
||||
from models.agent_config_entities import AgentSoulConfig
|
||||
|
||||
|
||||
@ -104,6 +107,19 @@ def _ctx(soul: AgentSoulConfig, *, query: str = "hello") -> AgentAppRuntimeBuild
|
||||
)
|
||||
|
||||
|
||||
def _image_file() -> File:
    """Return a local-file PNG ``File`` fixture named ``red.png``."""
    attributes: dict[str, Any] = {
        "transfer_method": FileTransferMethod.LOCAL_FILE,
        "id": "file-1",
        "type": FileType.IMAGE,
        "filename": "red.png",
        "extension": ".png",
        "mime_type": "image/png",
        "size": 3,
        "reference": "upload-file-id",
    }
    return File(**attributes)
|
||||
|
||||
|
||||
def _soul_with_model() -> AgentSoulConfig:
|
||||
return AgentSoulConfig.model_validate(
|
||||
{
|
||||
@ -144,6 +160,40 @@ class TestAgentAppRuntimeRequestBuilder:
|
||||
assert result.redacted_request["composition"]["layers"][-1]["config"]["credentials"] == "[REDACTED]"
|
||||
assert result.metadata["conversation_id"] == "conv-1"
|
||||
|
||||
def test_build_maps_uploaded_files_to_user_prompt_layer(self, monkeypatch: pytest.MonkeyPatch):
    """Uploaded files become inline user prompt files and are redacted in the log copy."""
    # Replace the download call with a fixed payload so no storage is touched.
    monkeypatch.setattr(runtime_request_builder_module.file_manager, "download", lambda file: b"red")
    upload_config = FileUploadConfig(image_config=ImageConfig(detail="high"))
    ctx = replace(
        _ctx(_soul_with_model()),
        files=(_image_file(),),
        file_upload_config=upload_config,
    )
    builder = AgentAppRuntimeRequestBuilder(
        credentials_provider=_FakeCredentialsProvider(),
        plugin_tools_builder=_NoToolsBuilder(),  # type: ignore[arg-type]
    )

    result = builder.build(ctx)

    user_prompt = next(
        layer for layer in result.request.composition.layers if layer.name == "agent_app_user_prompt"
    )
    dumped = user_prompt.config.model_dump(mode="json")
    expected_file = {
        "filename": "red.png",
        "mime_type": "image/png",
        "format": "png",
        "type": "image",
        "base64_data": "cmVk",
        "detail": "high",
    }
    assert dumped["text"] == "hello"
    assert dumped["files"][0] == expected_file

    redacted_user_prompt = next(
        layer
        for layer in result.redacted_request["composition"]["layers"]
        if layer["name"] == "agent_app_user_prompt"
    )
    assert redacted_user_prompt["config"]["files"][0]["base64_data"] == "[REDACTED]"
|
||||
|
||||
def test_build_normalizes_marketplace_model_plugin_id(self):
|
||||
soul = _soul_with_model()
|
||||
soul.model.plugin_id = (
|
||||
|
||||
13
dify-agent/src/dify_agent/layers/user_prompt/__init__.py
Normal file
13
dify-agent/src/dify_agent/layers/user_prompt/__init__.py
Normal file
@ -0,0 +1,13 @@
|
||||
"""Client-safe exports for Agent App user prompt layer DTOs."""
|
||||
|
||||
from dify_agent.layers.user_prompt.configs import (
|
||||
DIFY_USER_PROMPT_LAYER_TYPE_ID,
|
||||
DifyUserPromptFileConfig,
|
||||
DifyUserPromptLayerConfig,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"DIFY_USER_PROMPT_LAYER_TYPE_ID",
|
||||
"DifyUserPromptFileConfig",
|
||||
"DifyUserPromptLayerConfig",
|
||||
]
|
||||
37
dify-agent/src/dify_agent/layers/user_prompt/configs.py
Normal file
37
dify-agent/src/dify_agent/layers/user_prompt/configs.py
Normal file
@ -0,0 +1,37 @@
|
||||
"""Serializable user prompt layer DTOs for Agent App chat turns."""
|
||||
|
||||
from typing import ClassVar, Final, Literal
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from agenton.layers import LayerConfig
|
||||
|
||||
|
||||
DIFY_USER_PROMPT_LAYER_TYPE_ID: Final[str] = "dify.user_prompt"
|
||||
|
||||
|
||||
class DifyUserPromptFileConfig(BaseModel):
    """Inline representation of one file the user attached to a chat turn.

    Unknown fields are rejected (``extra="forbid"``).
    """

    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")

    # Name of the file.
    filename: str
    # MIME type string, for example "image/png".
    mime_type: str
    # Short format token, for example "png".
    format: str
    # Coarse file category.
    type: Literal["image", "document", "audio", "video", "custom"]
    # File content as base64 text.
    base64_data: str
    # Optional detail level, "low" or "high".
    detail: Literal["low", "high"] | None = None
|
||||
|
||||
|
||||
class DifyUserPromptLayerConfig(LayerConfig):
    """Config for the user prompt layer: the prompt text and any attached files."""

    # The user's prompt text.
    text: str
    # Attached files; defaults to an empty list.
    files: list[DifyUserPromptFileConfig] = Field(default_factory=list)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"DIFY_USER_PROMPT_LAYER_TYPE_ID",
|
||||
"DifyUserPromptFileConfig",
|
||||
"DifyUserPromptLayerConfig",
|
||||
]
|
||||
67
dify-agent/src/dify_agent/layers/user_prompt/layer.py
Normal file
67
dify-agent/src/dify_agent/layers/user_prompt/layer.py
Normal file
@ -0,0 +1,67 @@
|
||||
"""Runtime layer that turns serialized Agent App user files into pydantic-ai content."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from dataclasses import dataclass
|
||||
from typing import ClassVar
|
||||
|
||||
from pydantic_ai.messages import BinaryContent, UserContent
|
||||
from typing_extensions import Self, override
|
||||
|
||||
from agenton.layers import EmptyRuntimeState, NoLayerDeps, PydanticAILayer
|
||||
from dify_agent.layers.user_prompt.configs import (
|
||||
DIFY_USER_PROMPT_LAYER_TYPE_ID,
|
||||
DifyUserPromptFileConfig,
|
||||
DifyUserPromptLayerConfig,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
class DifyUserPromptLayer(PydanticAILayer[NoLayerDeps, object, DifyUserPromptLayerConfig, EmptyRuntimeState]):
    """Stateless pydantic-ai layer exposing the user's text and attached files."""

    type_id: ClassVar[str | None] = DIFY_USER_PROMPT_LAYER_TYPE_ID

    config: DifyUserPromptLayerConfig

    @classmethod
    @override
    def from_config(cls, config: DifyUserPromptLayerConfig) -> Self:
        """Build the layer after validating ``config`` against its model."""
        validated = DifyUserPromptLayerConfig.model_validate(config)
        return cls(config=validated)

    @property
    @override
    def user_prompts(self) -> list[UserContent]:
        """Return the user content parts for this turn.

        Without files, the text is the only part (even when it is empty).
        With files, each file comes first as binary content and the text is
        appended last only when it is non-empty.
        """
        text = self.config.text
        attachments = self.config.files
        if not attachments:
            return [text]

        parts: list[UserContent] = [_file_to_binary_content(item) for item in attachments]
        if text:
            parts.append(text)
        return parts
|
||||
|
||||
|
||||
def _file_to_binary_content(file: DifyUserPromptFileConfig) -> BinaryContent:
    """Decode one serialized user file into a pydantic-ai ``BinaryContent`` part.

    The filename is always recorded in the vendor metadata; the detail level
    is added only when it is set.
    """
    vendor_metadata: dict[str, str] = {"filename": file.filename}
    if file.detail:
        vendor_metadata["detail"] = file.detail

    raw_bytes = base64.b64decode(file.base64_data)
    identifier = _identifier_from_filename(file.filename, file.format)
    return BinaryContent(
        data=raw_bytes,
        media_type=file.mime_type,
        identifier=identifier,
        vendor_metadata=vendor_metadata,
    )
|
||||
|
||||
|
||||
def _identifier_from_filename(filename: str, file_format: str) -> str:
    """Derive a content identifier from ``filename``.

    When the filename ends with ``.<file_format>`` (compared case-insensitively)
    that suffix is removed. An empty result falls back to ``"file"``.
    """
    fallback = "file"
    if not file_format:
        return filename or fallback

    extension = f".{file_format}"
    stem = filename
    if filename.lower().endswith(extension.lower()):
        stem = filename[: -len(extension)]
    return stem or fallback
|
||||
|
||||
|
||||
__all__ = ["DifyUserPromptLayer"]
|
||||
@ -47,6 +47,7 @@ from dify_agent.layers.knowledge.layer import DifyKnowledgeBaseLayer
|
||||
from dify_agent.layers.output.output_layer import DifyOutputLayer
|
||||
from dify_agent.layers.shell.configs import DifyShellLayerConfig
|
||||
from dify_agent.layers.shell.layer import DifyShellLayer, create_shellctl_client_factory
|
||||
from dify_agent.layers.user_prompt.layer import DifyUserPromptLayer
|
||||
|
||||
type DifyAgentLayerProvider = LayerProvider[Any]
|
||||
|
||||
@ -87,6 +88,7 @@ def create_default_layer_providers(
|
||||
agent_stub_token_factory = build_agent_stub_token
|
||||
return (
|
||||
LayerProvider.from_layer_type(PromptLayer),
|
||||
LayerProvider.from_layer_type(DifyUserPromptLayer),
|
||||
LayerProvider.from_layer_type(PydanticAIHistoryLayer),
|
||||
LayerProvider.from_layer_type(DifyOutputLayer),
|
||||
LayerProvider.from_layer_type(DifyAskHumanLayer),
|
||||
|
||||
@ -0,0 +1,56 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import cast
|
||||
|
||||
from pydantic_ai.messages import BinaryContent
|
||||
|
||||
from dify_agent.layers.user_prompt.configs import DIFY_USER_PROMPT_LAYER_TYPE_ID, DifyUserPromptLayerConfig
|
||||
from dify_agent.layers.user_prompt.layer import DifyUserPromptLayer
|
||||
from dify_agent.runtime.compositor_factory import create_default_layer_providers
|
||||
|
||||
|
||||
def test_user_prompt_layer_restores_text_only_prompt() -> None:
    """A config without files yields the text as the only user prompt part."""
    text_only_config = DifyUserPromptLayerConfig(text="hello")

    layer = DifyUserPromptLayer.from_config(text_only_config)

    assert layer.type_id == DIFY_USER_PROMPT_LAYER_TYPE_ID
    assert layer.user_prompts == ["hello"]
|
||||
|
||||
|
||||
def test_user_prompt_layer_restores_binary_file_prompt() -> None:
    """A config with one file yields the decoded file first, then the text."""
    serialized_file = {
        "filename": "red.png",
        "mime_type": "image/png",
        "format": "png",
        "type": "image",
        "base64_data": "cmVk",
        "detail": "high",
    }
    config = DifyUserPromptLayerConfig.model_validate(
        {
            "text": "what is in this image?",
            "files": [serialized_file],
        }
    )
    layer = DifyUserPromptLayer.from_config(config)

    prompts = layer.user_prompts

    assert len(prompts) == 2
    file_part = prompts[0]
    assert isinstance(file_part, BinaryContent)
    assert file_part.data == b"red"
    assert file_part.media_type == "image/png"
    assert file_part.vendor_metadata == {"filename": "red.png", "detail": "high"}
    assert prompts[1] == "what is in this image?"
|
||||
|
||||
|
||||
def test_default_layer_providers_register_user_prompt_layer() -> None:
    """The default providers include one that creates the user prompt layer."""
    matching_providers = (
        candidate
        for candidate in create_default_layer_providers()
        if candidate.type_id == DIFY_USER_PROMPT_LAYER_TYPE_ID
    )
    provider = next(matching_providers)

    layer = cast(DifyUserPromptLayer, provider.create_layer({"text": "hello"}))

    assert isinstance(layer, DifyUserPromptLayer)
|
||||
@ -77,6 +77,7 @@ def test_client_public_exports_work_with_default_dependencies_only(tmp_path: Pat
|
||||
plugin_module = importlib.import_module("dify_agent.layers.dify_plugin")
|
||||
ask_human_module = importlib.import_module("dify_agent.layers.ask_human")
|
||||
output_module = importlib.import_module("dify_agent.layers.output")
|
||||
user_prompt_module = importlib.import_module("dify_agent.layers.user_prompt")
|
||||
|
||||
assert agenton_layers.ExitIntent is not None
|
||||
assert agenton_layers.LayerConfig is not None
|
||||
@ -97,6 +98,7 @@ def test_client_public_exports_work_with_default_dependencies_only(tmp_path: Pat
|
||||
assert plugin_module.DifyPluginLLMLayerConfig is not None
|
||||
assert ask_human_module.DifyAskHumanLayerConfig is not None
|
||||
assert output_module.DifyOutputLayerConfig is not None
|
||||
assert user_prompt_module.DifyUserPromptLayerConfig is not None
|
||||
|
||||
grpc_error = importlib.import_module("dify_agent.agent_stub.client._errors").AgentStubMissingGRPCDependencyError
|
||||
try:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user