# dify/api/services/agent/composer_validator.py
# zyssyz123 2c5c8e82c3
# feat: agent slash menu backend (#37268)
# Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
# Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
# 2026-06-10 10:40:03 +00:00
#
# 336 lines
# 15 KiB
# Python

import re
from typing import Any
from pydantic import ValidationError
from services.agent.errors import AgentSoulLockedError, InvalidComposerConfigError, PlaintextSecretNotAllowedError
from services.agent.prompt_mentions import (
MAX_MENTIONS_PER_PROMPT,
NODE_JOB_PROMPT_ALLOWED_KINDS,
SOUL_PROMPT_ALLOWED_KINDS,
MentionKind,
MentionResolver,
build_node_job_mention_resolver,
build_soul_mention_resolver,
find_malformed_mention_markers,
parse_prompt_mentions,
)
from services.entities.agent_entities import (
AgentSoulConfig,
ComposerSavePayload,
ComposerVariant,
WorkflowNodeJobConfig,
)
# Dict keys (compared after lower-casing and mapping ``-`` to ``_``) whose non-empty
# string values are rejected as plaintext secrets.
_PLAINTEXT_SECRET_KEYS = {
    "api_key",
    "apikey",
    "authorization",
    "password",
    "secret",
    "secret_key",
}
# Env/secret names become shell ``export`` identifiers in the sandbox bootstrap, so
# they must be valid shell identifiers. Validating here fails fast at composer save
# with a friendly error instead of at run time in the agent backend shell layer.
_SHELL_ENV_NAME_PATTERN = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
# Any of these keys holding a non-blank string counts as the CLI tool's name.
_CLI_TOOL_NAME_KEYS = ("name", "tool_name", "label")
# Any of these keys holding a non-blank string counts as an install command.
_CLI_TOOL_COMMAND_KEYS = ("command", "install_command", "install", "setup_command")
# Status strings (matched case-insensitively, ignoring surrounding whitespace) that mean "not permitted".
_DENIED_PERMISSION_STATUSES = frozenset({"unauthorized", "denied", "forbidden", "invalid", "unavailable"})
# A CLI tool entry is dangerous when any of these keys is exactly ``True``.
_DANGEROUS_FLAG_KEYS = ("dangerous", "dangerous_command", "requires_confirmation")
# A dangerous CLI tool entry is acknowledged when any of these keys is exactly ``True``.
_DANGEROUS_ACK_KEYS = (
    "dangerous_acknowledged",
    "dangerous_accepted",
    "risk_accepted",
    "approved",
)


class ComposerConfigValidator:
    """Save-time validation for composer payloads (agent soul and workflow node job).

    Every entry point is a classmethod and the class keeps no state. Hard checks
    (``validate_*``) raise and block the save; ``collect_soft_findings`` only reports.
    """

    @classmethod
    def validate_save_payload(cls, payload: ComposerSavePayload) -> None:
        """Run all save-blocking checks on a composer save payload.

        Raises:
            AgentSoulLockedError: the payload is the workflow variant, its soul lock
                is set, and it still carries an ``agent_soul``.
            InvalidComposerConfigError: propagated from the soul, node-job, and
                mention checks.
            PlaintextSecretNotAllowedError: propagated from the plaintext-secret scan.
        """
        if payload.variant == ComposerVariant.WORKFLOW and payload.soul_lock.locked and payload.agent_soul is not None:
            raise AgentSoulLockedError()
        if payload.agent_soul is not None:
            cls.validate_agent_soul(payload.agent_soul)
        if payload.node_job is not None:
            cls.validate_node_job(payload.node_job)
        cls._validate_prompt_mentions(payload)

    @classmethod
    def _validate_prompt_mentions(cls, payload: ComposerSavePayload) -> None:
        """ENG-616 §2.4 allowlists + ENG-617 §5.2 human-must-be-referenced.

        Error messages start with a stable code token (``mention_kind_not_allowed``
        / ``mention_limit_exceeded`` / ``human_involvement_not_referenced``) so
        the frontend can switch on it.
        """
        if payload.agent_soul is not None:
            cls._validate_surface_mentions(
                prompt=payload.agent_soul.prompt.system_prompt,
                allowed=SOUL_PROMPT_ALLOWED_KINDS,
                surface="agent soul prompt",
            )
            cls._require_human_mentions(
                prompt=payload.agent_soul.prompt.system_prompt,
                contacts=payload.agent_soul.human.contacts,
                surface="agent soul prompt",
            )
        if payload.node_job is not None:
            cls._validate_surface_mentions(
                prompt=payload.node_job.workflow_prompt,
                allowed=NODE_JOB_PROMPT_ALLOWED_KINDS,
                surface="workflow job prompt",
            )
            cls._require_human_mentions(
                prompt=payload.node_job.workflow_prompt,
                contacts=payload.node_job.human_contacts,
                surface="workflow job prompt",
            )

    @classmethod
    def _validate_surface_mentions(cls, *, prompt: str, allowed: frozenset[MentionKind], surface: str) -> None:
        """Enforce the mention count limit and the per-surface kind allowlist.

        Args:
            prompt: prompt text to parse for mentions.
            allowed: mention kinds this surface may reference.
            surface: human-readable surface name used in error messages.

        Raises:
            InvalidComposerConfigError: more than ``MAX_MENTIONS_PER_PROMPT`` mentions
                were parsed, or a mention's kind is not in ``allowed`` (the first
                offending mention is reported).
        """
        mentions = parse_prompt_mentions(prompt)
        if len(mentions) > MAX_MENTIONS_PER_PROMPT:
            raise InvalidComposerConfigError(
                f"mention_limit_exceeded: {surface} has {len(mentions)} mentions, "
                f"exceeding the limit of {MAX_MENTIONS_PER_PROMPT}."
            )
        for mention in mentions:
            if mention.kind not in allowed:
                raise InvalidComposerConfigError(
                    f"mention_kind_not_allowed: {surface} cannot reference {mention.kind.value} (id={mention.ref_id})."
                )

    @classmethod
    def _require_human_mentions(cls, *, prompt: str, contacts: list[Any], surface: str) -> None:
        """ENG-617 §5.2 (PRD: human involvement must be slash-referenced or save errors).

        Every configured human contact must appear as ``{{#human:<id>#}}`` in the
        corresponding prompt. A contact matches via any identity alias; contacts
        carrying no identity at all cannot be referenced and are skipped.

        Raises:
            InvalidComposerConfigError: a contact with at least one alias is not
                referenced by any human mention in ``prompt``.
        """
        if not contacts:
            return
        mentioned = {mention.ref_id for mention in parse_prompt_mentions(prompt) if mention.kind == MentionKind.HUMAN}
        for contact in contacts:
            aliases = {
                alias
                for alias in (contact.id, contact.contact_id, contact.human_id, contact.email, contact.name)
                if alias
            }
            if not aliases:
                continue
            if aliases.isdisjoint(mentioned):
                # Fall back through every alias so a contact identified only by
                # contact_id / human_id is still named in the error message.
                display = (
                    contact.name
                    or contact.email
                    or contact.id
                    or contact.contact_id
                    or contact.human_id
                    or "human involvement"
                )
                raise InvalidComposerConfigError(
                    f"human_involvement_not_referenced: configured human involvement '{display}' "
                    f"must be referenced in the {surface} via the slash menu."
                )

    @classmethod
    def collect_soft_findings(
        cls,
        payload: ComposerSavePayload,
        *,
        existing_dataset_ids: set[str] | None = None,
    ) -> dict[str, Any]:
        """ENG-617 §5.3/§5.4 soft findings — never block save.

        ``warnings`` carries ``mention_target_missing`` / ``mention_malformed``
        entries; ``knowledge_retrieval_placeholder`` keeps dangling knowledge
        mentions with a placeholder name (0522 consensus) instead of dropping or
        rejecting them. With ``existing_dataset_ids`` provided, configured-but-
        deleted datasets surface as placeholders too.

        Returns:
            A dict with the two keys ``warnings`` and ``knowledge_retrieval_placeholder``,
            each holding a list of dict entries.
        """
        warnings: list[dict[str, Any]] = []
        placeholders: list[dict[str, str]] = []
        # One tuple per present surface: (surface code, prompt text, resolver, allowed kinds).
        surfaces: list[tuple[str, str, MentionResolver, frozenset[MentionKind]]] = []
        if payload.agent_soul is not None:
            surfaces.append(
                (
                    "agent_soul",
                    payload.agent_soul.prompt.system_prompt,
                    build_soul_mention_resolver(payload.agent_soul),
                    SOUL_PROMPT_ALLOWED_KINDS,
                )
            )
        if payload.node_job is not None:
            surfaces.append(
                (
                    "node_job",
                    payload.node_job.workflow_prompt,
                    build_node_job_mention_resolver(payload.node_job),
                    NODE_JOB_PROMPT_ALLOWED_KINDS,
                )
            )
        for surface, prompt, resolver, allowed in surfaces:
            for mention in parse_prompt_mentions(prompt):
                if mention.kind not in allowed:
                    continue  # hard-rejected by validate_save_payload
                resolved = resolver(mention)
                if mention.kind == MentionKind.KNOWLEDGE:
                    dangling = resolved is None or (
                        existing_dataset_ids is not None and mention.ref_id not in existing_dataset_ids
                    )
                    if dangling:
                        placeholders.append(
                            {
                                "id": mention.ref_id,
                                "placeholder_name": mention.label or f"Knowledge {mention.ref_id[:8]}",
                            }
                        )
                    # Knowledge mentions are reported only as placeholders, never as warnings.
                    continue
                if resolved is None:
                    warnings.append(
                        {
                            "code": "mention_target_missing",
                            "surface": surface,
                            "kind": mention.kind.value,
                            "id": mention.ref_id,
                            "message": f"{mention.kind.value} mention (id={mention.ref_id}) does not match "
                            "any configured item.",
                        }
                    )
            for marker in find_malformed_mention_markers(prompt):
                warnings.append(
                    {
                        "code": "mention_malformed",
                        "surface": surface,
                        "kind": None,
                        "id": None,
                        "message": f"mention-shaped marker {marker!r} is malformed and will be "
                        "degraded to plain text at runtime.",
                    }
                )
        return {"warnings": warnings, "knowledge_retrieval_placeholder": placeholders}

    @classmethod
    def validate_agent_soul(cls, agent_soul: AgentSoulConfig) -> None:
        """Scan the JSON dump of an agent soul for plaintext secrets and bad shell config.

        Raises:
            PlaintextSecretNotAllowedError: a secret-named key holds a non-empty string.
            InvalidComposerConfigError: the env/secret/CLI-tool configuration is invalid.
        """
        dumped = agent_soul.model_dump(mode="json")
        cls._reject_plaintext_secrets(dumped, path="agent_soul")
        cls._validate_shell_config(dumped)

    @classmethod
    def validate_node_job(cls, node_job: WorkflowNodeJobConfig) -> None:
        """Scan the JSON dump of a workflow node job for plaintext secrets.

        Raises:
            PlaintextSecretNotAllowedError: a secret-named key holds a non-empty string.
        """
        cls._reject_plaintext_secrets(node_job.model_dump(mode="json"), path="node_job")

    @classmethod
    def validate_agent_soul_dict(cls, value: dict[str, Any]) -> AgentSoulConfig:
        """Parse a raw dict into ``AgentSoulConfig`` and validate it.

        Raises:
            InvalidComposerConfigError: model validation failed (the pydantic error
                text is carried as the message), or ``validate_agent_soul`` rejected it.
            PlaintextSecretNotAllowedError: propagated from ``validate_agent_soul``.
        """
        try:
            config = AgentSoulConfig.model_validate(value)
        except ValidationError as exc:
            raise InvalidComposerConfigError(str(exc)) from exc
        cls.validate_agent_soul(config)
        return config

    @classmethod
    def validate_node_job_dict(cls, value: dict[str, Any]) -> WorkflowNodeJobConfig:
        """Parse a raw dict into ``WorkflowNodeJobConfig`` and validate it.

        Raises:
            InvalidComposerConfigError: model validation failed (the pydantic error
                text is carried as the message).
            PlaintextSecretNotAllowedError: propagated from ``validate_node_job``.
        """
        try:
            config = WorkflowNodeJobConfig.model_validate(value)
        except ValidationError as exc:
            raise InvalidComposerConfigError(str(exc)) from exc
        cls.validate_node_job(config)
        return config

    @classmethod
    def _validate_shell_config(cls, soul: dict[str, Any]) -> None:
        """Fail fast on shell env/secret/CLI config the sandbox would otherwise reject at run time.

        Checks, per named ``env.variables`` / ``env.secret_refs`` entry and in this
        order: valid shell identifier, secret reference not permission-denied, name
        unique across both sections. Then, per enabled ``tools.cli_tools`` entry:
        has a name or install command, is authorized, and any dangerous command is
        acknowledged. Values of an unexpected type are skipped rather than rejected.

        Raises:
            InvalidComposerConfigError: on the first failing check.
        """
        env = soul.get("env")
        if not isinstance(env, dict):
            # Missing or malformed section: nothing to bind to the shell.
            env = {}
        seen_env_names: set[str] = set()
        for section in ("variables", "secret_refs"):
            entries = env.get(section)
            if not isinstance(entries, list):
                continue
            for entry in entries:
                if not isinstance(entry, dict):
                    continue
                raw_name = entry.get("name")
                if not isinstance(raw_name, str) or not raw_name.strip():
                    # Unnamed draft rows are tolerated; only named entries are bound to the shell.
                    continue
                name = raw_name.strip()
                if not _SHELL_ENV_NAME_PATTERN.fullmatch(name):
                    raise InvalidComposerConfigError(
                        f"env/secret name '{name}' must be a valid shell identifier (^[A-Za-z_][A-Za-z0-9_]*$)."
                    )
                if section == "secret_refs" and cls._permission_denied(entry):
                    raise InvalidComposerConfigError(f"secret reference '{name}' is not authorized for this agent.")
                if name in seen_env_names:
                    raise InvalidComposerConfigError(
                        f"duplicate env/secret name '{name}': environment variables and secret references "
                        "share the shell namespace."
                    )
                seen_env_names.add(name)

        tools = soul.get("tools")
        cli_tools = tools.get("cli_tools") if isinstance(tools, dict) else None
        if not isinstance(cli_tools, list):
            return
        for entry in cli_tools:
            # Only an explicit ``enabled: False`` disables a tool; a missing flag means enabled.
            if not isinstance(entry, dict) or entry.get("enabled") is False:
                continue
            has_name = any(isinstance(entry.get(key), str) and entry[key].strip() for key in _CLI_TOOL_NAME_KEYS)
            has_command = cls._has_install_command(entry)
            if not has_name and not has_command:
                raise InvalidComposerConfigError("an enabled CLI tool must declare a name or an install command.")
            if cls._permission_denied(entry) or entry.get("pre_authorized") is False:
                raise InvalidComposerConfigError("an enabled CLI tool is not authorized for runtime bootstrap.")
            if cls._dangerous_without_acknowledgement(entry):
                raise InvalidComposerConfigError(
                    "a dangerous CLI tool command must be explicitly acknowledged before save."
                )

    @classmethod
    def _reject_plaintext_secrets(cls, value: Any, *, path: str) -> None:
        """Recursively walk dicts and lists and reject secret-named keys with string values.

        A key matches when its lower-cased, ``-``→``_`` form is in
        ``_PLAINTEXT_SECRET_KEYS``; it is rejected only if its value is a non-empty
        string. ``path`` is extended with ``.key`` / ``[index]`` for the error message.

        Raises:
            PlaintextSecretNotAllowedError: at the first offending key.
        """
        if isinstance(value, dict):
            for key, nested in value.items():
                normalized_key = key.lower().replace("-", "_")
                nested_path = f"{path}.{key}"
                if normalized_key in _PLAINTEXT_SECRET_KEYS and isinstance(nested, str) and nested:
                    raise PlaintextSecretNotAllowedError(f"Plaintext secret is not allowed at {nested_path}")
                cls._reject_plaintext_secrets(nested, path=nested_path)
        elif isinstance(value, list):
            for index, nested in enumerate(value):
                cls._reject_plaintext_secrets(nested, path=f"{path}[{index}]")

    @classmethod
    def _has_install_command(cls, entry: dict[str, Any]) -> bool:
        """Return True when the entry declares at least one non-blank install command.

        Looks at the ``install_commands`` list first, then at each single-command
        key in ``_CLI_TOOL_COMMAND_KEYS``.
        """
        raw_commands = entry.get("install_commands")
        if isinstance(raw_commands, list) and any(
            isinstance(command, str) and command.strip() for command in raw_commands
        ):
            return True
        return any(isinstance(entry.get(key), str) and entry[key].strip() for key in _CLI_TOOL_COMMAND_KEYS)

    @staticmethod
    def _is_denied_status(status: Any) -> bool:
        """Return True when ``status`` is a string naming a denied state, ignoring case and padding."""
        return isinstance(status, str) and status.strip().lower() in _DENIED_PERMISSION_STATUSES

    @classmethod
    def _permission_denied(cls, entry: dict[str, Any]) -> bool:
        """Return True when the entry's permission metadata marks it as not permitted.

        Denied means any of: ``permission.allowed`` is exactly ``False``;
        ``permission.status`` (or ``permission.state`` when status is falsy) is a
        denied status; or a top-level ``authorization_status`` /
        ``permission_status`` / ``status`` is a denied status. Status strings are
        compared case-insensitively with surrounding whitespace ignored, so
        ``"Denied"`` cannot slip past the gate.
        """
        permission = entry.get("permission")
        if isinstance(permission, dict):
            if permission.get("allowed") is False:
                return True
            if cls._is_denied_status(permission.get("status") or permission.get("state")):
                return True
        return any(
            cls._is_denied_status(entry.get(key)) for key in ("authorization_status", "permission_status", "status")
        )

    @classmethod
    def _dangerous_without_acknowledgement(cls, entry: dict[str, Any]) -> bool:
        """Return True when the entry is flagged dangerous and carries no acknowledgement.

        Dangerous means a ``_DANGEROUS_FLAG_KEYS`` key is exactly ``True`` or
        ``risk_level`` is ``"dangerous"`` (case-insensitive, whitespace ignored).
        Acknowledged means a ``_DANGEROUS_ACK_KEYS`` key is exactly ``True``.
        """
        dangerous = any(entry.get(key) is True for key in _DANGEROUS_FLAG_KEYS)
        risk_level = entry.get("risk_level")
        if isinstance(risk_level, str) and risk_level.strip().lower() == "dangerous":
            dangerous = True
        if not dangerous:
            return False
        return not any(entry.get(key) is True for key in _DANGEROUS_ACK_KEYS)