# dify/api/services/agent/skill_tool_inference_service.py

"""Infer CLI tool + ENV suggestions from a standardized skill (ENG-371).

Reads the skill's SKILL.md from the agent drive, asks the tenant's default
reasoning model once (a plain LLM call, never an agent run), and returns
*draft* suggestions only — nothing is persisted here. The frontend prefills
the TOOLS box (``inferred from <skill>`` badge) and the Pre-Authorize ENV
panel, and saving still goes through the composer's full shell/env/secret/
dangerous-command validation, so inference opens no bypass.

ENV suggestions carry only ``key``, ``reason`` and a ``secret_likely`` hint —
the model never produces a value; users fill those in themselves and the
runtime injects ``$VAR`` only.
"""

from __future__ import annotations

import json
import logging
from typing import Any

import json_repair
from pydantic import BaseModel, Field, ValidationError
from sqlalchemy import select

from core.errors.error import ProviderTokenNotInitError
from core.model_manager import ModelManager
from extensions.ext_database import db
from graphon.model_runtime.entities.message_entities import SystemPromptMessage, UserPromptMessage
from graphon.model_runtime.entities.model_entities import ModelType
from models.agent import Agent
from models.agent_config_entities import AgentSoulConfig
from services.agent_drive_service import AgentDriveError, AgentDriveService

logger = logging.getLogger(__name__)


class SkillToolInferenceError(Exception):
    """Error raised by skill tool inference, carrying a stable machine code.

    Attributes are set from the constructor arguments:
    ``code`` (stable identifier), ``message`` (human-readable text, also the
    exception's string form) and ``status_code`` (defaults to 400).
    """

    def __init__(self, code: str, message: str, *, status_code: int = 400) -> None:
        # Let the base Exception hold the message so ``str(err)`` yields it.
        super().__init__(message)
        self.code, self.message, self.status_code = code, message, status_code


class EnvSuggestion(BaseModel):
    """One suggested environment variable; deliberately has no value field."""

    # Name of the environment variable (required).
    key: str
    # Why the variable is needed; empty when not given.
    reason: str = Field(default="")
    # Hint that the variable probably holds a credential.
    secret_likely: bool = Field(default=False)


class CliToolSuggestion(BaseModel):
    """One suggested command-line tool together with its ENV suggestions."""

    # Tool name (required); every other field falls back to an empty default.
    name: str
    description: str = Field(default="")
    command: str = Field(default="")
    install_commands: list[str] = Field(default_factory=list)
    env_suggestions: list[EnvSuggestion] = Field(default_factory=list)
    # Overwritten with the skill slug by SkillToolInferenceService.infer.
    inferred_from: str = Field(default="")


class SkillToolInferenceResult(BaseModel):
    """Top-level shape the model output is validated against."""

    # Required flag reported by the model.
    inferable: bool
    cli_tools: list[CliToolSuggestion] = Field(default_factory=list)
    # Optional short explanation; ``None`` when absent.
    reason: str | None = Field(default=None)


# System prompt sent with every inference call. The JSON shape it requests
# lines up with the SkillToolInferenceResult / CliToolSuggestion / EnvSuggestion
# fields, except ``inferred_from``, which the service fills in itself.
# The text is runtime behavior: edit it as carefully as code.
_SYSTEM_PROMPT = """\
You analyze an agent skill document (SKILL.md) and infer which command-line \
tools the skill depends on at runtime, so a user can pre-install them in the \
agent's sandbox.

Rules:
- Only suggest tools the document explicitly uses or clearly requires; never guess.
- For each tool give: name, a one-line reason-style description referencing the \
document, the base command, and install commands for a Debian-based sandbox \
(apt-get / pip / npm).
- If a step needs an environment variable (an API key, token, endpoint), add it \
to env_suggestions with the variable key and the reason. NEVER produce a value. \
Mark secret_likely=true for credentials.
- If the document describes no external command-line dependency, return \
{"inferable": false, "cli_tools": [], "reason": "<one short sentence why>"}.

Respond with JSON only, matching exactly:
{"inferable": bool,
 "cli_tools": [{"name": str, "description": str, "command": str,
                "install_commands": [str], "env_suggestions":
                [{"key": str, "reason": str, "secret_likely": bool}]}],
 "reason": str | null}
"""


class SkillToolInferenceService:
    """Single-shot LLM inference over a drive-stored SKILL.md.

    Reads ``<slug>/SKILL.md`` through the injected drive service, sends it
    (plus the skill's file listing when the active config snapshot stores
    one) to the tenant's default LLM, and returns the validated suggestions
    as a plain dict. The code here only issues reads (``preview``, ``select``)
    and one or two model invocations.
    """

    # Cap on how many manifest paths are appended to the prompt.
    _MAX_MANIFEST_FILES = 200

    def __init__(self, *, drive_service: AgentDriveService | None = None) -> None:
        """Create the service.

        Args:
            drive_service: Drive accessor used to read SKILL.md. A default
                ``AgentDriveService()`` is created when omitted.
        """
        self._drive = drive_service or AgentDriveService()

    def infer(self, *, tenant_id: str, agent_id: str, slug: str) -> dict[str, Any]:
        """Infer CLI tool and ENV suggestions for the skill ``slug``.

        Args:
            tenant_id: Tenant owning the agent; also selects the default model.
            agent_id: Agent whose drive and active snapshot are read.
            slug: Skill slug; ``<slug>/SKILL.md`` is the drive key that is read.

        Returns:
            ``SkillToolInferenceResult`` dumped in JSON mode, with every tool's
            ``inferred_from`` set to ``slug``.

        Raises:
            SkillToolInferenceError: SKILL.md is missing or unreadable, no
                default model is configured, the model call fails, or the
                output is still unparsable after one retry.
        """
        skill_md = self._load_skill_md(tenant_id=tenant_id, agent_id=agent_id, slug=slug)
        manifest_files = self._manifest_files_from_soul(tenant_id=tenant_id, agent_id=agent_id, slug=slug)

        user_prompt = f"SKILL.md of skill '{slug}':\n\n{skill_md}"
        if manifest_files:
            # Truncate the listing so a huge package cannot blow up the prompt.
            listing = "\n".join(manifest_files[: self._MAX_MANIFEST_FILES])
            user_prompt += f"\n\nFiles inside the skill package:\n{listing}"

        raw = self._invoke(tenant_id=tenant_id, user_prompt=user_prompt)
        try:
            result = self._parse(raw)
        except (ValidationError, ValueError):
            # One retry with the identical prompt; a second failure is final.
            logger.warning("skill tool inference output unparsable, retrying once")
            raw = self._invoke(tenant_id=tenant_id, user_prompt=user_prompt)
            try:
                result = self._parse(raw)
            except (ValidationError, ValueError) as exc:
                raise SkillToolInferenceError(
                    "inference_failed",
                    "inference_failed: the model output could not be parsed into tool suggestions.",
                    status_code=422,
                ) from exc

        # The provenance label is set here, not taken from the model output.
        for tool in result.cli_tools:
            tool.inferred_from = slug
        return result.model_dump(mode="json")

    def _load_skill_md(self, *, tenant_id: str, agent_id: str, slug: str) -> str:
        """Return the text of ``<slug>/SKILL.md`` from the agent drive.

        Raises:
            SkillToolInferenceError: ``skill_not_found`` (404) when the drive
                reports ``drive_key_not_found`` or the preview is binary or
                empty; any other drive error is re-raised with its own code,
                message and status.
        """
        try:
            preview = self._drive.preview(tenant_id=tenant_id, agent_id=agent_id, key=f"{slug}/SKILL.md")
        except AgentDriveError as exc:
            if exc.code == "drive_key_not_found":
                raise SkillToolInferenceError(
                    "skill_not_found", f"skill_not_found: no drive entry for skill '{slug}'.", status_code=404
                ) from exc
            raise SkillToolInferenceError(exc.code, exc.message, status_code=exc.status_code) from exc
        # The preview is read as a mapping with "binary" and "text" entries.
        if preview["binary"] or not preview["text"]:
            raise SkillToolInferenceError(
                "skill_not_found", f"skill_not_found: SKILL.md of '{slug}' is not readable text.", status_code=404
            )
        return str(preview["text"])

    @staticmethod
    def _manifest_files_from_soul(*, tenant_id: str, agent_id: str, slug: str) -> list[str]:
        """The zip path listing standardize persisted onto the ref, if present.

        Looks up the agent's active config snapshot, finds the skill ref whose
        slug matches, and returns its ``manifest_files`` as strings.

        Degrades to an empty list (SKILL.md-only inference) when the agent,
        its active snapshot, or a matching ref is missing, and for refs that
        predate ``manifest_files``.
        """
        agent = db.session.scalar(select(Agent).where(Agent.tenant_id == tenant_id, Agent.id == agent_id).limit(1))
        if agent is None or not agent.active_config_snapshot_id:
            return []
        from models.agent import AgentConfigSnapshot

        snapshot = db.session.scalar(
            select(AgentConfigSnapshot).where(
                AgentConfigSnapshot.tenant_id == tenant_id,
                AgentConfigSnapshot.agent_id == agent_id,
                AgentConfigSnapshot.id == agent.active_config_snapshot_id,
            )
        )
        if snapshot is None:
            return []
        soul = AgentSoulConfig.model_validate(snapshot.config_snapshot_dict)
        for skill in soul.skills_files.skills:
            # Slug is the first segment of skill_md_key, else the trimmed path.
            ref_slug = (skill.skill_md_key or "").split("/", 1)[0] or (skill.path or "").strip("/")
            if ref_slug != slug:
                continue
            # Refs are read by attribute above, so use getattr (not dict-style
            # ``.get``) and tolerate refs that lack the attribute entirely.
            files = getattr(skill, "manifest_files", None)
            if isinstance(files, list):
                return [str(item) for item in files]
        return []

    @staticmethod
    def _invoke(*, tenant_id: str, user_prompt: str) -> str:
        """Call the tenant's default LLM once and return its text output.

        Raises:
            SkillToolInferenceError: ``default_model_not_configured`` (400)
                when resolving the default model raises
                ``ProviderTokenNotInitError``; ``inference_failed`` (422) for
                any exception raised by the invocation itself.
        """
        try:
            model_manager = ModelManager.for_tenant(tenant_id=tenant_id)
            model_instance = model_manager.get_default_model_instance(tenant_id=tenant_id, model_type=ModelType.LLM)
        except ProviderTokenNotInitError as exc:
            raise SkillToolInferenceError(
                "default_model_not_configured",
                "default_model_not_configured: the workspace has no default reasoning model.",
                status_code=400,
            ) from exc
        try:
            response = model_instance.invoke_llm(
                prompt_messages=[
                    SystemPromptMessage(content=_SYSTEM_PROMPT),
                    UserPromptMessage(content=user_prompt),
                ],
                model_parameters={"temperature": 0.1},
                stream=False,
            )
        except Exception as exc:
            # Boundary catch: every provider failure maps to one stable code.
            raise SkillToolInferenceError(
                "inference_failed", f"inference_failed: model invocation failed: {exc}", status_code=422
            ) from exc
        return response.message.get_text_content()

    @staticmethod
    def _parse(raw: str) -> SkillToolInferenceResult:
        """Parse model output into a validated ``SkillToolInferenceResult``.

        Strict ``json.loads`` is tried first; on a decode error the text goes
        through ``json_repair.loads`` instead.

        Raises:
            ValueError: The parsed value is not a JSON object.
            ValidationError: The object does not match the result model.
        """
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError:
            parsed = json_repair.loads(raw)
        if not isinstance(parsed, dict):
            raise ValueError("model output is not a JSON object")
        return SkillToolInferenceResult.model_validate(parsed)


# Names exported by ``from ... import *``: the error type, the three
# suggestion/result models, and the service itself.
__all__ = [
    "CliToolSuggestion",
    "EnvSuggestion",
    "SkillToolInferenceError",
    "SkillToolInferenceResult",
    "SkillToolInferenceService",
]