From 5565546295d9a9d54f5cfd7ac9b4535a4b401d9a Mon Sep 17 00:00:00 2001 From: Harry Date: Thu, 22 Jan 2026 03:06:41 +0800 Subject: [PATCH] feat(skill-compiler): skill compiler --- .../skill-creator/scripts/quick_validate.py | 1 - api/core/app_assets/__init__.py | 10 - api/core/app_assets/builder/__init__.py | 12 + api/core/app_assets/builder/base.py | 20 + api/core/app_assets/builder/file_builder.py | 30 + api/core/app_assets/builder/pipeline.py | 29 + api/core/app_assets/builder/skill_builder.py | 85 ++ api/core/app_assets/entities/__init__.py | 14 +- api/core/app_assets/entities/assets.py | 2 +- api/core/app_assets/entities/skill.py | 51 +- api/core/app_assets/parser/base.py | 6 +- api/core/app_assets/parser/skill_parser.py | 162 +-- api/core/app_assets/paths.py | 4 + api/core/sandbox/bash/dify_cli.py | 3 +- api/core/skill/__init__.py | 4 +- api/core/skill/entities/__init__.py | 21 + api/core/skill/entities/file_artifact.py | 13 + api/core/skill/entities/skill_artifact.py | 30 + api/core/skill/entities/skill_artifact_set.py | 105 ++ api/core/skill/entities/skill_document.py | 14 + api/core/skill/entities/skill_metadata.py | 47 + api/core/skill/entities/tool_artifact.py | 14 +- api/core/skill/skill_compiler.py | 259 ++++ api/core/skill/skill_manager.py | 81 +- api/services/app_asset_service.py | 49 +- api/tests/unit_tests/core/skill/__init__.py | 0 .../core/skill/test_skill_compiler.py | 1177 +++++++++++++++++ 27 files changed, 1952 insertions(+), 291 deletions(-) create mode 100644 api/core/app_assets/builder/__init__.py create mode 100644 api/core/app_assets/builder/base.py create mode 100644 api/core/app_assets/builder/file_builder.py create mode 100644 api/core/app_assets/builder/pipeline.py create mode 100644 api/core/app_assets/builder/skill_builder.py create mode 100644 api/core/skill/entities/file_artifact.py create mode 100644 api/core/skill/entities/skill_artifact_set.py create mode 100644 api/core/skill/entities/skill_document.py create mode 100644 
api/core/skill/entities/skill_metadata.py create mode 100644 api/core/skill/skill_compiler.py create mode 100644 api/tests/unit_tests/core/skill/__init__.py create mode 100644 api/tests/unit_tests/core/skill/test_skill_compiler.py diff --git a/.agents/skills/skill-creator/scripts/quick_validate.py b/.agents/skills/skill-creator/scripts/quick_validate.py index 66eb0a71bf..6443b2be26 100755 --- a/.agents/skills/skill-creator/scripts/quick_validate.py +++ b/.agents/skills/skill-creator/scripts/quick_validate.py @@ -4,7 +4,6 @@ Quick validation script for skills - minimal version """ import sys -import os import re import yaml from pathlib import Path diff --git a/api/core/app_assets/__init__.py b/api/core/app_assets/__init__.py index 50ba92258e..3191c851ae 100644 --- a/api/core/app_assets/__init__.py +++ b/api/core/app_assets/__init__.py @@ -1,12 +1,7 @@ from .entities import ( AssetItem, FileAsset, - FileReference, SkillAsset, - SkillMetadata, - ToolConfiguration, - ToolFieldConfig, - ToolReference, ) from .packager import AssetPackager, ZipPackager from .parser import AssetItemParser, AssetParser, FileAssetParser, SkillAssetParser @@ -20,12 +15,7 @@ __all__ = [ "AssetPaths", "FileAsset", "FileAssetParser", - "FileReference", "SkillAsset", "SkillAssetParser", - "SkillMetadata", - "ToolConfiguration", - "ToolFieldConfig", - "ToolReference", "ZipPackager", ] diff --git a/api/core/app_assets/builder/__init__.py b/api/core/app_assets/builder/__init__.py new file mode 100644 index 0000000000..9e64a31884 --- /dev/null +++ b/api/core/app_assets/builder/__init__.py @@ -0,0 +1,12 @@ +from .base import AssetBuilder, BuildContext +from .file_builder import FileBuilder +from .pipeline import AssetBuildPipeline +from .skill_builder import SkillBuilder + +__all__ = [ + "AssetBuildPipeline", + "AssetBuilder", + "BuildContext", + "FileBuilder", + "SkillBuilder", +] diff --git a/api/core/app_assets/builder/base.py b/api/core/app_assets/builder/base.py new file mode 100644 index 
0000000000..595ce84882 --- /dev/null +++ b/api/core/app_assets/builder/base.py @@ -0,0 +1,20 @@ +from dataclasses import dataclass +from typing import Protocol + +from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode +from core.app_assets.entities import AssetItem + + +@dataclass +class BuildContext: + tenant_id: str + app_id: str + build_id: str + + +class AssetBuilder(Protocol): + def accept(self, node: AppAssetNode) -> bool: ... + + def collect(self, node: AppAssetNode, path: str, ctx: BuildContext) -> None: ... + + def build(self, tree: AppAssetFileTree, ctx: BuildContext) -> list[AssetItem]: ... diff --git a/api/core/app_assets/builder/file_builder.py b/api/core/app_assets/builder/file_builder.py new file mode 100644 index 0000000000..d1ef0446b4 --- /dev/null +++ b/api/core/app_assets/builder/file_builder.py @@ -0,0 +1,30 @@ +from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode +from core.app_assets.entities import AssetItem, FileAsset +from core.app_assets.paths import AssetPaths + +from .base import BuildContext + + +class FileBuilder: + _nodes: list[tuple[AppAssetNode, str]] + + def __init__(self) -> None: + self._nodes = [] + + def accept(self, node: AppAssetNode) -> bool: + return True + + def collect(self, node: AppAssetNode, path: str, ctx: BuildContext) -> None: + self._nodes.append((node, path)) + + def build(self, tree: AppAssetFileTree, ctx: BuildContext) -> list[AssetItem]: + return [ + FileAsset( + asset_id=node.id, + path=path, + file_name=node.name, + extension=node.extension or "", + storage_key=AssetPaths.draft_file(ctx.tenant_id, ctx.app_id, node.id), + ) + for node, path in self._nodes + ] diff --git a/api/core/app_assets/builder/pipeline.py b/api/core/app_assets/builder/pipeline.py new file mode 100644 index 0000000000..d266ecf2d1 --- /dev/null +++ b/api/core/app_assets/builder/pipeline.py @@ -0,0 +1,29 @@ +from core.app.entities.app_asset_entities import AppAssetFileTree +from 
core.app_assets.builder.file_builder import FileBuilder +from core.app_assets.builder.skill_builder import SkillBuilder +from core.app_assets.entities import AssetItem + +from .base import AssetBuilder, BuildContext + + +class AssetBuildPipeline: + _builders: list[AssetBuilder] + + def __init__(self, builders: list[AssetBuilder] | None = None) -> None: + self._builders = builders or [SkillBuilder(), FileBuilder()] + + def build_all(self, tree: AppAssetFileTree, ctx: BuildContext) -> list[AssetItem]: + # 1. Distribute: each node goes to first accepting builder + for node in tree.walk_files(): + path = tree.get_path(node.id) + for builder in self._builders: + if builder.accept(node): + builder.collect(node, path, ctx) + break + + # 2. Each builder builds its collected nodes + results: list[AssetItem] = [] + for builder in self._builders: + results.extend(builder.build(tree, ctx)) + + return results diff --git a/api/core/app_assets/builder/skill_builder.py b/api/core/app_assets/builder/skill_builder.py new file mode 100644 index 0000000000..4f395517cf --- /dev/null +++ b/api/core/app_assets/builder/skill_builder.py @@ -0,0 +1,85 @@ +import json + +from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode +from core.app_assets.entities import AssetItem, FileAsset +from core.app_assets.paths import AssetPaths +from core.skill.entities.skill_document import SkillDocument +from core.skill.skill_compiler import SkillCompiler +from core.skill.skill_manager import SkillManager +from extensions.ext_storage import storage + +from .base import BuildContext + + +class SkillBuilder: + _nodes: list[tuple[AppAssetNode, str]] + + def __init__(self) -> None: + self._nodes = [] + + def accept(self, node: AppAssetNode) -> bool: + return node.extension == "md" + + def collect(self, node: AppAssetNode, path: str, ctx: BuildContext) -> None: + self._nodes.append((node, path)) + + def build(self, tree: AppAssetFileTree, ctx: BuildContext) -> list[AssetItem]: + if not 
self._nodes: + return [] + + # 1. Load and create documents + documents: list[SkillDocument] = [] + for node, _ in self._nodes: + draft_key = AssetPaths.draft_file(ctx.tenant_id, ctx.app_id, node.id) + try: + data = json.loads(storage.load_once(draft_key)) + content = data.get("content", "") if isinstance(data, dict) else "" + metadata = data.get("metadata", {}) if isinstance(data, dict) else {} + except Exception: + content = "" + metadata = {} + + documents.append( + SkillDocument( + skill_id=node.id, + content=content, + metadata=metadata, + ) + ) + + # 2. Compile all skills + compiler = SkillCompiler() + artifact_set = compiler.compile_all(documents, tree, ctx.build_id) + + # 3. Save tool artifact + SkillManager.save_tool_artifact( + ctx.tenant_id, + ctx.app_id, + ctx.build_id, + artifact_set.get_tool_artifact(), + ) + + # 4. Save compiled content to storage and return FileAssets + results: list[AssetItem] = [] + for node, path in self._nodes: + artifact = artifact_set.get(node.id) + if artifact is None: + continue + + # Write compiled content to storage + resolved_key = AssetPaths.build_resolved_file( + ctx.tenant_id, ctx.app_id, ctx.build_id, node.id + ) + storage.save(resolved_key, artifact.content.encode("utf-8")) + + results.append( + FileAsset( + asset_id=node.id, + path=path, + file_name=node.name, + extension=node.extension or "", + storage_key=resolved_key, + ) + ) + + return results diff --git a/api/core/app_assets/entities/__init__.py b/api/core/app_assets/entities/__init__.py index 39969b55e6..ee43a452dc 100644 --- a/api/core/app_assets/entities/__init__.py +++ b/api/core/app_assets/entities/__init__.py @@ -1,20 +1,8 @@ from .assets import AssetItem, FileAsset -from .skill import ( - FileReference, - SkillAsset, - SkillMetadata, - ToolConfiguration, - ToolFieldConfig, - ToolReference, -) +from .skill import SkillAsset __all__ = [ "AssetItem", "FileAsset", - "FileReference", "SkillAsset", - "SkillMetadata", - "ToolConfiguration", - "ToolFieldConfig", 
- "ToolReference", ] diff --git a/api/core/app_assets/entities/assets.py b/api/core/app_assets/entities/assets.py index b3ded10683..97064442c9 100644 --- a/api/core/app_assets/entities/assets.py +++ b/api/core/app_assets/entities/assets.py @@ -4,7 +4,7 @@ from dataclasses import dataclass @dataclass class AssetItem(ABC): - node_id: str + asset_id: str path: str file_name: str extension: str diff --git a/api/core/app_assets/entities/skill.py b/api/core/app_assets/entities/skill.py index bf47ab8166..08ec65158d 100644 --- a/api/core/app_assets/entities/skill.py +++ b/api/core/app_assets/entities/skill.py @@ -1,59 +1,14 @@ -from dataclasses import dataclass +from collections.abc import Mapping +from dataclasses import dataclass, field from typing import Any -from pydantic import BaseModel, ConfigDict, Field - -from core.tools.entities.tool_entities import ToolProviderType - from .assets import AssetItem -class ToolFieldConfig(BaseModel): - model_config = ConfigDict(extra="forbid") - - id: str - value: Any - auto: bool = False - - -class ToolConfiguration(BaseModel): - model_config = ConfigDict(extra="forbid") - - fields: list[ToolFieldConfig] = Field(default_factory=list) - - def default_values(self) -> dict[str, Any]: - return {field.id: field.value for field in self.fields if field.value is not None} - - -class ToolReference(BaseModel): - model_config = ConfigDict(extra="forbid") - - uuid: str = Field(description="Unique identifier for this tool reference") - type: ToolProviderType = Field(description="Tool provider type") - provider: str = Field(description="Tool provider") - tool_name: str = Field(description="Tool name") - credential_id: str | None = Field(default=None, description="Credential ID") - configuration: ToolConfiguration | None = Field(default=None, description="Tool configuration") - - -class FileReference(BaseModel): - model_config = ConfigDict(extra="forbid") - - source: str = Field(description="Source location or identifier of the file") - uuid: 
str = Field(description="Unique identifier for this file reference") - - -class SkillMetadata(BaseModel): - model_config = ConfigDict(extra="allow") - - tools: dict[str, ToolReference] = Field(default_factory=dict, description="Map of tool references by UUID") - files: list[FileReference] = Field(default_factory=list, description="List of file references") - - @dataclass class SkillAsset(AssetItem): storage_key: str - metadata: SkillMetadata + metadata: Mapping[str, Any] = field(default_factory=dict) def get_storage_key(self) -> str: return self.storage_key diff --git a/api/core/app_assets/parser/base.py b/api/core/app_assets/parser/base.py index 01696c5c7e..cd1807c79e 100644 --- a/api/core/app_assets/parser/base.py +++ b/api/core/app_assets/parser/base.py @@ -7,7 +7,7 @@ class AssetItemParser(ABC): @abstractmethod def parse( self, - node_id: str, + asset_id: str, path: str, file_name: str, extension: str, @@ -19,14 +19,14 @@ class AssetItemParser(ABC): class FileAssetParser(AssetItemParser): def parse( self, - node_id: str, + asset_id: str, path: str, file_name: str, extension: str, storage_key: str, ) -> FileAsset: return FileAsset( - node_id=node_id, + asset_id=asset_id, path=path, file_name=file_name, extension=extension, diff --git a/api/core/app_assets/parser/skill_parser.py b/api/core/app_assets/parser/skill_parser.py index d11c68dbe2..af7155c31a 100644 --- a/api/core/app_assets/parser/skill_parser.py +++ b/api/core/app_assets/parser/skill_parser.py @@ -1,161 +1,57 @@ import json import logging -import re from typing import Any -from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode -from core.app_assets.entities import ( - SkillAsset, - SkillMetadata, -) -from core.app_assets.entities.skill import FileReference, ToolConfiguration, ToolReference -from core.app_assets.paths import AssetPaths -from core.tools.entities.tool_entities import ToolProviderType +from core.app_assets.entities import SkillAsset +from 
core.app_assets.entities.assets import AssetItem, FileAsset from extensions.ext_storage import storage from .base import AssetItemParser -TOOL_REFERENCE_PATTERN = re.compile(r"§\[tool\]\.\[([^\]]+)\]\.\[([^\]]+)\]\.\[([^\]]+)\]§") -FILE_REFERENCE_PATTERN = re.compile(r"§\[file\]\.\[([^\]]+)\]\.\[([^\]]+)\]§") - logger = logging.getLogger(__name__) class SkillAssetParser(AssetItemParser): - def __init__( - self, - tenant_id: str, - app_id: str, - assets_id: str, - tree: AppAssetFileTree, - ) -> None: - self._tenant_id = tenant_id - self._app_id = app_id - self._assets_id = assets_id - self._tree = tree + """ + Parser for skill assets. + + Responsibilities: + - Read file from storage + - Parse JSON structure + - Return SkillAsset with raw metadata (no parsing/resolution) + + Metadata parsing and content resolution are handled by SkillCompiler. + """ def parse( self, - node_id: str, + asset_id: str, path: str, file_name: str, extension: str, storage_key: str, - ) -> SkillAsset: - try: - return self._parse_skill_asset(node_id, path, file_name, extension, storage_key) - except Exception: - logger.exception("Failed to parse skill asset %s", node_id) - # handle as plain text - return SkillAsset( - node_id=node_id, - path=path, - file_name=file_name, - extension=extension, - storage_key=storage_key, - metadata=SkillMetadata(), - ) - - def _parse_skill_asset( - self, node_id: str, path: str, file_name: str, extension: str, storage_key: str - ) -> SkillAsset: + ) -> AssetItem: try: data = json.loads(storage.load_once(storage_key)) - except (json.JSONDecodeError, UnicodeDecodeError): - # handle as plain text + if not isinstance(data, dict): + raise ValueError(f"Skill document {asset_id} must be a JSON object") + + metadata_raw: dict[str, Any] = data.get("metadata", {}) + return SkillAsset( - node_id=node_id, + asset_id=asset_id, path=path, file_name=file_name, extension=extension, storage_key=storage_key, - metadata=SkillMetadata(), + metadata=metadata_raw, ) - - if not 
isinstance(data, dict): - raise ValueError(f"Skill document {node_id} must be a JSON object") - - data_dict: dict[str, Any] = data - metadata_raw = data_dict.get("metadata", {}) - content = data_dict.get("content", "") - - if not isinstance(content, str): - raise ValueError(f"Skill document {node_id} 'content' must be a string") - - resolved_key = AssetPaths.build_resolved_file(self._tenant_id, self._app_id, self._assets_id, node_id) - current_file = self._tree.get(node_id) - if current_file is None: - raise ValueError(f"File not found for id={node_id}") - - metadata = self._resolve_metadata(content, metadata_raw) - storage.save(resolved_key, self._resolve_content(current_file, content, metadata).encode("utf-8")) - - return SkillAsset( - node_id=node_id, - path=path, - file_name=file_name, - extension=extension, - storage_key=resolved_key, - metadata=metadata, - ) - - def _resolve_content(self, current_file: AppAssetNode, content: str, metadata: SkillMetadata) -> str: - for match in FILE_REFERENCE_PATTERN.finditer(content): - # replace with file relative path - file_id = match.group(2) - file = self._tree.get(file_id) - if file is None: - logger.warning("File not found for id=%s, skipping", file_id) - # replace with file not found placeholder - content = content.replace(match.group(0), "[File not found]") - continue - content = content.replace(match.group(0), self._tree.relative_path(current_file, file)) - - for match in TOOL_REFERENCE_PATTERN.finditer(content): - tool_id = match.group(3) - tool = metadata.tools.get(tool_id) - if tool is None: - logger.warning("Tool not found for id=%s, skipping", tool_id) - # replace with tool not found placeholder - content = content.replace(match.group(0), f"[Tool not found: {tool_id}]") - continue - content = content.replace(match.group(0), f"[Bash Command: {tool.tool_name}_{tool_id}]") - return content - - def _resolve_file_references(self, content: str) -> list[FileReference]: - file_references: list[FileReference] = [] - for 
match in FILE_REFERENCE_PATTERN.finditer(content): - file_references.append( - FileReference( - source=match.group(1), - uuid=match.group(2), - ) + except Exception: + logger.exception("Failed to parse skill asset %s", asset_id) + return FileAsset( + asset_id=asset_id, + path=path, + file_name=file_name, + extension=extension, + storage_key=storage_key, ) - return file_references - - def _resolve_tool_references(self, content: str, tools: dict[str, Any]) -> dict[str, ToolReference]: - tool_references: dict[str, ToolReference] = {} - for match in TOOL_REFERENCE_PATTERN.finditer(content): - tool_id = match.group(3) - tool_name = match.group(2) - tool_provider = match.group(1) - metadata = tools.get(tool_id) - if metadata is None: - raise ValueError(f"Tool metadata for {tool_id} not found") - - configuration = ToolConfiguration.model_validate(metadata.get("configuration", {})) - tool_references[tool_id] = ToolReference( - uuid=tool_id, - type=ToolProviderType.value_of(metadata.get("type", None)), - provider=tool_provider, - tool_name=tool_name, - credential_id=metadata.get("credential_id", None), - configuration=configuration, - ) - return tool_references - - def _resolve_metadata(self, content: str, metadata: dict[str, Any]) -> SkillMetadata: - return SkillMetadata( - files=self._resolve_file_references(content=content), - tools=self._resolve_tool_references(content=content, tools=metadata.get("tools", {})), - ) diff --git a/api/core/app_assets/paths.py b/api/core/app_assets/paths.py index 3c80891f74..c30f73f7fc 100644 --- a/api/core/app_assets/paths.py +++ b/api/core/app_assets/paths.py @@ -16,3 +16,7 @@ class AssetPaths: @staticmethod def build_tool_artifact(tenant_id: str, app_id: str, assets_id: str) -> str: return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}/tool_artifact.json" + + @staticmethod + def build_skill_artifact_set(tenant_id: str, app_id: str, assets_id: str) -> str: + return 
f"{AssetPaths._BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}/skill_artifact_set.json" diff --git a/api/core/sandbox/bash/dify_cli.py b/api/core/sandbox/bash/dify_cli.py index df6dff992c..055f65465c 100644 --- a/api/core/sandbox/bash/dify_cli.py +++ b/api/core/sandbox/bash/dify_cli.py @@ -6,10 +6,9 @@ from typing import TYPE_CHECKING, Any from pydantic import BaseModel, Field from core.app.entities.app_invoke_entities import InvokeFrom -from core.app_assets.entities import ToolReference from core.model_runtime.utils.encoders import jsonable_encoder from core.session.cli_api import CliApiSession -from core.skill.entities import ToolArtifact +from core.skill.entities import ToolArtifact, ToolReference from core.tools.entities.tool_entities import ToolParameter, ToolProviderType from core.tools.tool_manager import ToolManager from core.virtual_environment.__base.entities import Arch, OperatingSystem diff --git a/api/core/skill/__init__.py b/api/core/skill/__init__.py index e6525cad61..6515a514ab 100644 --- a/api/core/skill/__init__.py +++ b/api/core/skill/__init__.py @@ -1,6 +1,4 @@ -from core.app_assets.entities import ToolReference - -from .entities import ToolArtifact, ToolDependency +from .entities import ToolArtifact, ToolDependency, ToolReference from .skill_manager import SkillManager __all__ = [ diff --git a/api/core/skill/entities/__init__.py b/api/core/skill/entities/__init__.py index ca57927dcb..bc650f83b0 100644 --- a/api/core/skill/entities/__init__.py +++ b/api/core/skill/entities/__init__.py @@ -1,6 +1,27 @@ +from .file_artifact import FilesArtifact +from .skill_artifact import SkillArtifact, SkillSourceInfo +from .skill_artifact_set import SkillArtifactSet +from .skill_document import SkillDocument +from .skill_metadata import ( + FileReference, + SkillMetadata, + ToolConfiguration, + ToolFieldConfig, + ToolReference, +) from .tool_artifact import ToolArtifact, ToolDependency __all__ = [ + "FileReference", + "FilesArtifact", + "SkillArtifact", + 
"SkillArtifactSet", + "SkillDocument", + "SkillMetadata", + "SkillSourceInfo", "ToolArtifact", + "ToolConfiguration", "ToolDependency", + "ToolFieldConfig", + "ToolReference", ] diff --git a/api/core/skill/entities/file_artifact.py b/api/core/skill/entities/file_artifact.py new file mode 100644 index 0000000000..629dd40972 --- /dev/null +++ b/api/core/skill/entities/file_artifact.py @@ -0,0 +1,13 @@ +from pydantic import BaseModel, ConfigDict, Field + +from core.skill.entities.skill_metadata import FileReference + + +class FilesArtifact(BaseModel): + """ + File artifact - contains all file references (transitive closure) + """ + + model_config = ConfigDict(extra="forbid") + + references: list[FileReference] = Field(default_factory=list, description="All file references") diff --git a/api/core/skill/entities/skill_artifact.py b/api/core/skill/entities/skill_artifact.py index e69de29bb2..0ee14914c2 100644 --- a/api/core/skill/entities/skill_artifact.py +++ b/api/core/skill/entities/skill_artifact.py @@ -0,0 +1,30 @@ +from pydantic import BaseModel, ConfigDict, Field + +from core.skill.entities.file_artifact import FilesArtifact +from core.skill.entities.tool_artifact import ToolArtifact + + +class SkillSourceInfo(BaseModel): + """Source file information for change detection.""" + + model_config = ConfigDict(extra="forbid") + + asset_id: str = Field(description="Asset ID of the source skill file") + content_digest: str = Field(description="Hash of the original content for change detection") + + +class SkillArtifact(BaseModel): + """ + Compiled artifact for a single skill. + + Contains the transitive closure of all tool and file dependencies, + plus the resolved content with all references replaced. 
+ """ + + model_config = ConfigDict(extra="forbid") + + skill_id: str = Field(description="Unique identifier for this skill") + source: SkillSourceInfo = Field(description="Source file information") + tools: ToolArtifact = Field(description="All tool dependencies (transitive closure)") + files: FilesArtifact = Field(description="All file references (transitive closure)") + content: str = Field(description="Resolved content with all references replaced") diff --git a/api/core/skill/entities/skill_artifact_set.py b/api/core/skill/entities/skill_artifact_set.py new file mode 100644 index 0000000000..5a4983e5e3 --- /dev/null +++ b/api/core/skill/entities/skill_artifact_set.py @@ -0,0 +1,105 @@ +from collections.abc import Iterable +from datetime import datetime + +from pydantic import BaseModel, ConfigDict, Field + +from core.skill.entities.skill_artifact import SkillArtifact +from core.skill.entities.skill_metadata import ToolReference +from core.skill.entities.tool_artifact import ToolArtifact, ToolDependency + + +class SkillArtifactSet(BaseModel): + """ + Compiled index for an entire skill project. 
+ + - Corresponds to a single JSON file in S3 + - Load once, query multiple times + - All persistence operations handled by SkillManager + """ + + model_config = ConfigDict(extra="forbid") + + assets_id: str = Field(description="Assets ID this artifact set belongs to") + schema_version: int = Field(default=1, description="Schema version for forward compatibility") + built_at: datetime | None = Field(default=None, description="Build timestamp") + + items: dict[str, SkillArtifact] = Field(default_factory=dict, description="skill_id -> SkillArtifact") + + dependency_graph: dict[str, list[str]] = Field( + default_factory=dict, + description="skill_id -> list of skill_ids it depends on", + ) + + reverse_graph: dict[str, list[str]] = Field( + default_factory=dict, + description="skill_id -> list of skill_ids that depend on it", + ) + + def get(self, skill_id: str) -> SkillArtifact | None: + return self.items.get(skill_id) + + def upsert(self, artifact: SkillArtifact) -> None: + self.items[artifact.skill_id] = artifact + + def remove(self, skill_id: str) -> None: + self.items.pop(skill_id, None) + self.dependency_graph.pop(skill_id, None) + self.reverse_graph.pop(skill_id, None) + for deps in self.reverse_graph.values(): + if skill_id in deps: + deps.remove(skill_id) + for deps in self.dependency_graph.values(): + if skill_id in deps: + deps.remove(skill_id) + + def referenced_skill_ids(self, skill_id: str) -> set[str]: + return set(self.dependency_graph.get(skill_id, [])) + + def recompile_group_ids(self, skill_id: str) -> set[str]: + result: set[str] = {skill_id} + queue = [skill_id] + while queue: + current = queue.pop() + for dependent in self.reverse_graph.get(current, []): + if dependent not in result: + result.add(dependent) + queue.append(dependent) + return result + + def subset(self, skill_ids: Iterable[str]) -> "SkillArtifactSet": + skill_id_set = set(skill_ids) + return SkillArtifactSet( + assets_id=self.assets_id, + schema_version=self.schema_version, + 
built_at=self.built_at, + items={sid: self.items[sid] for sid in skill_id_set if sid in self.items}, + dependency_graph={ + sid: [dep for dep in deps if dep in skill_id_set] + for sid, deps in self.dependency_graph.items() + if sid in skill_id_set + }, + reverse_graph={ + sid: [dep for dep in deps if dep in skill_id_set] + for sid, deps in self.reverse_graph.items() + if sid in skill_id_set + }, + ) + + def get_tool_artifact(self) -> ToolArtifact: + dependencies: dict[str, ToolDependency] = {} + references: dict[str, ToolReference] = {} + + for artifact in self.items.values(): + for dep in artifact.tools.dependencies: + key = f"{dep.provider}.{dep.tool_name}" + if key not in dependencies: + dependencies[key] = dep + + for ref in artifact.tools.references: + if ref.uuid not in references: + references[ref.uuid] = ref + + return ToolArtifact( + dependencies=list(dependencies.values()), + references=list(references.values()), + ) diff --git a/api/core/skill/entities/skill_document.py b/api/core/skill/entities/skill_document.py new file mode 100644 index 0000000000..8d31176325 --- /dev/null +++ b/api/core/skill/entities/skill_document.py @@ -0,0 +1,14 @@ +from collections.abc import Mapping +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + + +class SkillDocument(BaseModel): + """Input document for skill compilation.""" + + model_config = ConfigDict(extra="forbid") + + skill_id: str = Field(description="Unique identifier, must match SkillAsset.asset_id") + content: str = Field(description="Raw content with reference placeholders") + metadata: Mapping[str, Any] = Field(default_factory=dict, description="Raw metadata dict") diff --git a/api/core/skill/entities/skill_metadata.py b/api/core/skill/entities/skill_metadata.py new file mode 100644 index 0000000000..7778ac1830 --- /dev/null +++ b/api/core/skill/entities/skill_metadata.py @@ -0,0 +1,47 @@ +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + +from 
core.tools.entities.tool_entities import ToolProviderType + + +class ToolFieldConfig(BaseModel): + model_config = ConfigDict(extra="forbid") + + id: str + value: Any + auto: bool = False + + +class ToolConfiguration(BaseModel): + model_config = ConfigDict(extra="forbid") + + fields: list[ToolFieldConfig] = Field(default_factory=list) + + def default_values(self) -> dict[str, Any]: + return {field.id: field.value for field in self.fields if field.value is not None} + + +class ToolReference(BaseModel): + model_config = ConfigDict(extra="forbid") + + uuid: str + type: ToolProviderType + provider: str + tool_name: str + credential_id: str | None = None + configuration: ToolConfiguration | None = None + + +class FileReference(BaseModel): + model_config = ConfigDict(extra="forbid") + + source: str + asset_id: str + + +class SkillMetadata(BaseModel): + model_config = ConfigDict(extra="allow") + + tools: dict[str, ToolReference] = Field(default_factory=dict) + files: list[FileReference] = Field(default_factory=list) diff --git a/api/core/skill/entities/tool_artifact.py b/api/core/skill/entities/tool_artifact.py index c19d2d0fa2..dcd6d682b0 100644 --- a/api/core/skill/entities/tool_artifact.py +++ b/api/core/skill/entities/tool_artifact.py @@ -1,6 +1,6 @@ from pydantic import BaseModel, ConfigDict, Field -from core.app_assets.entities import ToolReference +from core.skill.entities.skill_metadata import ToolReference from core.tools.entities.tool_entities import ToolProviderType @@ -15,16 +15,8 @@ class ToolDependency(BaseModel): class ToolArtifact(BaseModel): model_config = ConfigDict(extra="forbid") - dependencies: list[ToolDependency] = Field(default_factory=list, description="List of tool dependencies") - - references: list[ToolReference] = Field(default_factory=list, description="List of tool references") - - """ - Filter the tool artifact to only include the given tools - - :param tools: Tuple of (provider, tool_name) - :return: Filtered tool artifact - """ + 
dependencies: list[ToolDependency] = Field(default_factory=list) + references: list[ToolReference] = Field(default_factory=list) def is_empty(self) -> bool: return not self.dependencies and not self.references diff --git a/api/core/skill/skill_compiler.py b/api/core/skill/skill_compiler.py new file mode 100644 index 0000000000..a078bb1da5 --- /dev/null +++ b/api/core/skill/skill_compiler.py @@ -0,0 +1,259 @@ +import hashlib +import logging +import re +from collections.abc import Mapping +from datetime import UTC, datetime +from typing import Any + +from core.app.entities.app_asset_entities import AppAssetFileTree +from core.skill.entities.file_artifact import FilesArtifact +from core.skill.entities.skill_artifact import SkillArtifact, SkillSourceInfo +from core.skill.entities.skill_artifact_set import SkillArtifactSet +from core.skill.entities.skill_document import SkillDocument +from core.skill.entities.skill_metadata import ( + FileReference, + SkillMetadata, + ToolConfiguration, + ToolReference, +) +from core.skill.entities.tool_artifact import ToolArtifact, ToolDependency +from core.tools.entities.tool_entities import ToolProviderType + +logger = logging.getLogger(__name__) + +TOOL_REFERENCE_PATTERN = re.compile(r"§\[tool\]\.\[([^\]]+)\]\.\[([^\]]+)\]\.\[([^\]]+)\]§") +FILE_REFERENCE_PATTERN = re.compile(r"§\[file\]\.\[([^\]]+)\]\.\[([^\]]+)\]§") + + +class SkillCompiler: + """ + Stateless skill compiler. 
class SkillCompiler:
    """Stateless skill compiler.

    Responsibilities:
    - Parse raw metadata dicts into SkillMetadata
    - Parse direct dependencies from skill content
    - Compute the transitive dependency closure against an artifact set
    - Resolve content by replacing tool/file reference markers
    - Generate SkillArtifact instances
    """

    def _parse_metadata(self, content: str, raw_metadata: Mapping[str, Any]) -> SkillMetadata:
        """Build SkillMetadata from the reference markers found in *content*.

        Tool references are kept only when a matching entry exists in
        ``raw_metadata["tools"]``; file references are taken verbatim from
        the content markers.
        """
        tools_raw: dict[str, Any] = dict(raw_metadata.get("tools", {}))
        tools: dict[str, ToolReference] = {}
        files: list[FileReference] = []

        for match in TOOL_REFERENCE_PATTERN.finditer(content):
            tool_provider = match.group(1)
            tool_name = match.group(2)
            tool_id = match.group(3)
            tool_meta = tools_raw.get(tool_id)
            if tool_meta is None:
                # Marker with no metadata entry: nothing to resolve against.
                continue

            raw_type = tool_meta.get("type")
            if raw_type is None:
                # Malformed metadata entry; skip it rather than crash the whole compile
                # (ToolProviderType.value_of(None) would raise).
                logger.warning("Tool %s has no provider type in metadata, skipping", tool_id)
                continue

            config_raw = tool_meta.get("configuration", {})
            configuration = ToolConfiguration.model_validate(config_raw) if config_raw else None
            tools[tool_id] = ToolReference(
                uuid=tool_id,
                type=ToolProviderType.value_of(raw_type),
                provider=tool_provider,
                tool_name=tool_name,
                credential_id=tool_meta.get("credential_id"),
                configuration=configuration,
            )

        for match in FILE_REFERENCE_PATTERN.finditer(content):
            files.append(
                FileReference(
                    source=match.group(1),
                    asset_id=match.group(2),
                )
            )

        return SkillMetadata(tools=tools, files=files)

    def _register_dependencies(
        self,
        artifact_set: SkillArtifactSet,
        skill_id: str,
        direct_refs: set[str],
    ) -> None:
        """Record *skill_id*'s direct dependencies in both graph directions."""
        artifact_set.dependency_graph[skill_id] = list(direct_refs)
        for ref_id in direct_refs:
            dependents = artifact_set.reverse_graph.setdefault(ref_id, [])
            if skill_id not in dependents:
                dependents.append(skill_id)

    def compile_all(
        self,
        documents: list[SkillDocument],
        file_tree: AppAssetFileTree,
        assets_id: str,
    ) -> SkillArtifactSet:
        """Compile every document into a fresh SkillArtifactSet.

        Two passes: first parse metadata and build the dependency graphs,
        then compile each document (closure computation needs the full graph).
        """
        artifact_set = SkillArtifactSet(
            assets_id=assets_id,
            built_at=datetime.now(UTC),
        )

        doc_map: dict[str, SkillDocument] = {doc.skill_id: doc for doc in documents}
        parsed_metadata: dict[str, SkillMetadata] = {}

        for doc in documents:
            metadata = self._parse_metadata(doc.content, doc.metadata)
            parsed_metadata[doc.skill_id] = metadata
            self._register_dependencies(
                artifact_set, doc.skill_id, self._extract_skill_refs(metadata, doc_map)
            )

        for doc in documents:
            artifact = self._compile_single(
                doc, parsed_metadata[doc.skill_id], artifact_set, parsed_metadata, file_tree
            )
            artifact_set.upsert(artifact)

        return artifact_set

    def compile_one(
        self,
        artifact_set: SkillArtifactSet,
        document: SkillDocument,
        file_tree: AppAssetFileTree,
        all_documents: dict[str, SkillDocument] | None = None,
    ) -> SkillArtifact:
        """Incrementally (re)compile one document against *artifact_set*.

        The caller is responsible for upserting the returned artifact.
        """
        # Copy so the caller's mapping is never mutated as a side effect.
        doc_map: dict[str, SkillDocument] = dict(all_documents) if all_documents else {}
        doc_map.setdefault(document.skill_id, document)

        parsed_metadata: dict[str, SkillMetadata] = {
            skill_id: self._parse_metadata(doc.content, doc.metadata)
            for skill_id, doc in doc_map.items()
        }

        metadata = parsed_metadata[document.skill_id]
        self._register_dependencies(
            artifact_set, document.skill_id, self._extract_skill_refs(metadata, doc_map)
        )

        return self._compile_single(document, metadata, artifact_set, parsed_metadata, file_tree)

    def _compile_single(
        self,
        document: SkillDocument,
        metadata: SkillMetadata,
        artifact_set: SkillArtifactSet,
        parsed_metadata: dict[str, SkillMetadata],
        file_tree: AppAssetFileTree,
    ) -> SkillArtifact:
        """Produce the artifact for one document (dependency graphs must already be built)."""
        all_tools, all_files = self._compute_transitive_closure(
            document.skill_id, artifact_set, parsed_metadata
        )

        current_node = file_tree.get(document.skill_id)

        resolved_content = self._resolve_content(
            document.content, metadata, current_node, file_tree
        )

        # Digest of the *raw* (unresolved) content: identifies the source revision.
        content_digest = hashlib.sha256(document.content.encode("utf-8")).hexdigest()

        return SkillArtifact(
            skill_id=document.skill_id,
            source=SkillSourceInfo(
                asset_id=document.skill_id,
                content_digest=content_digest,
            ),
            tools=ToolArtifact(
                dependencies=list(all_tools.values()),
                references=list(metadata.tools.values()),
            ),
            files=FilesArtifact(
                references=list(all_files.values()),
            ),
            content=resolved_content,
        )

    def _extract_skill_refs(
        self,
        metadata: SkillMetadata,
        doc_map: dict[str, SkillDocument],
    ) -> set[str]:
        """Return the file references that point at other skill documents in *doc_map*."""
        return {ref.asset_id for ref in metadata.files if ref.asset_id in doc_map}

    def _compute_transitive_closure(
        self,
        skill_id: str,
        artifact_set: SkillArtifactSet,
        parsed_metadata: dict[str, SkillMetadata],
    ) -> tuple[dict[str, ToolDependency], dict[str, FileReference]]:
        """BFS the dependency graph from *skill_id*, collecting tools and files.

        Returns (tools keyed by "provider.tool_name", files keyed by asset id);
        first-seen wins on duplicate keys. Skills not in the current compile
        batch fall back to their previously compiled artifact, whose stored
        dependencies already include their own closure (so they are not expanded).
        """
        from collections import deque  # function-scope: keeps module-level imports untouched

        all_tools: dict[str, ToolDependency] = {}
        all_files: dict[str, FileReference] = {}

        visited: set[str] = set()
        queue: deque[str] = deque((skill_id,))

        while queue:
            current_id = queue.popleft()  # O(1); list.pop(0) was O(n) per step
            if current_id in visited:
                continue
            visited.add(current_id)

            metadata = parsed_metadata.get(current_id)
            if metadata is None:
                existing_artifact = artifact_set.get(current_id)
                if existing_artifact:
                    for dep in existing_artifact.tools.dependencies:
                        all_tools.setdefault(f"{dep.provider}.{dep.tool_name}", dep)
                    for file_ref in existing_artifact.files.references:
                        all_files.setdefault(file_ref.asset_id, file_ref)
                # Stored dependencies are already transitive: do not expand further.
                continue

            for tool_ref in metadata.tools.values():
                key = f"{tool_ref.provider}.{tool_ref.tool_name}"
                if key not in all_tools:
                    all_tools[key] = ToolDependency(
                        type=tool_ref.type,
                        provider=tool_ref.provider,
                        tool_name=tool_ref.tool_name,
                    )

            for file_ref in metadata.files:
                all_files.setdefault(file_ref.asset_id, file_ref)

            for dep_id in artifact_set.dependency_graph.get(current_id, []):
                if dep_id not in visited:
                    queue.append(dep_id)

        return all_tools, all_files

    def _resolve_content(
        self,
        content: str,
        metadata: SkillMetadata,
        current_node: Any,
        file_tree: AppAssetFileTree,
    ) -> str:
        """Replace file/tool reference markers with human-readable text.

        NOTE: ``str.replace`` substitutes *all* occurrences of a marker, so a
        marker appearing multiple times is resolved on its first match and
        later identical matches become no-ops.
        """
        if not content:
            return content

        for match in FILE_REFERENCE_PATTERN.finditer(content):
            file_id = match.group(2)
            file_node = file_tree.get(file_id)
            if file_node is None:
                logger.warning("File not found for id=%s, skipping", file_id)
                content = content.replace(match.group(0), "[File not found]")
            elif current_node is not None:
                # Render as a path relative to the skill's own location.
                content = content.replace(match.group(0), file_tree.relative_path(current_node, file_node))
            else:
                content = content.replace(match.group(0), f"[{file_node.name}]")

        for match in TOOL_REFERENCE_PATTERN.finditer(content):
            tool_id = match.group(3)
            tool = metadata.tools.get(tool_id)
            if tool is None:
                logger.warning("Tool not found for id=%s, skipping", tool_id)
                content = content.replace(match.group(0), f"[Tool not found: {tool_id}]")
            else:
                content = content.replace(match.group(0), f"[Bash Command: {tool.tool_name}_{tool_id}]")

        return content
ToolDependency - dependencies: dict[str, ToolDependency] = {} - references: list[ToolReference] = [] + def _load_content(storage_key: str) -> str: + import json - for asset in assets: - for id, tool in asset.metadata.tools.items(): - dependencies[f"{tool.provider}.{tool.tool_name}"] = ToolDependency( - type=tool.type, - provider=tool.provider, - tool_name=tool.tool_name, - ) - - references.append( - ToolReference( - uuid=id, - type=tool.type, - provider=tool.provider, - tool_name=tool.tool_name, - ) - ) - - return ToolArtifact(dependencies=list(dependencies.values()), references=references) + try: + data = json.loads(storage.load_once(storage_key)) + return data.get("content", "") if isinstance(data, dict) else "" + except Exception: + return "" @staticmethod def save_tool_artifact( @@ -55,3 +42,49 @@ class SkillManager: return ToolArtifact.model_validate_json(data) except Exception: return None + + @staticmethod + def compile_all( + documents: list[SkillDocument], + file_tree: AppAssetFileTree, + assets_id: str, + ) -> SkillArtifactSet: + compiler = SkillCompiler() + return compiler.compile_all(documents, file_tree, assets_id) + + @staticmethod + def assets_to_documents(assets: list[SkillAsset]) -> list[SkillDocument]: + documents: list[SkillDocument] = [] + for asset in assets: + content = SkillManager._load_content(asset.storage_key) + documents.append( + SkillDocument( + skill_id=asset.asset_id, + content=content, + metadata=asset.metadata, + ) + ) + return documents + + @staticmethod + def load_artifact( + tenant_id: str, + app_id: str, + assets_id: str, + ) -> SkillArtifactSet | None: + key = AssetPaths.build_skill_artifact_set(tenant_id, app_id, assets_id) + try: + data = storage.load_once(key) + return SkillArtifactSet.model_validate_json(data) + except Exception: + return None + + @staticmethod + def save_artifact( + tenant_id: str, + app_id: str, + assets_id: str, + artifact_set: SkillArtifactSet, + ) -> None: + key = 
AssetPaths.build_skill_artifact_set(tenant_id, app_id, assets_id) + storage.save(key, artifact_set.model_dump_json(indent=2).encode("utf-8")) diff --git a/api/services/app_asset_service.py b/api/services/app_asset_service.py index c4282264ed..76db7ce044 100644 --- a/api/services/app_asset_service.py +++ b/api/services/app_asset_service.py @@ -12,12 +12,9 @@ from core.app.entities.app_asset_entities import ( TreeParentNotFoundError, TreePathConflictError, ) -from core.app_assets.entities import SkillAsset +from core.app_assets.builder import AssetBuildPipeline, BuildContext from core.app_assets.packager.zip_packager import ZipPackager -from core.app_assets.parser.asset_parser import AssetParser -from core.app_assets.parser.skill_parser import SkillAssetParser from core.app_assets.paths import AssetPaths -from core.skill.skill_manager import SkillManager from extensions.ext_database import db from extensions.ext_storage import storage from extensions.storage.file_presign_storage import FilePresignStorage @@ -315,28 +312,11 @@ class AppAssetService: session.add(published) session.flush() - parser = AssetParser(tree, tenant_id, app_id) - parser.register( - "md", - SkillAssetParser(tenant_id, app_id, publish_id, tree), - ) + ctx = BuildContext(tenant_id=tenant_id, app_id=app_id, build_id=publish_id) + built_assets = AssetBuildPipeline().build_all(tree, ctx) - assets = parser.parse() - artifact = SkillManager.generate_tool_artifact( - assets=[asset for asset in assets if isinstance(asset, SkillAsset)] - ) - - SkillManager.save_tool_artifact( - tenant_id, - app_id, - publish_id, - artifact, - ) - - # TODO: use VM zip packager and make this process async packager = ZipPackager(storage) - - zip_bytes = packager.package(assets) + zip_bytes = packager.package(built_assets) zip_key = AssetPaths.build_zip(tenant_id, app_id, publish_id) storage.save(zip_key, zip_bytes) @@ -348,26 +328,11 @@ class AppAssetService: def build_assets(tenant_id: str, app_id: str, assets: AppAssets) 
def create_file_tree(*nodes: AppAssetNode) -> AppAssetFileTree:
    """Build an AppAssetFileTree containing exactly *nodes*."""
    tree = AppAssetFileTree()
    for node in nodes:
        tree.nodes.append(node)
    return tree


def make_metadata(
    tools: dict[str, ToolReference] | None = None,
    files: list[FileReference] | None = None,
) -> dict[str, Any]:
    """Build a raw metadata dict shaped like what SkillCompiler._parse_metadata expects.

    NOTE(review): *files* is accepted for call-site symmetry but never used here —
    file references are parsed from content markers, not metadata. Confirm intentional.
    """
    result: dict[str, Any] = {"tools": {}}
    if tools:
        for tool_id, tool in tools.items():
            result["tools"][tool_id] = {
                "type": tool.type.value,
                "credential_id": tool.credential_id,
                "configuration": tool.configuration.model_dump() if tool.configuration else {},
            }
    return result


class TestSkillCompilerBasic:
    """Single-document compiles: no refs, one tool ref, one file ref."""

    def test_compile_single_skill_no_dependencies(self):
        """A skill without markers compiles to an artifact with empty tools/files."""
        # given
        doc = SkillDocument(
            skill_id="skill-1",
            content="This is a simple skill with no references.",
            metadata=make_metadata(tools={}, files=[]),
        )
        tree = create_file_tree(
            AppAssetNode.create_file("skill-1", "skill.md"),
        )
        compiler = SkillCompiler()

        # when
        artifact_set = compiler.compile_all([doc], tree, "assets-1")

        # then
        assert artifact_set.assets_id == "assets-1"
        assert len(artifact_set.items) == 1

        artifact = artifact_set.get("skill-1")
        assert artifact is not None
        assert artifact.skill_id == "skill-1"
        assert artifact.content == "This is a simple skill with no references."
        assert len(artifact.tools.dependencies) == 0
        assert len(artifact.files.references) == 0

    def test_compile_skill_with_tool_reference(self):
        """A tool marker is replaced with its bash-command placeholder and recorded as a dependency."""
        # given
        tool_ref = ToolReference(
            uuid="tool-uuid-1",
            type=ToolProviderType.BUILT_IN,
            provider="sandbox",
            tool_name="bash",
            credential_id=None,
            configuration=ToolConfiguration(fields=[]),
        )
        doc = SkillDocument(
            skill_id="skill-1",
            content="Run this command: §[tool].[sandbox].[bash].[tool-uuid-1]§",
            metadata=make_metadata(
                tools={"tool-uuid-1": tool_ref},
                files=[],
            ),
        )
        tree = create_file_tree(
            AppAssetNode.create_file("skill-1", "skill.md"),
        )
        compiler = SkillCompiler()

        # when
        artifact_set = compiler.compile_all([doc], tree, "assets-1")

        # then
        artifact = artifact_set.get("skill-1")
        assert artifact is not None
        assert artifact.content == "Run this command: [Bash Command: bash_tool-uuid-1]"
        assert len(artifact.tools.dependencies) == 1
        assert artifact.tools.dependencies[0].provider == "sandbox"
        assert artifact.tools.dependencies[0].tool_name == "bash"

    def test_compile_skill_with_file_reference(self):
        """A file marker is replaced with a path relative to the skill's node."""
        # given
        doc = SkillDocument(
            skill_id="skill-1",
            content="See this file: §[file].[app].[file-1]§",
            metadata=make_metadata(
                tools={},
                files=[FileReference(source="app", asset_id="file-1")],
            ),
        )
        tree = create_file_tree(
            AppAssetNode.create_file("skill-1", "skill.md"),
            AppAssetNode.create_file("file-1", "readme.txt"),
        )
        compiler = SkillCompiler()

        # when
        artifact_set = compiler.compile_all([doc], tree, "assets-1")

        # then
        artifact = artifact_set.get("skill-1")
        assert artifact is not None
        assert artifact.content == "See this file: ./readme.txt"
        assert len(artifact.files.references) == 1
        assert artifact.files.references[0].asset_id == "file-1"
artifact_a.tools.dependencies[0].tool_name == "python" + + # dependency graph should show skill-a depends on skill-b + assert "skill-b" in artifact_set.dependency_graph.get("skill-a", []) + # reverse graph should show skill-b is depended by skill-a + assert "skill-a" in artifact_set.reverse_graph.get("skill-b", []) + + def test_compile_chain_dependency(self): + # given + # skill-a -> skill-b -> skill-c + # each has its own tool + tool_a = ToolReference( + uuid="tool-a", type=ToolProviderType.BUILT_IN, provider="p", tool_name="tool_a" + ) + tool_b = ToolReference( + uuid="tool-b", type=ToolProviderType.BUILT_IN, provider="p", tool_name="tool_b" + ) + tool_c = ToolReference( + uuid="tool-c", type=ToolProviderType.BUILT_IN, provider="p", tool_name="tool_c" + ) + + doc_a = SkillDocument( + skill_id="skill-a", + content="A refs B: §[file].[app].[skill-b]§ §[tool].[p].[tool_a].[tool-a]§", + metadata=make_metadata( + tools={"tool-a": tool_a}, + files=[FileReference(source="app", asset_id="skill-b")], + ), + ) + doc_b = SkillDocument( + skill_id="skill-b", + content="B refs C: §[file].[app].[skill-c]§ §[tool].[p].[tool_b].[tool-b]§", + metadata=make_metadata( + tools={"tool-b": tool_b}, + files=[FileReference(source="app", asset_id="skill-c")], + ), + ) + doc_c = SkillDocument( + skill_id="skill-c", + content="C is leaf §[tool].[p].[tool_c].[tool-c]§", + metadata=make_metadata( + tools={"tool-c": tool_c}, + files=[], + ), + ) + + tree = create_file_tree( + AppAssetNode.create_file("skill-a", "a.md"), + AppAssetNode.create_file("skill-b", "b.md"), + AppAssetNode.create_file("skill-c", "c.md"), + ) + compiler = SkillCompiler() + + # when + artifact_set = compiler.compile_all([doc_a, doc_b, doc_c], tree, "assets-1") + + # then + artifact_a = artifact_set.get("skill-a") + assert artifact_a is not None + # skill-a should have all 3 tools (own + transitive) + tool_names = {d.tool_name for d in artifact_a.tools.dependencies} + assert tool_names == {"tool_a", "tool_b", "tool_c"} + 
class TestSkillArtifactSetQueries:
    """Graph queries on a compiled artifact set (recompile groups, referenced ids)."""

    def test_recompile_group_ids(self):
        """Changing a leaf skill marks every transitive dependent for recompile."""
        # given
        # skill-a -> skill-b -> skill-c
        doc_a = SkillDocument(
            skill_id="skill-a",
            content="refs B: §[file].[app].[skill-b]§",
            metadata=make_metadata(
                tools={},
                files=[FileReference(source="app", asset_id="skill-b")],
            ),
        )
        doc_b = SkillDocument(
            skill_id="skill-b",
            content="refs C: §[file].[app].[skill-c]§",
            metadata=make_metadata(
                tools={},
                files=[FileReference(source="app", asset_id="skill-c")],
            ),
        )
        doc_c = SkillDocument(
            skill_id="skill-c",
            content="leaf",
            metadata=make_metadata(tools={}, files=[]),
        )

        tree = create_file_tree(
            AppAssetNode.create_file("skill-a", "a.md"),
            AppAssetNode.create_file("skill-b", "b.md"),
            AppAssetNode.create_file("skill-c", "c.md"),
        )
        compiler = SkillCompiler()
        artifact_set = compiler.compile_all([doc_a, doc_b, doc_c], tree, "assets-1")

        # when - if skill-c changes, who needs recompile?
        affected = artifact_set.recompile_group_ids("skill-c")

        # then - all upstream skills need recompile
        assert affected == {"skill-a", "skill-b", "skill-c"}

    def test_referenced_skill_ids(self):
        """referenced_skill_ids returns the direct skill references of one skill."""
        # given
        doc_a = SkillDocument(
            skill_id="skill-a",
            content="refs B and C: §[file].[app].[skill-b]§ §[file].[app].[skill-c]§",
            metadata=make_metadata(
                tools={},
                files=[
                    FileReference(source="app", asset_id="skill-b"),
                    FileReference(source="app", asset_id="skill-c"),
                ],
            ),
        )
        doc_b = SkillDocument(
            skill_id="skill-b",
            content="B",
            metadata=make_metadata(tools={}, files=[]),
        )
        doc_c = SkillDocument(
            skill_id="skill-c",
            content="C",
            metadata=make_metadata(tools={}, files=[]),
        )

        tree = create_file_tree(
            AppAssetNode.create_file("skill-a", "a.md"),
            AppAssetNode.create_file("skill-b", "b.md"),
            AppAssetNode.create_file("skill-c", "c.md"),
        )
        compiler = SkillCompiler()
        artifact_set = compiler.compile_all([doc_a, doc_b, doc_c], tree, "assets-1")

        # when
        refs = artifact_set.referenced_skill_ids("skill-a")

        # then
        assert refs == {"skill-b", "skill-c"}


class TestSkillCompilerIncrementalCompile:
    """compile_one + upsert updates a single artifact in an existing set."""

    def test_compile_one_updates_artifact_set(self):
        """Recompiling one document replaces its artifact content in the set."""
        # given - initial compile
        doc_a = SkillDocument(
            skill_id="skill-a",
            content="original content",
            metadata=make_metadata(tools={}, files=[]),
        )
        tree = create_file_tree(
            AppAssetNode.create_file("skill-a", "a.md"),
        )
        compiler = SkillCompiler()
        artifact_set = compiler.compile_all([doc_a], tree, "assets-1")

        # when - update skill-a
        updated_doc = SkillDocument(
            skill_id="skill-a",
            content="updated content",
            metadata=make_metadata(tools={}, files=[]),
        )
        updated_artifact = compiler.compile_one(artifact_set, updated_doc, tree)
        artifact_set.upsert(updated_artifact)

        # then
        artifact = artifact_set.get("skill-a")
        assert artifact is not None
        assert artifact.content == "updated content"


class TestSkillCompilerEdgeCases:
    """Missing references and digest behavior."""

    def test_missing_file_reference_replaced_with_placeholder(self):
        """A file marker pointing at a missing node becomes '[File not found]'."""
        # given
        doc = SkillDocument(
            skill_id="skill-1",
            content="See: §[file].[app].[non-existent]§",
            metadata=make_metadata(
                tools={},
                files=[FileReference(source="app", asset_id="non-existent")],
            ),
        )
        tree = create_file_tree(
            AppAssetNode.create_file("skill-1", "skill.md"),
        )
        compiler = SkillCompiler()

        # when
        artifact_set = compiler.compile_all([doc], tree, "assets-1")

        # then
        artifact = artifact_set.get("skill-1")
        assert artifact is not None
        assert "[File not found]" in artifact.content

    def test_missing_tool_reference_replaced_with_placeholder(self):
        """A tool marker without a metadata entry becomes '[Tool not found: <id>]'."""
        # given
        doc = SkillDocument(
            skill_id="skill-1",
            content="Run: §[tool].[sandbox].[bash].[missing-tool]§",
            metadata=make_metadata(tools={}, files=[]),
        )
        tree = create_file_tree(
            AppAssetNode.create_file("skill-1", "skill.md"),
        )
        compiler = SkillCompiler()

        # when
        artifact_set = compiler.compile_all([doc], tree, "assets-1")

        # then
        artifact = artifact_set.get("skill-1")
        assert artifact is not None
        assert "[Tool not found: missing-tool]" in artifact.content

    def test_content_digest_changes_when_content_changes(self):
        """The source content digest tracks the raw content, so edits change it."""
        # given
        tree = create_file_tree(
            AppAssetNode.create_file("skill-1", "skill.md"),
        )
        compiler = SkillCompiler()

        doc1 = SkillDocument(
            skill_id="skill-1",
            content="content version 1",
            metadata=make_metadata(tools={}, files=[]),
        )
        artifact_set1 = compiler.compile_all([doc1], tree, "assets-1")
        artifact1 = artifact_set1.get("skill-1")
        assert artifact1 is not None
        digest1 = artifact1.source.content_digest

        doc2 = SkillDocument(
            skill_id="skill-1",
            content="content version 2",
            metadata=make_metadata(tools={}, files=[]),
        )
        artifact_set2 = compiler.compile_all([doc2], tree, "assets-1")
        artifact2 = artifact_set2.get("skill-1")
        assert artifact2 is not None
        digest2 = artifact2.source.content_digest

        # then
        assert digest1 != digest2
+ + +class TestSkillCompilerComplexScenarios: + def test_diamond_dependency(self): + # given + # skill-a + # / \ + # skill-b skill-c + # \ / + # skill-d (has tool) + tool_d = ToolReference( + uuid="tool-d", type=ToolProviderType.BUILT_IN, provider="p", tool_name="tool_d" + ) + + doc_a = SkillDocument( + skill_id="skill-a", + content="A refs B and C: §[file].[app].[skill-b]§ §[file].[app].[skill-c]§", + metadata=make_metadata( + tools={}, + files=[ + FileReference(source="app", asset_id="skill-b"), + FileReference(source="app", asset_id="skill-c"), + ], + ), + ) + doc_b = SkillDocument( + skill_id="skill-b", + content="B refs D: §[file].[app].[skill-d]§", + metadata=make_metadata( + tools={}, + files=[FileReference(source="app", asset_id="skill-d")], + ), + ) + doc_c = SkillDocument( + skill_id="skill-c", + content="C refs D: §[file].[app].[skill-d]§", + metadata=make_metadata( + tools={}, + files=[FileReference(source="app", asset_id="skill-d")], + ), + ) + doc_d = SkillDocument( + skill_id="skill-d", + content="D is leaf with tool: §[tool].[p].[tool_d].[tool-d]§", + metadata=make_metadata( + tools={"tool-d": tool_d}, + files=[], + ), + ) + + tree = create_file_tree( + AppAssetNode.create_file("skill-a", "a.md"), + AppAssetNode.create_file("skill-b", "b.md"), + AppAssetNode.create_file("skill-c", "c.md"), + AppAssetNode.create_file("skill-d", "d.md"), + ) + compiler = SkillCompiler() + + # when + artifact_set = compiler.compile_all([doc_a, doc_b, doc_c, doc_d], tree, "assets-1") + + # then + # skill-a should have tool_d (via both B and C paths, but only once) + artifact_a = artifact_set.get("skill-a") + assert artifact_a is not None + assert len(artifact_a.tools.dependencies) == 1 + assert artifact_a.tools.dependencies[0].tool_name == "tool_d" + + # if skill-d changes, all upstream need recompile + affected = artifact_set.recompile_group_ids("skill-d") + assert affected == {"skill-a", "skill-b", "skill-c", "skill-d"} + + def 
test_multiple_tools_from_multiple_paths(self): + # given + # skill-a + # / \ + # skill-b skill-c + # (tool_b) (tool_c) + # \ / + # skill-d + # (tool_d) + tool_b = ToolReference( + uuid="tool-b", type=ToolProviderType.BUILT_IN, provider="p", tool_name="tool_b" + ) + tool_c = ToolReference( + uuid="tool-c", type=ToolProviderType.API, provider="q", tool_name="tool_c" + ) + tool_d = ToolReference( + uuid="tool-d", type=ToolProviderType.WORKFLOW, provider="r", tool_name="tool_d" + ) + + doc_a = SkillDocument( + skill_id="skill-a", + content="A: §[file].[app].[skill-b]§ §[file].[app].[skill-c]§", + metadata=make_metadata( + tools={}, + files=[ + FileReference(source="app", asset_id="skill-b"), + FileReference(source="app", asset_id="skill-c"), + ], + ), + ) + doc_b = SkillDocument( + skill_id="skill-b", + content="B: §[file].[app].[skill-d]§ §[tool].[p].[tool_b].[tool-b]§", + metadata=make_metadata( + tools={"tool-b": tool_b}, + files=[FileReference(source="app", asset_id="skill-d")], + ), + ) + doc_c = SkillDocument( + skill_id="skill-c", + content="C: §[file].[app].[skill-d]§ §[tool].[q].[tool_c].[tool-c]§", + metadata=make_metadata( + tools={"tool-c": tool_c}, + files=[FileReference(source="app", asset_id="skill-d")], + ), + ) + doc_d = SkillDocument( + skill_id="skill-d", + content="D: §[tool].[r].[tool_d].[tool-d]§", + metadata=make_metadata( + tools={"tool-d": tool_d}, + files=[], + ), + ) + + tree = create_file_tree( + AppAssetNode.create_file("skill-a", "a.md"), + AppAssetNode.create_file("skill-b", "b.md"), + AppAssetNode.create_file("skill-c", "c.md"), + AppAssetNode.create_file("skill-d", "d.md"), + ) + compiler = SkillCompiler() + + # when + artifact_set = compiler.compile_all([doc_a, doc_b, doc_c, doc_d], tree, "assets-1") + + # then + artifact_a = artifact_set.get("skill-a") + assert artifact_a is not None + tool_names = {d.tool_name for d in artifact_a.tools.dependencies} + assert tool_names == {"tool_b", "tool_c", "tool_d"} + + # verify different tool 
types are preserved + tool_types = {d.type for d in artifact_a.tools.dependencies} + assert tool_types == {ToolProviderType.BUILT_IN, ToolProviderType.API, ToolProviderType.WORKFLOW} + + def test_deep_nested_folder_structure_with_relative_paths(self): + # given + # /root/ + # main.md (refs helper and asset) + # helpers/ + # helper.md (refs deep asset) + # deep/ + # deep-helper.md + # assets/ + # image.png + folder_root = AppAssetNode.create_folder("folder-root", "root") + folder_helpers = AppAssetNode.create_folder("folder-helpers", "helpers", parent_id="folder-root") + folder_deep = AppAssetNode.create_folder("folder-deep", "deep", parent_id="folder-helpers") + folder_assets = AppAssetNode.create_folder("folder-assets", "assets", parent_id="folder-root") + + file_main = AppAssetNode.create_file("file-main", "main.md", parent_id="folder-root") + file_helper = AppAssetNode.create_file("file-helper", "helper.md", parent_id="folder-helpers") + file_deep = AppAssetNode.create_file("file-deep", "deep-helper.md", parent_id="folder-deep") + file_image = AppAssetNode.create_file("file-image", "image.png", parent_id="folder-assets") + + tree = create_file_tree( + folder_root, folder_helpers, folder_deep, folder_assets, + file_main, file_helper, file_deep, file_image, + ) + + doc_main = SkillDocument( + skill_id="file-main", + content="Main refs helper: §[file].[app].[file-helper]§ and image: §[file].[app].[file-image]§", + metadata=make_metadata( + tools={}, + files=[ + FileReference(source="app", asset_id="file-helper"), + FileReference(source="app", asset_id="file-image"), + ], + ), + ) + doc_helper = SkillDocument( + skill_id="file-helper", + content="Helper refs deep: §[file].[app].[file-deep]§", + metadata=make_metadata( + tools={}, + files=[FileReference(source="app", asset_id="file-deep")], + ), + ) + doc_deep = SkillDocument( + skill_id="file-deep", + content="Deep helper content", + metadata=make_metadata(tools={}, files=[]), + ) + + compiler = SkillCompiler() + + 
        # when
        artifact_set = compiler.compile_all([doc_main, doc_helper, doc_deep], tree, "assets-1")

        # then
        artifact_main = artifact_set.get("file-main")
        assert artifact_main is not None
        # main.md -> helpers/helper.md = ./helpers/helper.md
        assert "./helpers/helper.md" in artifact_main.content
        # main.md -> assets/image.png = ./assets/image.png
        assert "./assets/image.png" in artifact_main.content

        artifact_helper = artifact_set.get("file-helper")
        assert artifact_helper is not None
        # helpers/helper.md -> helpers/deep/deep-helper.md = ./deep/deep-helper.md
        assert "./deep/deep-helper.md" in artifact_helper.content

    def test_skill_with_many_tools_and_files(self):
        """Compile one skill that references 10 tools and 5 files; every
        reference marker in the content must be resolved and all tool/file
        dependency lists populated."""
        # given - skill with 10 tools and 5 file references
        tools = {
            f"tool-{i}": ToolReference(
                uuid=f"tool-{i}",
                type=ToolProviderType.BUILT_IN,
                provider=f"provider-{i}",
                tool_name=f"tool_name_{i}",
            )
            for i in range(10)
        }
        files = [
            FileReference(source="app", asset_id=f"file-{i}")
            for i in range(5)
        ]

        # Reference markers use the compiler's §[kind].[...]§ grammar.
        tool_refs_in_content = " ".join(
            f"§[tool].[provider-{i}].[tool_name_{i}].[tool-{i}]§" for i in range(10)
        )
        file_refs_in_content = " ".join(
            f"§[file].[app].[file-{i}]§" for i in range(5)
        )

        doc = SkillDocument(
            skill_id="skill-main",
            content=f"Tools: {tool_refs_in_content}\nFiles: {file_refs_in_content}",
            metadata=make_metadata(tools=tools, files=files),
        )

        nodes = [AppAssetNode.create_file("skill-main", "main.md")]
        nodes.extend(AppAssetNode.create_file(f"file-{i}", f"file-{i}.txt") for i in range(5))
        tree = create_file_tree(*nodes)

        compiler = SkillCompiler()

        # when
        artifact_set = compiler.compile_all([doc], tree, "assets-1")

        # then
        artifact = artifact_set.get("skill-main")
        assert artifact is not None
        assert len(artifact.tools.dependencies) == 10
        assert len(artifact.tools.references) == 10
        assert len(artifact.files.references) == 5

        # all tool references should be replaced
        for i in range(10):
            assert f"[Bash Command: tool_name_{i}_tool-{i}]" in artifact.content
        # all file references should be replaced
        for i in range(5):
            assert f"./file-{i}.txt" in artifact.content

    def test_incremental_compile_with_new_dependency(self):
        """compile_one on an updated document must pick up a dependency that
        did not exist at full-compile time and update both graph directions."""
        # given - initial state: skill-a standalone
        tree = create_file_tree(
            AppAssetNode.create_file("skill-a", "a.md"),
            AppAssetNode.create_file("skill-b", "b.md"),
        )

        doc_a_v1 = SkillDocument(
            skill_id="skill-a",
            content="A standalone",
            metadata=make_metadata(tools={}, files=[]),
        )
        doc_b = SkillDocument(
            skill_id="skill-b",
            content="B with tool: §[tool].[p].[tool_b].[tool-b]§",
            metadata=make_metadata(
                tools={
                    "tool-b": ToolReference(
                        uuid="tool-b",
                        type=ToolProviderType.BUILT_IN,
                        provider="p",
                        tool_name="tool_b",
                    )
                },
                files=[],
            ),
        )

        compiler = SkillCompiler()
        artifact_set = compiler.compile_all([doc_a_v1, doc_b], tree, "assets-1")

        # skill-a has no dependencies initially
        artifact_a_v1 = artifact_set.get("skill-a")
        assert artifact_a_v1 is not None
        assert len(artifact_a_v1.tools.dependencies) == 0

        # when - update skill-a to reference skill-b
        doc_a_v2 = SkillDocument(
            skill_id="skill-a",
            content="A now refs B: §[file].[app].[skill-b]§",
            metadata=make_metadata(
                tools={},
                files=[FileReference(source="app", asset_id="skill-b")],
            ),
        )
        doc_map = {"skill-a": doc_a_v2, "skill-b": doc_b}
        artifact_a_v2 = compiler.compile_one(artifact_set, doc_a_v2, tree, doc_map)
        artifact_set.upsert(artifact_a_v2)

        # then - skill-a now has tool_b from skill-b (transitively inherited)
        artifact_a_final = artifact_set.get("skill-a")
        assert artifact_a_final is not None
        assert len(artifact_a_final.tools.dependencies) == 1
        assert artifact_a_final.tools.dependencies[0].tool_name == "tool_b"

        # dependency graph updated in both directions
        assert "skill-b" in artifact_set.dependency_graph.get("skill-a", [])
        assert "skill-a" in artifact_set.reverse_graph.get("skill-b", [])

    def test_serialization_roundtrip(self):
        """SkillArtifactSet must survive a JSON dump/load cycle with items,
        both dependency graphs, compiled content, and tool deps intact."""
        # given - complex artifact set
        tool = ToolReference(
            uuid="tool-1",
            type=ToolProviderType.BUILT_IN,
            provider="sandbox",
            tool_name="bash",
        )
        doc_a = SkillDocument(
            skill_id="skill-a",
            content="A refs B: §[file].[app].[skill-b]§ §[tool].[sandbox].[bash].[tool-1]§",
            metadata=make_metadata(
                tools={"tool-1": tool},
                files=[FileReference(source="app", asset_id="skill-b")],
            ),
        )
        doc_b = SkillDocument(
            skill_id="skill-b",
            content="B leaf",
            metadata=make_metadata(tools={}, files=[]),
        )

        tree = create_file_tree(
            AppAssetNode.create_file("skill-a", "a.md"),
            AppAssetNode.create_file("skill-b", "b.md"),
        )
        compiler = SkillCompiler()
        original = compiler.compile_all([doc_a, doc_b], tree, "assets-1")

        # when - serialize and deserialize
        json_str = original.model_dump_json()
        restored = SkillArtifactSet.model_validate_json(json_str)

        # then - all data preserved
        assert restored.assets_id == original.assets_id
        assert len(restored.items) == len(original.items)
        assert restored.dependency_graph == original.dependency_graph
        assert restored.reverse_graph == original.reverse_graph

        original_a = original.get("skill-a")
        assert original_a is not None
        artifact_a = restored.get("skill-a")
        assert artifact_a is not None
        assert artifact_a.content == original_a.content
        assert len(artifact_a.tools.dependencies) == 1

    def test_subset_preserves_internal_dependencies(self):
        """subset() keeps edges between members of the subset and drops edges
        pointing at skills outside of it."""
        # given
        # skill-a -> skill-b -> skill-c -> skill-d
        docs = [
            SkillDocument(
                skill_id="skill-a",
                content="A: §[file].[app].[skill-b]§",
                metadata=make_metadata(
                    tools={},
                    files=[FileReference(source="app", asset_id="skill-b")],
                ),
            ),
            SkillDocument(
                skill_id="skill-b",
                content="B: §[file].[app].[skill-c]§",
                metadata=make_metadata(
                    tools={},
                    files=[FileReference(source="app", asset_id="skill-c")],
                ),
            ),
            SkillDocument(
                skill_id="skill-c",
                content="C: §[file].[app].[skill-d]§",
                metadata=make_metadata(
                    tools={},
                    files=[FileReference(source="app", asset_id="skill-d")],
                ),
            ),
            SkillDocument(
                skill_id="skill-d",
                content="D",
                metadata=make_metadata(tools={}, files=[]),
            ),
        ]
        tree = create_file_tree(
            AppAssetNode.create_file("skill-a", "a.md"),
            AppAssetNode.create_file("skill-b", "b.md"),
            AppAssetNode.create_file("skill-c", "c.md"),
            AppAssetNode.create_file("skill-d", "d.md"),
        )
        compiler = SkillCompiler()
        full_set = compiler.compile_all(docs, tree, "assets-1")

        # when - get subset of B and C only
        subset = full_set.subset(["skill-b", "skill-c"])

        # then
        assert len(subset.items) == 2
        assert subset.get("skill-b") is not None
        assert subset.get("skill-c") is not None
        assert subset.get("skill-a") is None
        assert subset.get("skill-d") is None

        # internal dependency preserved (B -> C)
        assert "skill-c" in subset.dependency_graph.get("skill-b", [])
        # external dependencies filtered out
        assert "skill-d" not in subset.dependency_graph.get("skill-c", [])


class TestSkillCompilerIncrementalRecompile:
    """Tests for dependency-driven incremental recompilation: which skills
    recompile_group_ids() selects after a change, and that re-running
    compile_one over that group propagates updated content/tools upstream."""

    def test_single_change_triggers_upstream_recompile(self):
        """Changing the leaf of a linear chain a -> b -> c must mark all three
        for recompile; after recompiling, only c's own content digest changes."""
        # given
        # skill-a -> skill-b -> skill-c (leaf)
        # each has unique tool
        tool_a = ToolReference(uuid="t-a", type=ToolProviderType.BUILT_IN, provider="p", tool_name="tool_a")
        tool_b = ToolReference(uuid="t-b", type=ToolProviderType.BUILT_IN, provider="p", tool_name="tool_b")
        tool_c = ToolReference(uuid="t-c", type=ToolProviderType.BUILT_IN, provider="p", tool_name="tool_c")

        doc_a = SkillDocument(
            skill_id="a",
            content="A content v1 §[tool].[p].[tool_a].[t-a]§ §[file].[app].[b]§",
            metadata=make_metadata(
                tools={"t-a": tool_a},
                files=[FileReference(source="app", asset_id="b")],
            ),
        )
        doc_b = SkillDocument(
            skill_id="b",
            content="B content v1 §[tool].[p].[tool_b].[t-b]§ §[file].[app].[c]§",
            metadata=make_metadata(
                tools={"t-b": tool_b},
                files=[FileReference(source="app", asset_id="c")],
            ),
        )
        doc_c = SkillDocument(
            skill_id="c",
            content="C content v1 §[tool].[p].[tool_c].[t-c]§",
            metadata=make_metadata(tools={"t-c": tool_c}, files=[]),
        )

        tree = create_file_tree(
            AppAssetNode.create_file("a", "a.md"),
            AppAssetNode.create_file("b", "b.md"),
            AppAssetNode.create_file("c", "c.md"),
        )
        compiler = SkillCompiler()
        artifact_set = compiler.compile_all([doc_a, doc_b, doc_c], tree, "assets-1")

        original_a_digest = artifact_set.get("a").source.content_digest
        original_b_digest = artifact_set.get("b").source.content_digest
        original_c_digest = artifact_set.get("c").source.content_digest

        # when - skill-c changes
        doc_c_v2 = SkillDocument(
            skill_id="c",
            content="C content v2 - UPDATED §[tool].[p].[tool_c].[t-c]§",
            metadata=make_metadata(tools={"t-c": tool_c}, files=[]),
        )

        # find affected skills using recompile_group_ids
        affected_ids = artifact_set.recompile_group_ids("c")

        # then - all upstream skills are affected
        assert affected_ids == {"a", "b", "c"}

        # simulate incremental recompile for affected skills
        doc_map = {"a": doc_a, "b": doc_b, "c": doc_c_v2}
        for skill_id in affected_ids:
            updated = compiler.compile_one(artifact_set, doc_map[skill_id], tree, doc_map)
            artifact_set.upsert(updated)

        # verify c's content changed
        assert artifact_set.get("c").source.content_digest != original_c_digest
        assert "v2 - UPDATED" in artifact_set.get("c").content

        # a and b content didn't change (only their dependencies were refreshed)
        assert artifact_set.get("a").source.content_digest == original_a_digest
        assert artifact_set.get("b").source.content_digest == original_b_digest

    def test_branch_change_only_affects_upstream_branch(self):
        """In a diamond-ish tree, a leaf change must select only its own
        branch plus the shared root - never the sibling branch."""
        # given
        #        skill-root
        #        /        \
        #   skill-left   skill-right
        #       |            |
        #   skill-l-leaf skill-r-leaf
        tool = ToolReference(uuid="t", type=ToolProviderType.BUILT_IN, provider="p", tool_name="tool")

        doc_root = SkillDocument(
            skill_id="root",
            content="root §[file].[app].[left]§ §[file].[app].[right]§",
            metadata=make_metadata(
                tools={},
                files=[
                    FileReference(source="app", asset_id="left"),
                    FileReference(source="app", asset_id="right"),
                ],
            ),
        )
        doc_left = SkillDocument(
            skill_id="left",
            content="left §[file].[app].[l-leaf]§",
            metadata=make_metadata(
                tools={},
                files=[FileReference(source="app", asset_id="l-leaf")],
            ),
        )
        doc_right = SkillDocument(
            skill_id="right",
            content="right §[file].[app].[r-leaf]§",
            metadata=make_metadata(
                tools={},
                files=[FileReference(source="app", asset_id="r-leaf")],
            ),
        )
        doc_l_leaf = SkillDocument(
            skill_id="l-leaf",
            content="left leaf §[tool].[p].[tool].[t]§",
            metadata=make_metadata(tools={"t": tool}, files=[]),
        )
        doc_r_leaf = SkillDocument(
            skill_id="r-leaf",
            content="right leaf",
            metadata=make_metadata(tools={}, files=[]),
        )

        tree = create_file_tree(
            AppAssetNode.create_file("root", "root.md"),
            AppAssetNode.create_file("left", "left.md"),
            AppAssetNode.create_file("right", "right.md"),
            AppAssetNode.create_file("l-leaf", "l-leaf.md"),
            AppAssetNode.create_file("r-leaf", "r-leaf.md"),
        )
        compiler = SkillCompiler()
        artifact_set = compiler.compile_all(
            [doc_root, doc_left, doc_right, doc_l_leaf, doc_r_leaf], tree, "assets-1"
        )

        # when - l-leaf changes
        affected_by_l_leaf = artifact_set.recompile_group_ids("l-leaf")

        # then - only left branch + root affected (not right branch)
        assert affected_by_l_leaf == {"root", "left", "l-leaf"}
        assert "right" not in affected_by_l_leaf
        assert "r-leaf" not in affected_by_l_leaf

        # when - r-leaf changes
        affected_by_r_leaf = artifact_set.recompile_group_ids("r-leaf")

        # then - only right branch + root affected (not left branch)
        assert affected_by_r_leaf == {"root", "right", "r-leaf"}
        assert "left" not in affected_by_r_leaf
        assert "l-leaf" not in affected_by_r_leaf

    def test_add_new_tool_to_leaf_propagates_to_all_upstream(self):
        """Adding a tool to a leaf, then recompiling the affected group, must
        surface that tool in every upstream skill's tool dependencies."""
        # given - chain without tools initially
        doc_a = SkillDocument(
            skill_id="a",
            content="A §[file].[app].[b]§",
            metadata=make_metadata(tools={}, files=[FileReference(source="app", asset_id="b")]),
        )
        doc_b = SkillDocument(
            skill_id="b",
            content="B §[file].[app].[c]§",
            metadata=make_metadata(tools={}, files=[FileReference(source="app", asset_id="c")]),
        )
        doc_c = SkillDocument(
            skill_id="c",
            content="C - no tools",
            metadata=make_metadata(tools={}, files=[]),
        )

        tree = create_file_tree(
            AppAssetNode.create_file("a", "a.md"),
            AppAssetNode.create_file("b", "b.md"),
            AppAssetNode.create_file("c", "c.md"),
        )
        compiler = SkillCompiler()
        artifact_set = compiler.compile_all([doc_a, doc_b, doc_c], tree, "assets-1")

        # initially no tools anywhere
        assert len(artifact_set.get("a").tools.dependencies) == 0
        assert len(artifact_set.get("b").tools.dependencies) == 0
        assert len(artifact_set.get("c").tools.dependencies) == 0

        # when - add tool to c
        new_tool = ToolReference(uuid="new-t", type=ToolProviderType.BUILT_IN, provider="p", tool_name="new_tool")
        doc_c_v2 = SkillDocument(
            skill_id="c",
            content="C - now has tool: §[tool].[p].[new_tool].[new-t]§",
            metadata=make_metadata(tools={"new-t": new_tool}, files=[]),
        )

        # recompile affected
        affected = artifact_set.recompile_group_ids("c")
        doc_map = {"a": doc_a, "b": doc_b, "c": doc_c_v2}
        for skill_id in affected:
            updated = compiler.compile_one(artifact_set, doc_map[skill_id], tree, doc_map)
            artifact_set.upsert(updated)

        # then - new tool propagated to all upstream
        assert len(artifact_set.get("c").tools.dependencies) == 1
        assert len(artifact_set.get("b").tools.dependencies) == 1
        assert len(artifact_set.get("a").tools.dependencies) == 1

        assert artifact_set.get("a").tools.dependencies[0].tool_name == "new_tool"
        assert artifact_set.get("b").tools.dependencies[0].tool_name == "new_tool"
        assert artifact_set.get("c").tools.dependencies[0].tool_name == "new_tool"

    def test_remove_dependency_link_affects_recompile_group(self):
        """Recompiling a skill whose new version drops a file reference must
        remove that edge from the dependency graph."""
        # given - a -> b -> c
        doc_a = SkillDocument(
            skill_id="a",
            content="A refs B §[file].[app].[b]§",
            metadata=make_metadata(tools={}, files=[FileReference(source="app", asset_id="b")]),
        )
        doc_b = SkillDocument(
            skill_id="b",
            content="B refs C §[file].[app].[c]§",
            metadata=make_metadata(tools={}, files=[FileReference(source="app", asset_id="c")]),
        )
        doc_c = SkillDocument(
            skill_id="c",
            content="C leaf",
            metadata=make_metadata(tools={}, files=[]),
        )

        tree = create_file_tree(
            AppAssetNode.create_file("a", "a.md"),
            AppAssetNode.create_file("b", "b.md"),
            AppAssetNode.create_file("c", "c.md"),
        )
        compiler = SkillCompiler()
        artifact_set = compiler.compile_all([doc_a, doc_b, doc_c], tree, "assets-1")

        # initially c change affects a, b, c
        assert artifact_set.recompile_group_ids("c") == {"a", "b", "c"}

        # when - b no longer refs c
        doc_b_v2 = SkillDocument(
            skill_id="b",
            content="B standalone now",
            metadata=make_metadata(tools={}, files=[]),
        )
        doc_map = {"a": doc_a, "b": doc_b_v2, "c": doc_c}

        # recompile b (which changes its dependencies)
        updated_b = compiler.compile_one(artifact_set, doc_b_v2, tree, doc_map)
        artifact_set.upsert(updated_b)

        # then - c change now only affects c (b no longer depends on c)
        # note: reverse_graph still has old data until we clean it
        # in real usage, we'd rebuild graphs or clean stale entries
        assert "c" not in artifact_set.dependency_graph.get("b", [])

    def test_complex_graph_multiple_changes(self):
        """In a DAG with shared dependencies, recompile_group_ids must select
        exactly the transitive dependents of the changed node."""
        # given - complex dependency graph
        #
        #   A -----> B -----> E
        #   |        |
        #   v        v
        #   C -----> D
        #
        # A depends on B, C
        # B depends on D, E
        # C depends on D
        tool_d = ToolReference(uuid="t-d", type=ToolProviderType.BUILT_IN, provider="p", tool_name="tool_d")
        tool_e = ToolReference(uuid="t-e", type=ToolProviderType.BUILT_IN, provider="p", tool_name="tool_e")

        doc_a = SkillDocument(
            skill_id="a",
            content="A §[file].[app].[b]§ §[file].[app].[c]§",
            metadata=make_metadata(
                tools={},
                files=[
                    FileReference(source="app", asset_id="b"),
                    FileReference(source="app", asset_id="c"),
                ],
            ),
        )
        doc_b = SkillDocument(
            skill_id="b",
            content="B §[file].[app].[d]§ §[file].[app].[e]§",
            metadata=make_metadata(
                tools={},
                files=[
                    FileReference(source="app", asset_id="d"),
                    FileReference(source="app", asset_id="e"),
                ],
            ),
        )
        doc_c = SkillDocument(
            skill_id="c",
            content="C §[file].[app].[d]§",
            metadata=make_metadata(
                tools={},
                files=[FileReference(source="app", asset_id="d")],
            ),
        )
        doc_d = SkillDocument(
            skill_id="d",
            content="D §[tool].[p].[tool_d].[t-d]§",
            metadata=make_metadata(tools={"t-d": tool_d}, files=[]),
        )
        doc_e = SkillDocument(
            skill_id="e",
            content="E §[tool].[p].[tool_e].[t-e]§",
            metadata=make_metadata(tools={"t-e": tool_e}, files=[]),
        )

        tree = create_file_tree(
            AppAssetNode.create_file("a", "a.md"),
            AppAssetNode.create_file("b", "b.md"),
            AppAssetNode.create_file("c", "c.md"),
            AppAssetNode.create_file("d", "d.md"),
            AppAssetNode.create_file("e", "e.md"),
        )
        compiler = SkillCompiler()
        artifact_set = compiler.compile_all([doc_a, doc_b, doc_c, doc_d, doc_e], tree, "assets-1")

        # verify initial state: A transitively inherits both leaf tools
        a_tools = {t.tool_name for t in artifact_set.get("a").tools.dependencies}
        assert a_tools == {"tool_d", "tool_e"}

        # when - d changes, who needs recompile?
        affected_by_d = artifact_set.recompile_group_ids("d")
        # d is depended by: b, c, and transitively a
        assert affected_by_d == {"a", "b", "c", "d"}
        assert "e" not in affected_by_d

        # when - e changes, who needs recompile?
        affected_by_e = artifact_set.recompile_group_ids("e")
        # e is depended by: b, and transitively a
        assert affected_by_e == {"a", "b", "e"}
        assert "c" not in affected_by_e
        assert "d" not in affected_by_e