diff --git a/api/controllers/console/app/app.py b/api/controllers/console/app/app.py index dad184c54b..d71864302c 100644 --- a/api/controllers/console/app/app.py +++ b/api/controllers/console/app/app.py @@ -99,6 +99,11 @@ class AppExportQuery(BaseModel): workflow_id: str | None = Field(default=None, description="Specific workflow ID to export") +class AppExportBundleQuery(BaseModel): + include_secret: bool = Field(default=False, description="Include secrets in export") + workflow_id: str | None = Field(default=None, description="Specific workflow ID to export") + + class AppNamePayload(BaseModel): name: str = Field(..., min_length=1, description="Name to check") @@ -650,6 +655,36 @@ class AppExportApi(Resource): return payload.model_dump(mode="json") +@console_ns.route("/apps//export-bundle") +class AppExportBundleApi(Resource): + @get_app_model + @setup_required + @login_required + @account_initialization_required + @edit_permission_required + def get(self, app_model): + from io import BytesIO + + from flask import send_file + + from services.app_bundle_service import AppBundleService + + args = AppExportBundleQuery.model_validate(request.args.to_dict(flat=True)) + + result = AppBundleService.export_bundle( + app_model=app_model, + include_secret=args.include_secret, + workflow_id=args.workflow_id, + ) + + return send_file( + BytesIO(result.zip_bytes), + mimetype="application/zip", + as_attachment=True, + download_name=result.filename, + ) + + @console_ns.route("/apps//name") class AppNameApi(Resource): @console_ns.doc("check_app_name") diff --git a/api/controllers/console/app/app_asset.py b/api/controllers/console/app/app_asset.py index 80d7673995..e71c4a83c2 100644 --- a/api/controllers/console/app/app_asset.py +++ b/api/controllers/console/app/app_asset.py @@ -243,22 +243,6 @@ class AppAssetNodeReorderResource(Resource): raise AppAssetNodeNotFoundError() -@console_ns.route("/apps//assets/publish") -class AppAssetPublishResource(Resource): - @setup_required - @login_required - @account_initialization_required - @get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW]) - def post(self, app_model: App): - current_user, _ = current_account_with_tenant() - published = AppAssetService.publish(app_model, current_user.id) - return { - "id": published.id, - "version": published.version, - "asset_tree": published.asset_tree.model_dump(), - }, 201 - - @console_ns.route("/apps//assets/files//download-url") class AppAssetFileDownloadUrlResource(Resource): @setup_required diff --git a/api/controllers/console/app/app_import.py b/api/controllers/console/app/app_import.py index 22e2aeb720..eb7c3ce1c3 100644 --- a/api/controllers/console/app/app_import.py +++ b/api/controllers/console/app/app_import.py @@ -51,6 +51,14 @@ class AppImportPayload(BaseModel): app_id: str | None = None +class AppImportBundlePayload(BaseModel): + name: str | None = None + description: str | None = None + icon_type: str | None = None + icon: str | None = None + icon_background: str | None = None + + console_ns.schema_model( AppImportPayload.__name__, AppImportPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0) ) @@ -139,3 +147,55 @@ class AppImportCheckDependenciesApi(Resource): result = import_service.check_dependencies(app_model=app_model) return result.model_dump(mode="json"), 200 + + +@console_ns.route("/apps/imports-bundle") +class AppImportBundleApi(Resource): + @setup_required + @login_required + @account_initialization_required + @marshal_with(app_import_model) + @cloud_edition_billing_resource_check("apps") + @edit_permission_required + def post(self): + from flask import request + + from core.app.entities.app_bundle_entities import BundleFormatError + from services.app_bundle_service import AppBundleService + + current_user, _ = current_account_with_tenant() + + if "file" not in request.files: + return {"error": "No file provided"}, 400 + + file = request.files["file"] + if not file.filename or not file.filename.endswith(".zip"): + return {"error": "Invalid file format, expected .zip"}, 400 + + zip_bytes = file.read() + + form_data = request.form.to_dict() + args = AppImportBundlePayload.model_validate(form_data) + + try: + result = AppBundleService.import_bundle( + account=current_user, + zip_bytes=zip_bytes, + name=args.name, + description=args.description, + icon_type=args.icon_type, + icon=args.icon, + icon_background=args.icon_background, + ) + except BundleFormatError as e: + return {"error": str(e)}, 400 + + if result.app_id and FeatureService.get_system_features().webapp_auth.enabled: + EnterpriseService.WebAppAuth.update_app_access_mode(result.app_id, "private") + + status = result.status + if status == ImportStatus.FAILED: + return result.model_dump(mode="json"), 400 + elif status == ImportStatus.PENDING: + return result.model_dump(mode="json"), 202 + return result.model_dump(mode="json"), 200 diff --git a/api/controllers/console/app/workflow.py b/api/controllers/console/app/workflow.py index 085c5cec3f..29200a3d22 100644 --- a/api/controllers/console/app/workflow.py +++ b/api/controllers/console/app/workflow.py @@ -686,13 +686,14 @@ class PublishedWorkflowApi(Resource): """ Publish workflow """ + from services.app_bundle_service import AppBundleService + current_user, _ = current_account_with_tenant() args = PublishWorkflowPayload.model_validate(console_ns.payload or {}) - workflow_service = WorkflowService() with Session(db.engine) as session: - workflow = workflow_service.publish_workflow( + workflow = AppBundleService.publish( session=session, app_model=app_model, account=current_user, diff --git a/api/core/app/entities/app_bundle_entities.py b/api/core/app/entities/app_bundle_entities.py new file mode 100644 index 0000000000..b81fec62ae --- /dev/null +++ b/api/core/app/entities/app_bundle_entities.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import re + +from pydantic import BaseModel, Field + +# Constants +BUNDLE_DSL_FILENAME_PATTERN = re.compile(r"^[^/]+\.ya?ml$") +BUNDLE_MAX_SIZE = 50 * 1024 * 1024 # 50MB + + +# Exceptions +class BundleFormatError(Exception): + """Raised when bundle format is invalid.""" + + pass + + +class ZipSecurityError(Exception): + """Raised when zip file contains security violations.""" + + pass + + +# Entities +class BundleExportResult(BaseModel): + zip_bytes: bytes = Field(description="ZIP file content as bytes") + filename: str = Field(description="Suggested filename for the ZIP") + + +class SourceFileEntry(BaseModel): + path: str = Field(description="File path within the ZIP") + node_id: str = Field(description="Node ID in the asset tree") + + +class ExtractedFile(BaseModel): + path: str = Field(description="Relative path of the extracted file") + content: bytes = Field(description="File content as bytes") + + +class ExtractedFolder(BaseModel): + path: str = Field(description="Relative path of the extracted folder") diff --git a/api/core/app_assets/__init__.py b/api/core/app_assets/__init__.py index fa57145176..4490a8eda7 100644 --- a/api/core/app_assets/__init__.py +++ b/api/core/app_assets/__init__.py @@ -4,7 +4,7 @@ from .entities import ( FileAsset, SkillAsset, ) -from .packager import AssetPackager, ZipPackager +from .packager import AssetPackager, AssetZipPackager from .parser import AssetItemParser, AssetParser, FileAssetParser, SkillAssetParser from .paths import AssetPaths @@ -15,9 +15,9 @@ __all__ = [ "AssetPackager", "AssetParser", "AssetPaths", + "AssetZipPackager", "FileAsset", "FileAssetParser", "SkillAsset", "SkillAssetParser", - "ZipPackager", ] diff --git a/api/core/app_assets/converters.py b/api/core/app_assets/converters.py new file mode 100644 index 0000000000..c610fff542 --- /dev/null +++ b/api/core/app_assets/converters.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from core.app.entities.app_asset_entities import AppAssetFileTree, AssetNodeType +from core.app_assets.entities import FileAsset +from core.app_assets.paths import AssetPaths + + +def tree_to_asset_items( + tree: AppAssetFileTree, + tenant_id: str, + app_id: str, +) -> list[FileAsset]: + """ + Convert AppAssetFileTree to list of FileAsset for packaging. + + Args: + tree: The asset file tree to convert + tenant_id: Tenant ID for storage key generation + app_id: App ID for storage key generation + + Returns: + List of FileAsset items ready for packaging + """ + items: list[FileAsset] = [] + for node in tree.nodes: + if node.node_type == AssetNodeType.FILE: + path = tree.get_path(node.id) + storage_key = AssetPaths.draft_file(tenant_id, app_id, node.id) + items.append( + FileAsset( + asset_id=node.id, + path=path, + file_name=node.name, + extension=node.extension or "", + storage_key=storage_key, + ) + ) + return items diff --git a/api/core/app_assets/packager/__init__.py b/api/core/app_assets/packager/__init__.py index a9bc9147fa..2a52edcaa5 100644 --- a/api/core/app_assets/packager/__init__.py +++ b/api/core/app_assets/packager/__init__.py @@ -1,7 +1,7 @@ +from .asset_zip_packager import AssetZipPackager from .base import AssetPackager -from .zip_packager import ZipPackager __all__ = [ "AssetPackager", - "ZipPackager", + "AssetZipPackager", ] diff --git a/api/core/app_assets/packager/asset_zip_packager.py b/api/core/app_assets/packager/asset_zip_packager.py new file mode 100644 index 0000000000..b48099bf3c --- /dev/null +++ b/api/core/app_assets/packager/asset_zip_packager.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import io +import zipfile +from concurrent.futures import ThreadPoolExecutor +from threading import Lock +from typing import TYPE_CHECKING + +from core.app_assets.entities import AssetItem + +if TYPE_CHECKING: + from extensions.ext_storage import Storage + + +class AssetZipPackager: + """ + Unified ZIP packager for assets. + Automatically creates directory entries from asset paths. + """ + + def __init__(self, storage: Storage, *, max_workers: int = 8) -> None: + self._storage = storage + self._max_workers = max_workers + + def package(self, assets: list[AssetItem], *, prefix: str = "") -> bytes: + """ + Package assets into a ZIP file. + + Args: + assets: List of assets to package + prefix: Optional prefix to add to all paths in the ZIP + + Returns: + ZIP file content as bytes + """ + zip_buffer = io.BytesIO() + + # Extract folder paths from asset paths + folder_paths = self._extract_folder_paths(assets, prefix) + + with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf: + # Create directory entries + for folder_path in sorted(folder_paths): + zf.writestr(zipfile.ZipInfo(folder_path + "/"), "") + + # Write files in parallel + if assets: + self._write_files_parallel(zf, assets, prefix) + + return zip_buffer.getvalue() + + def _extract_folder_paths(self, assets: list[AssetItem], prefix: str) -> set[str]: + """Extract all folder paths from asset paths.""" + folders: set[str] = set() + for asset in assets: + full_path = f"{prefix}/{asset.path}" if prefix else asset.path + parts = full_path.split("/")[:-1] # Remove filename + folders.update("/".join(parts[:i]) for i in range(1, len(parts) + 1)) + return folders + + def _write_files_parallel( + self, + zf: zipfile.ZipFile, + assets: list[AssetItem], + prefix: str, + ) -> None: + lock = Lock() + + def load_and_write(asset: AssetItem) -> None: + content = self._storage.load_once(asset.get_storage_key()) + full_path = f"{prefix}/{asset.path}" if prefix else asset.path + with lock: + zf.writestr(full_path, content) + + with ThreadPoolExecutor(max_workers=self._max_workers) as executor: + futures = [executor.submit(load_and_write, a) for a in assets] + for future in futures: + future.result() diff --git a/api/core/app_assets/packager/zip_packager.py b/api/core/app_assets/packager/zip_packager.py deleted file mode 100644 index 09a7d860d7..0000000000 --- a/api/core/app_assets/packager/zip_packager.py +++ /dev/null @@ -1,42 +0,0 @@ -import io -import zipfile -from concurrent.futures import Future, ThreadPoolExecutor -from threading import Lock -from typing import TYPE_CHECKING - -from core.app_assets.entities import AssetItem - -from .base import AssetPackager - -if TYPE_CHECKING: - from extensions.ext_storage import Storage - - -class ZipPackager(AssetPackager): - _storage: "Storage" - - def __init__(self, storage: "Storage") -> None: - self._storage = storage - - def package(self, assets: list[AssetItem]) -> bytes: - zip_buffer = io.BytesIO() - - with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf: - lock = Lock() - # FOR DELVELPMENT AND TESTING ONLY, TODO: optimize - with ThreadPoolExecutor(max_workers=8) as executor: - futures: list[Future[None]] = [] - for asset in assets: - - def _write_asset(a: AssetItem) -> None: - content = self._storage.load_once(a.get_storage_key()) - with lock: - zf.writestr(a.path, content) - - futures.append(executor.submit(_write_asset, asset)) - - # Wait for all futures to complete - for future in futures: - future.result() - - return zip_buffer.getvalue() diff --git a/api/core/app_assets/paths.py b/api/core/app_assets/paths.py index fd900729ee..2eceadb798 100644 --- a/api/core/app_assets/paths.py +++ b/api/core/app_assets/paths.py @@ -16,3 +16,8 @@ class AssetPaths: @staticmethod def build_skill_artifact_set(tenant_id: str, app_id: str, assets_id: str) -> str: return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}/skill_artifact_set.json" + + @staticmethod + def build_source_zip(tenant_id: str, app_id: str, workflow_id: str) -> str: + """Storage key for source assets zip (editable files snapshot at publish time).""" + return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/sources/{workflow_id}.zip" diff --git a/api/core/app_bundle/__init__.py b/api/core/app_bundle/__init__.py new file mode 100644 index 0000000000..5c1c22f206 --- /dev/null +++ b/api/core/app_bundle/__init__.py @@ -0,0 +1,5 @@ +from .source_zip_extractor import SourceZipExtractor + +__all__ = [ + "SourceZipExtractor", +] diff --git a/api/core/app_bundle/source_zip_extractor.py b/api/core/app_bundle/source_zip_extractor.py new file mode 100644 index 0000000000..d5bb6a4cac --- /dev/null +++ b/api/core/app_bundle/source_zip_extractor.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import hashlib +import io +import zipfile +from collections.abc import Callable +from typing import TYPE_CHECKING +from uuid import uuid4 + +from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode +from core.app.entities.app_bundle_entities import ExtractedFile, ExtractedFolder, ZipSecurityError + +if TYPE_CHECKING: + from extensions.ext_storage import Storage + + +class SourceZipExtractor: + def __init__(self, storage: Storage) -> None: + self._storage = storage + + def extract_entries( + self, zip_bytes: bytes, *, expected_prefix: str + ) -> tuple[list[ExtractedFolder], list[ExtractedFile]]: + folders: list[ExtractedFolder] = [] + files: list[ExtractedFile] = [] + + with zipfile.ZipFile(io.BytesIO(zip_bytes), "r") as zf: + for info in zf.infolist(): + name = info.filename + self._validate_path(name) + + if not name.startswith(expected_prefix): + continue + + relative_path = name[len(expected_prefix) :].lstrip("/") + if not relative_path: + continue + + if info.is_dir(): + folders.append(ExtractedFolder(path=relative_path.rstrip("/"))) + else: + content = zf.read(info) + files.append(ExtractedFile(path=relative_path, content=content)) + + return folders, files + + def build_tree_and_save( + self, + folders: list[ExtractedFolder], + files: list[ExtractedFile], + tenant_id: str, + app_id: str, + storage_key_fn: Callable[[str, str, str], str], + ) -> AppAssetFileTree: + tree = AppAssetFileTree() + path_to_node_id: dict[str, str] = {} + + all_folder_paths = {f.path for f in folders} + for file in files: + self._ensure_parent_folders(file.path, all_folder_paths) + + sorted_folders = sorted(all_folder_paths, key=lambda p: p.count("/")) + for folder_path in sorted_folders: + node_id = str(uuid4()) + name = folder_path.rsplit("/", 1)[-1] + parent_path = folder_path.rsplit("/", 1)[0] if "/" in folder_path else None + parent_id = path_to_node_id.get(parent_path) if parent_path else None + + node = AppAssetNode.create_folder(node_id, name, parent_id) + tree.add(node) + path_to_node_id[folder_path] = node_id + + sorted_files = sorted(files, key=lambda f: f.path) + for file in sorted_files: + node_id = str(uuid4()) + name = file.path.rsplit("/", 1)[-1] + parent_path = file.path.rsplit("/", 1)[0] if "/" in file.path else None + parent_id = path_to_node_id.get(parent_path) if parent_path else None + + checksum = hashlib.sha256(file.content).hexdigest() + node = AppAssetNode.create_file(node_id, name, parent_id, len(file.content), checksum) + tree.add(node) + + storage_key = storage_key_fn(tenant_id, app_id, node_id) + self._storage.save(storage_key, file.content) + + return tree + + def _validate_path(self, path: str) -> None: + if ".." in path: + raise ZipSecurityError(f"Path traversal detected: {path}") + if path.startswith("/"): + raise ZipSecurityError(f"Absolute path detected: {path}") + if "\\" in path: + raise ZipSecurityError(f"Backslash in path: {path}") + + def _ensure_parent_folders(self, file_path: str, folder_set: set[str]) -> None: + parts = file_path.split("/")[:-1] + for i in range(1, len(parts) + 1): + parent = "/".join(parts[:i]) + folder_set.add(parent) diff --git a/api/core/sandbox/initializer/app_assets_initializer.py b/api/core/sandbox/initializer/app_assets_initializer.py index ba21739014..5d73ce2bcd 100644 --- a/api/core/sandbox/initializer/app_assets_initializer.py +++ b/api/core/sandbox/initializer/app_assets_initializer.py @@ -37,6 +37,11 @@ class AppAssetsInitializer(AsyncSandboxInitializer): ["sh", "-c", f"unzip {AppAssets.ZIP_PATH} -d {AppAssets.PATH} 2>/dev/null || [ $? -eq 1 ]"], error_message="Failed to unzip assets", ) + # Ensure directories have execute permission for traversal and files are readable + .add( + ["sh", "-c", f"chmod -R u+rwX,go+rX {AppAssets.PATH}"], + error_message="Failed to set permissions on assets", + ) .execute(timeout=APP_ASSETS_DOWNLOAD_TIMEOUT, raise_on_error=True) ) diff --git a/api/core/skill/skill_compiler.py b/api/core/skill/skill_compiler.py index 57ba9d9214..4f344f46bd 100644 --- a/api/core/skill/skill_compiler.py +++ b/api/core/skill/skill_compiler.py @@ -1,248 +1,390 @@ import hashlib -import logging import re -from collections.abc import Mapping -from datetime import UTC, datetime -from typing import Any +from collections.abc import Iterable, Mapping, Sequence +from dataclasses import dataclass +from typing import Any, Protocol, cast from core.app.entities.app_asset_entities import AppAssetFileTree from core.skill.entities.asset_references import AssetReferences from core.skill.entities.skill_bundle import SkillBundle from core.skill.entities.skill_bundle_entry import SkillBundleEntry, SourceInfo from core.skill.entities.skill_document import SkillDocument -from core.skill.entities.skill_metadata import ( - FileReference, - SkillMetadata, - ToolConfiguration, - ToolReference, -) +from core.skill.entities.skill_metadata import FileReference, SkillMetadata, ToolConfiguration, ToolReference from core.skill.entities.tool_dependencies import ToolDependencies, ToolDependency from core.tools.entities.tool_entities import ToolProviderType -logger = logging.getLogger(__name__) -TOOL_REFERENCE_PATTERN = re.compile(r"§\[tool\]\.\[([^\]]+)\]\.\[([^\]]+)\]\.\[([^\]]+)\]§") -FILE_REFERENCE_PATTERN = re.compile(r"§\[file\]\.\[([^\]]+)\]\.\[([^\]]+)\]§") +class PathResolver(Protocol): + def resolve(self, source_id: str, target_id: str) -> str: ... + + +class ToolResolver(Protocol): + def resolve(self, tool_ref: ToolReference) -> str: ... + + +@dataclass(frozen=True) +class CompilerConfig: + tool_pattern: re.Pattern[str] = re.compile(r"§\[tool\]\.\[.*?\]\.\[.*?\]\.\[(.*?)\]§") + file_pattern: re.Pattern[str] = re.compile(r"§\[file\]\.\[.*?\]\.\[(.*?)\]§") + + +class FileTreePathResolver: + def __init__(self, tree: AppAssetFileTree, base_path: str = ""): + self._tree = tree + self._base_path = base_path.rstrip("/") + + def resolve(self, source_id: str, target_id: str) -> str: + source_node = self._tree.get(source_id) + target_node = self._tree.get(target_id) + + if target_node is None: + return "[File not found]" + + if source_node is not None: + return self._tree.relative_path(source_node, target_node) + + full_path = self._tree.get_path(target_node.id) + if self._base_path: + return f"{self._base_path}/{full_path}" + return full_path + + +class DefaultToolResolver: + def resolve(self, tool_ref: ToolReference) -> str: + return f"[Executable: {tool_ref.tool_name}_{tool_ref.uuid} --help command]" class SkillCompiler: - def _parse_metadata(self, content: str, raw_metadata: Mapping[str, Any]) -> SkillMetadata: - tools_raw: dict[str, Any] = dict(raw_metadata.get("tools", {})) - tools: dict[str, ToolReference] = {} - files: list[FileReference] = [] - - for match in TOOL_REFERENCE_PATTERN.finditer(content): - tool_id = match.group(3) - tool_name = match.group(2) - tool_provider = match.group(1) - tool_meta = tools_raw.get(tool_id) - if tool_meta is None: - continue - - config_raw = tool_meta.get("configuration", {}) - configuration = ToolConfiguration.model_validate(config_raw) if config_raw else None - tools[tool_id] = ToolReference( - uuid=tool_id, - type=ToolProviderType.value_of(tool_meta.get("type")), - provider=tool_provider, - tool_name=tool_name, - credential_id=tool_meta.get("credential_id"), - configuration=configuration, - ) - - for match in FILE_REFERENCE_PATTERN.finditer(content): - files.append( - FileReference( - source=match.group(1), - asset_id=match.group(2), - ) - ) - - return SkillMetadata(tools=tools, files=files) + def __init__( + self, + path_resolver: PathResolver | None = None, + tool_resolver: ToolResolver | None = None, + config: CompilerConfig | None = None, + ): + self._path_resolver = path_resolver + self._tool_resolver = tool_resolver or DefaultToolResolver() + self._config = config or CompilerConfig() def compile_all( self, - documents: list[SkillDocument], + documents: Iterable[SkillDocument], file_tree: AppAssetFileTree, assets_id: str, ) -> SkillBundle: - bundle = SkillBundle( - assets_id=assets_id, - built_at=datetime.now(UTC), - ) - - doc_map: dict[str, SkillDocument] = {doc.skill_id: doc for doc in documents} - parsed_metadata: dict[str, SkillMetadata] = {} - - for doc in documents: - metadata = self._parse_metadata(doc.content, doc.metadata) - parsed_metadata[doc.skill_id] = metadata - direct_skill_refs = self._extract_skill_refs(metadata, doc_map) - bundle.dependency_graph[doc.skill_id] = list(direct_skill_refs) - for ref_id in direct_skill_refs: - if ref_id not in bundle.reverse_graph: - bundle.reverse_graph[ref_id] = [] - bundle.reverse_graph[ref_id].append(doc.skill_id) - - for doc in documents: - metadata = parsed_metadata[doc.skill_id] - entry = self._compile_single(doc, metadata, bundle, parsed_metadata, file_tree) - bundle.upsert(entry) - - return bundle + path_resolver = self._path_resolver or FileTreePathResolver(file_tree) + return self._compile_batch_internal(documents, assets_id, path_resolver) def compile_one( self, bundle: SkillBundle, document: SkillDocument, file_tree: AppAssetFileTree, - all_documents: dict[str, SkillDocument] | None = None, + base_path: str = "", ) -> SkillBundleEntry: - doc_map = all_documents or {} - if document.skill_id not in doc_map: - doc_map[document.skill_id] = document - - parsed_metadata: dict[str, SkillMetadata] = {} - for skill_id, doc in doc_map.items(): - parsed_metadata[skill_id] = self._parse_metadata(doc.content, doc.metadata) - - metadata = parsed_metadata[document.skill_id] - direct_skill_refs = self._extract_skill_refs(metadata, doc_map) - bundle.dependency_graph[document.skill_id] = list(direct_skill_refs) - for ref_id in direct_skill_refs: - if ref_id not in bundle.reverse_graph: - bundle.reverse_graph[ref_id] = [] - if document.skill_id not in bundle.reverse_graph[ref_id]: - bundle.reverse_graph[ref_id].append(document.skill_id) - - return self._compile_single(document, metadata, bundle, parsed_metadata, file_tree) - - def _compile_single( - self, - document: SkillDocument, - metadata: SkillMetadata, - bundle: SkillBundle, - parsed_metadata: dict[str, SkillMetadata], - file_tree: AppAssetFileTree, - ) -> SkillBundleEntry: - all_tools, all_files = self._compute_transitive_closure( - document.skill_id, bundle, parsed_metadata + path_resolver = self._path_resolver or FileTreePathResolver(file_tree, base_path) + resolved_content, tool_dependencies = self._compile_template_internal( + document.content, document.metadata, bundle, path_resolver ) - current_node = file_tree.get(document.skill_id) - - resolved_content = self._resolve_content( - document.content, metadata, current_node, file_tree - ) - - content_digest = hashlib.sha256(document.content.encode("utf-8")).hexdigest() + metadata = self._parse_metadata(document.content, document.metadata) + final_files: dict[str, FileReference] = {f.asset_id: f for f in metadata.files} return SkillBundleEntry( skill_id=document.skill_id, source=SourceInfo( asset_id=document.skill_id, - content_digest=content_digest, + content_digest=hashlib.sha256(document.content.encode("utf-8")).hexdigest(), + ), + tools=tool_dependencies, + files=AssetReferences(references=list(final_files.values())), + content=resolved_content, + ) + + def _compile_batch_internal( + self, + documents: Iterable[SkillDocument], + assets_id: str, + path_resolver: PathResolver, + ) -> SkillBundle: + doc_map = {doc.skill_id: doc for doc in documents} + graph: dict[str, set[str]] = {} + metadata_cache: dict[str, SkillMetadata] = {} + + # Phase 1: Parse metadata and build dependency graph + for doc in doc_map.values(): + metadata = self._parse_metadata(doc.content, doc.metadata) + metadata_cache[doc.skill_id] = metadata + + deps: set[str] = set() + for file_ref in metadata.files: + if file_ref.asset_id in doc_map: + deps.add(file_ref.asset_id) + graph[doc.skill_id] = deps + + bundle = SkillBundle(assets_id=assets_id) + bundle.dependency_graph = {k: list(v) for k, v in graph.items()} + + # Build reverse graph for propagation + reverse_graph: dict[str, set[str]] = {skill_id: set() for skill_id in doc_map} + for skill_id, deps in graph.items(): + for dep_id in deps: + if dep_id in reverse_graph: + reverse_graph[dep_id].add(skill_id) + bundle.reverse_graph = {k: list(v) for k, v in reverse_graph.items()} + + # Phase 2: Compile each skill independently (content + direct dependencies only) + for skill_id, doc in doc_map.items(): + metadata = metadata_cache[skill_id] + entry = self._compile_node_direct(doc, metadata, path_resolver) + bundle.upsert(entry) + + # Phase 3: Propagate transitive dependencies until fixed-point + self._propagate_transitive_dependencies(bundle, graph) + + return bundle + + def _compile_node_direct( + self, + doc: SkillDocument, + metadata: SkillMetadata, + path_resolver: PathResolver, + ) -> SkillBundleEntry: + """Compile a single skill with only its direct dependencies (no transitive).""" + direct_tools: dict[str, ToolDependency] = {} + direct_refs: dict[str, ToolReference] = {} + + for tool_ref in metadata.tools.values(): + key = f"{tool_ref.provider}.{tool_ref.tool_name}" + if key not in direct_tools: + direct_tools[key] = ToolDependency( + type=tool_ref.type, + provider=tool_ref.provider, + tool_name=tool_ref.tool_name, + ) + direct_refs[tool_ref.uuid] = tool_ref + + direct_files: dict[str, FileReference] = {f.asset_id: f for f in metadata.files} + resolved_content = self._resolve_content(doc.content, metadata, path_resolver, doc.skill_id) + + return SkillBundleEntry( + skill_id=doc.skill_id, + source=SourceInfo( + asset_id=doc.skill_id, + content_digest=hashlib.sha256(doc.content.encode("utf-8")).hexdigest(), ), tools=ToolDependencies( - dependencies=list(all_tools.values()), - references=list(metadata.tools.values()), + dependencies=list(direct_tools.values()), + references=list(direct_refs.values()), ), files=AssetReferences( - references=list(all_files.values()), + references=list(direct_files.values()), ), content=resolved_content, ) - def _extract_skill_refs( + def _propagate_transitive_dependencies( self, - metadata: SkillMetadata, - doc_map: dict[str, SkillDocument], - ) -> set[str]: - skill_refs: set[str] = set() - for file_ref in metadata.files: - if file_ref.asset_id in doc_map: - skill_refs.add(file_ref.asset_id) - return skill_refs - - def _compute_transitive_closure( - self, - skill_id: str, bundle: SkillBundle, - parsed_metadata: dict[str, SkillMetadata], - ) -> tuple[dict[str, ToolDependency], dict[str, FileReference]]: - all_tools: dict[str, ToolDependency] = {} - all_files: dict[str, FileReference] = {} + graph: dict[str, set[str]], + ) -> None: + """Iteratively propagate transitive dependencies until no changes occur.""" + changed = True + while changed: + changed = False + for skill_id, dep_ids in graph.items(): + entry = bundle.get(skill_id) + if not entry: + continue - visited: set[str] = set() - queue = [skill_id] + # Collect current tools and files + current_tools: dict[str, ToolDependency] = { + f"{d.provider}.{d.tool_name}": d for d in entry.tools.dependencies + } + current_refs: dict[str, ToolReference] = {r.uuid: r for r in entry.tools.references} + current_files: dict[str, FileReference] = {f.asset_id: f for f in entry.files.references} - while queue: - current_id = queue.pop(0) - if current_id in visited: - continue - visited.add(current_id) + original_tool_count = len(current_tools) + original_ref_count = len(current_refs) + original_file_count = len(current_files) - metadata = parsed_metadata.get(current_id) - if metadata is None: - existing_entry = bundle.get(current_id) - if existing_entry: - for dep in existing_entry.tools.dependencies: - key = f"{dep.provider}.{dep.tool_name}" - if key not in all_tools: - all_tools[key] = dep - for file_ref in existing_entry.files.references: - if file_ref.asset_id not in all_files: - all_files[file_ref.asset_id] = file_ref - continue + # Merge from dependencies + for dep_id in dep_ids: + dep_entry = bundle.get(dep_id) + if not dep_entry: + continue - for tool_ref in metadata.tools.values(): - key = f"{tool_ref.provider}.{tool_ref.tool_name}" - if key not in all_tools: - all_tools[key] = ToolDependency( - type=tool_ref.type, - provider=tool_ref.provider, - tool_name=tool_ref.tool_name, + for tool_dep in dep_entry.tools.dependencies: + key = f"{tool_dep.provider}.{tool_dep.tool_name}" + if key not in current_tools: + current_tools[key] = tool_dep + + for tool_ref in dep_entry.tools.references: + if tool_ref.uuid not in current_refs: + current_refs[tool_ref.uuid] = tool_ref + + for file_ref in dep_entry.files.references: + if file_ref.asset_id not in current_files: + current_files[file_ref.asset_id] = file_ref + + # Check if anything changed + if ( + len(current_tools) != original_tool_count + or len(current_refs) != original_ref_count + or len(current_files) != original_file_count + ): + changed = True + # Update the entry with new transitive dependencies + updated_entry = SkillBundleEntry( + skill_id=entry.skill_id, + source=entry.source, + tools=ToolDependencies( + dependencies=list(current_tools.values()), + references=list(current_refs.values()), + ), + files=AssetReferences( + references=list(current_files.values()), + ), + content=entry.content, ) + bundle.upsert(updated_entry) - for file_ref in metadata.files: - if file_ref.asset_id not in all_files: - all_files[file_ref.asset_id] = file_ref - - for dep_id in bundle.dependency_graph.get(current_id, []): - if dep_id not in visited: - queue.append(dep_id) - - return all_tools, all_files - - def _resolve_content( + def _compile_template_internal( self, content: str, + metadata_dict: Mapping[str, Any], + context: SkillBundle, + path_resolver: PathResolver, + ) -> tuple[str, ToolDependencies]: + metadata = self._parse_metadata(content, metadata_dict) + + direct_deps: list[SkillBundleEntry] = [] + for file_ref in metadata.files: + artifact = context.get(file_ref.asset_id) + if artifact: + direct_deps.append(artifact) + + final_tools, final_refs = self._aggregate_dependencies(metadata, direct_deps) + + resolved_content = self._resolve_content(content, metadata, path_resolver, current_id="