diff --git a/api/core/app_assets/__init__.py b/api/core/app_assets/__init__.py new file mode 100644 index 0000000000..141489d5bd --- /dev/null +++ b/api/core/app_assets/__init__.py @@ -0,0 +1,34 @@ +from .assets import AssetItem, FileAsset +from .packager import AssetPackager, ZipPackager +from .parser import AssetItemParser, AssetParser, FileAssetParser, SkillAssetParser +from .paths import AssetPaths +from .skill import ( + FileReference, + SkillAsset, + SkillMetadata, + ToolConfiguration, + ToolDefinition, + ToolFieldConfig, + ToolReference, + ToolType, +) + +__all__ = [ + "AssetItem", + "AssetItemParser", + "AssetPackager", + "AssetParser", + "AssetPaths", + "FileAsset", + "FileAssetParser", + "FileReference", + "SkillAsset", + "SkillAssetParser", + "SkillMetadata", + "ToolConfiguration", + "ToolDefinition", + "ToolFieldConfig", + "ToolReference", + "ToolType", + "ZipPackager", +] diff --git a/api/core/app_assets/assets.py b/api/core/app_assets/assets.py new file mode 100644 index 0000000000..b3ded10683 --- /dev/null +++ b/api/core/app_assets/assets.py @@ -0,0 +1,22 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass + + +@dataclass +class AssetItem(ABC): + node_id: str + path: str + file_name: str + extension: str + + @abstractmethod + def get_storage_key(self) -> str: + raise NotImplementedError + + +@dataclass +class FileAsset(AssetItem): + storage_key: str + + def get_storage_key(self) -> str: + return self.storage_key diff --git a/api/core/app_assets/packager/__init__.py b/api/core/app_assets/packager/__init__.py new file mode 100644 index 0000000000..a9bc9147fa --- /dev/null +++ b/api/core/app_assets/packager/__init__.py @@ -0,0 +1,7 @@ +from .base import AssetPackager +from .zip_packager import ZipPackager + +__all__ = [ + "AssetPackager", + "ZipPackager", +] diff --git a/api/core/app_assets/packager/base.py b/api/core/app_assets/packager/base.py new file mode 100644 index 0000000000..404a24c1cb --- /dev/null +++ b/api/core/app_assets/packager/base.py @@ -0,0 +1,9 @@ +from abc import ABC, abstractmethod + +from core.app_assets.assets import AssetItem + + +class AssetPackager(ABC): + @abstractmethod + def package(self, assets: list[AssetItem]) -> bytes: + raise NotImplementedError diff --git a/api/core/app_assets/packager/zip_packager.py b/api/core/app_assets/packager/zip_packager.py new file mode 100644 index 0000000000..f776b58da6 --- /dev/null +++ b/api/core/app_assets/packager/zip_packager.py @@ -0,0 +1,27 @@ +import io +import zipfile +from typing import TYPE_CHECKING + +from core.app_assets.assets import AssetItem + +from .base import AssetPackager + +if TYPE_CHECKING: + from extensions.ext_storage import Storage + + +class ZipPackager(AssetPackager): + _storage: "Storage" + + def __init__(self, storage: "Storage") -> None: + self._storage = storage + + def package(self, assets: list[AssetItem]) -> bytes: + zip_buffer = io.BytesIO() + + with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf: + for asset in assets: + content = self._storage.load_once(asset.get_storage_key()) + zf.writestr(asset.path, content) + + return zip_buffer.getvalue() diff --git a/api/core/app_assets/parser/__init__.py b/api/core/app_assets/parser/__init__.py new file mode 100644 index 0000000000..c2d7352c20 --- /dev/null +++ b/api/core/app_assets/parser/__init__.py @@ -0,0 +1,10 @@ +from .asset_parser import AssetParser +from .base import AssetItemParser, FileAssetParser +from .skill_parser import SkillAssetParser + +__all__ = [ + "AssetItemParser", + "AssetParser", + "FileAssetParser", + "SkillAssetParser", +] diff --git a/api/core/app_assets/parser/asset_parser.py b/api/core/app_assets/parser/asset_parser.py new file mode 100644 index 0000000000..a8867e374e --- /dev/null +++ b/api/core/app_assets/parser/asset_parser.py @@ -0,0 +1,51 @@ +from typing import TYPE_CHECKING + +from core.app.entities.app_asset_entities import AppAssetFileTree +from core.app_assets.assets import AssetItem +from core.app_assets.paths import AssetPaths + +from .base import AssetItemParser, FileAssetParser + +if TYPE_CHECKING: + from extensions.ext_storage import Storage + + +class AssetParser: + _tree: AppAssetFileTree + _tenant_id: str + _app_id: str + _storage: "Storage" + _parsers: dict[str, AssetItemParser] + _default_parser: AssetItemParser + + def __init__( + self, + tree: AppAssetFileTree, + tenant_id: str, + app_id: str, + storage: "Storage", + ) -> None: + self._tree = tree + self._tenant_id = tenant_id + self._app_id = app_id + self._storage = storage + self._parsers = {} + self._default_parser = FileAssetParser() + + def register(self, extension: str, parser: AssetItemParser) -> None: + self._parsers[extension] = parser + + def parse(self) -> list[AssetItem]: + assets: list[AssetItem] = [] + + for node in self._tree.walk_files(): + path = self._tree.get_path(node.id).lstrip("/") + storage_key = AssetPaths.draft_file(self._tenant_id, self._app_id, node.id) + raw_bytes = self._storage.load_once(storage_key) + extension = node.extension or "" + + parser = self._parsers.get(extension, self._default_parser) + asset = parser.parse(node.id, path, node.name, extension, storage_key, raw_bytes) + assets.append(asset) + + return assets diff --git a/api/core/app_assets/parser/base.py b/api/core/app_assets/parser/base.py new file mode 100644 index 0000000000..ad0bac0dce --- /dev/null +++ b/api/core/app_assets/parser/base.py @@ -0,0 +1,36 @@ +from abc import ABC, abstractmethod + +from core.app_assets.assets import AssetItem, FileAsset + + +class AssetItemParser(ABC): + @abstractmethod + def parse( + self, + node_id: str, + path: str, + file_name: str, + extension: str, + storage_key: str, + raw_bytes: bytes, + ) -> AssetItem: + raise NotImplementedError + + +class FileAssetParser(AssetItemParser): + def parse( + self, + node_id: str, + path: str, + file_name: str, + extension: str, + storage_key: str, + raw_bytes: bytes, + ) -> FileAsset: + return FileAsset( + node_id=node_id, + path=path, + file_name=file_name, + extension=extension, + storage_key=storage_key, + ) diff --git a/api/core/app_assets/parser/skill_parser.py b/api/core/app_assets/parser/skill_parser.py new file mode 100644 index 0000000000..aad7a53b4f --- /dev/null +++ b/api/core/app_assets/parser/skill_parser.py @@ -0,0 +1,120 @@ +import json +import re +from typing import TYPE_CHECKING, Any + +from core.app_assets.paths import AssetPaths +from core.app_assets.skill import ( + FileReference, + SkillAsset, + SkillMetadata, + ToolReference, +) + +from .base import AssetItemParser + +if TYPE_CHECKING: + from extensions.ext_storage import Storage + +TOOL_REFERENCE_PATTERN = re.compile(r"§\[tool\]\.\[([^\]]+)\]\.\[([^\]]+)\]\.\[([^\]]+)\]§") +FILE_REFERENCE_PATTERN = re.compile(r"§\[file\]\.\[([^\]]+)\]\.\[([^\]]+)\]§") + + +class SkillAssetParser(AssetItemParser): + _tenant_id: str + _app_id: str + _publish_id: str + _storage: "Storage" + + def __init__( + self, + tenant_id: str, + app_id: str, + publish_id: str, + storage: "Storage", + ) -> None: + self._tenant_id = tenant_id + self._app_id = app_id + self._publish_id = publish_id + self._storage = storage + + def _get_resolved_key(self, node_id: str) -> str: + return AssetPaths.published_resolved_file(self._tenant_id, self._app_id, self._publish_id, node_id) + + def parse( + self, + node_id: str, + path: str, + file_name: str, + extension: str, + storage_key: str, + raw_bytes: bytes, + ) -> SkillAsset: + try: + data = json.loads(raw_bytes.decode("utf-8")) + except (json.JSONDecodeError, UnicodeDecodeError) as e: + raise ValueError(f"Invalid skill document JSON for {node_id}: {e}") from e + + if not isinstance(data, dict): + raise ValueError(f"Skill document {node_id} must be a JSON object") + + data_dict: dict[str, Any] = data + metadata_raw = data_dict.get("metadata", {}) + content = data_dict.get("content", "") + + if not isinstance(content, str): + raise ValueError(f"Skill document {node_id} 'content' must be a string") + + metadata = SkillMetadata.model_validate(metadata_raw) + + tool_references: list[ToolReference] = [] + for match in TOOL_REFERENCE_PATTERN.finditer(content): + tool_references.append( + ToolReference( + provider=match.group(1), + tool_name=match.group(2), + uuid=match.group(3), + raw=match.group(0), + ) + ) + + file_references: list[FileReference] = [] + for match in FILE_REFERENCE_PATTERN.finditer(content): + file_references.append( + FileReference( + source=match.group(1), + uuid=match.group(2), + raw=match.group(0), + ) + ) + + resolved_content = self._resolve_content(content, tool_references, file_references) + resolved_key = self._get_resolved_key(node_id) + self._storage.save(resolved_key, resolved_content.encode("utf-8")) + + return SkillAsset( + node_id=node_id, + path=path, + file_name=file_name, + extension=extension, + storage_key=resolved_key, + metadata=metadata, + content=resolved_content, + tool_references=tool_references, + file_references=file_references, + ) + + def _resolve_content( + self, + content: str, + tool_references: list[ToolReference], + file_references: list[FileReference], + ) -> str: + for ref in tool_references: + replacement = f"{ref.provider}/{ref.tool_name}" + content = content.replace(ref.raw, replacement) + + for ref in file_references: + replacement = f"[file:{ref.uuid}]" + content = content.replace(ref.raw, replacement) + + return content diff --git a/api/core/app_assets/paths.py b/api/core/app_assets/paths.py new file mode 100644 index 0000000000..00644bea7c --- /dev/null +++ b/api/core/app_assets/paths.py @@ -0,0 +1,18 @@ +class AssetPaths: + _BASE = "app_assets" + + @staticmethod + def draft_file(tenant_id: str, app_id: str, node_id: str) -> str: + return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/draft/{node_id}" + + @staticmethod + def published_zip(tenant_id: str, app_id: str, publish_id: str) -> str: + return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/published/{publish_id}.zip" + + @staticmethod + def published_resolved_file(tenant_id: str, app_id: str, publish_id: str, node_id: str) -> str: + return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/published/{publish_id}/resolved/{node_id}" + + @staticmethod + def published_tool_manifest(tenant_id: str, app_id: str, publish_id: str) -> str: + return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/published/{publish_id}/tools.json" diff --git a/api/core/app_assets/skill.py b/api/core/app_assets/skill.py new file mode 100644 index 0000000000..3658d06537 --- /dev/null +++ b/api/core/app_assets/skill.py @@ -0,0 +1,69 @@ +from dataclasses import dataclass, field +from enum import StrEnum +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + +from .assets import AssetItem + + +class ToolType(StrEnum): + MCP = "mcp" + BUILTIN = "builtin" + + +class ToolFieldConfig(BaseModel): + model_config = ConfigDict(extra="forbid") + + id: str + value: Any + auto: bool = False + + +class ToolConfiguration(BaseModel): + model_config = ConfigDict(extra="forbid") + + fields: list[ToolFieldConfig] = Field(default_factory=list) + + +class ToolDefinition(BaseModel): + model_config = ConfigDict(extra="forbid") + + type: ToolType + credential_id: str | None = None + configuration: ToolConfiguration = Field(default_factory=ToolConfiguration) + + +class ToolReference(BaseModel): + model_config = ConfigDict(extra="forbid") + + provider: str + tool_name: str + uuid: str + raw: str + + +class FileReference(BaseModel): + model_config = ConfigDict(extra="forbid") + + source: str + uuid: str + raw: str + + +class SkillMetadata(BaseModel): + model_config = ConfigDict(extra="allow") + + tools: dict[str, ToolDefinition] = Field(default_factory=dict) + + +@dataclass +class SkillAsset(AssetItem): + storage_key: str + metadata: SkillMetadata + content: str + tool_references: list[ToolReference] = field(default_factory=list) + file_references: list[FileReference] = field(default_factory=list) + + def get_storage_key(self) -> str: + return self.storage_key diff --git a/api/core/sandbox/initializer/app_assets_initializer.py b/api/core/sandbox/initializer/app_assets_initializer.py index 60cd209857..a7f0d2fedd 100644 --- a/api/core/sandbox/initializer/app_assets_initializer.py +++ b/api/core/sandbox/initializer/app_assets_initializer.py @@ -3,6 +3,7 @@ from io import BytesIO from sqlalchemy.orm import Session +from core.app_assets.paths import AssetPaths from core.virtual_environment.__base.helpers import execute, with_connection from core.virtual_environment.__base.virtual_environment import VirtualEnvironment from extensions.ext_database import db @@ -26,7 +27,7 @@ class AppAssetsInitializer(SandboxInitializer): logger.debug("No published assets for app_id=%s, skipping", self._app_id) return - zip_key = AppAssets.get_published_storage_key(self._tenant_id, self._app_id, published.id) + zip_key = AssetPaths.published_zip(self._tenant_id, self._app_id, published.id) try: zip_data = storage.load_once(zip_key) except Exception: diff --git a/api/core/skill/__init__.py b/api/core/skill/__init__.py new file mode 100644 index 0000000000..142f378e34 --- /dev/null +++ b/api/core/skill/__init__.py @@ -0,0 +1,8 @@ +from .entities import ToolManifest, ToolManifestEntry +from .skill_manager import SkillManager + +__all__ = [ + "SkillManager", + "ToolManifest", + "ToolManifestEntry", +] diff --git a/api/core/skill/entities/__init__.py b/api/core/skill/entities/__init__.py new file mode 100644 index 0000000000..2ca5ab17e6 --- /dev/null +++ b/api/core/skill/entities/__init__.py @@ -0,0 +1,6 @@ +from .tool_manifest import ToolManifest, ToolManifestEntry + +__all__ = [ + "ToolManifest", + "ToolManifestEntry", +] diff --git a/api/core/skill/entities/tool_manifest.py b/api/core/skill/entities/tool_manifest.py new file mode 100644 index 0000000000..dfcafaa6b1 --- /dev/null +++ b/api/core/skill/entities/tool_manifest.py @@ -0,0 +1,23 @@ +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + +from core.app_assets.skill import ToolType + + +class ToolManifestEntry(BaseModel): + model_config = ConfigDict(extra="forbid") + + uuid: str + type: ToolType + provider: str | None = None + tool_name: str | None = None + credential_id: str | None = None + configuration: dict[str, Any] | None = None + + +class ToolManifest(BaseModel): + model_config = ConfigDict(extra="forbid") + + tools: dict[str, ToolManifestEntry] = Field(default_factory=dict) + references: list[str] = Field(default_factory=list) diff --git a/api/core/skill/skill_manager.py b/api/core/skill/skill_manager.py new file mode 100644 index 0000000000..d281108f02 --- /dev/null +++ b/api/core/skill/skill_manager.py @@ -0,0 +1,51 @@ +from core.app_assets.paths import AssetPaths +from core.app_assets.skill import SkillAsset +from extensions.ext_storage import storage + +from .entities import ToolManifest, ToolManifestEntry + + +class SkillManager: + @staticmethod + def generate_tool_manifest(assets: list[SkillAsset]) -> ToolManifest: + tools: dict[str, ToolManifestEntry] = {} + references: list[str] = [] + + for asset in assets: + manifest = SkillManager._collect_asset_manifest(asset) + tools.update(manifest.tools) + references.extend(manifest.references) + + return ToolManifest(tools=tools, references=references) + + @staticmethod + def save_tool_manifest( + tenant_id: str, + app_id: str, + publish_id: str, + manifest: ToolManifest, + ) -> None: + if not manifest.tools: + return + + key = AssetPaths.published_tool_manifest(tenant_id, app_id, publish_id) + storage.save(key, manifest.model_dump_json(indent=2).encode("utf-8")) + + @staticmethod + def _collect_asset_manifest(asset: SkillAsset) -> ToolManifest: + tools: dict[str, ToolManifestEntry] = {} + + for uuid, tool_def in asset.metadata.tools.items(): + ref = next((r for r in asset.tool_references if r.uuid == uuid), None) + + tools[uuid] = ToolManifestEntry( + uuid=uuid, + type=tool_def.type, + provider=ref.provider if ref else None, + tool_name=ref.tool_name if ref else None, + credential_id=tool_def.credential_id, + configuration=tool_def.configuration.model_dump() if tool_def.configuration.fields else None, + ) + + references = [ref.raw for ref in asset.tool_references] + return ToolManifest(tools=tools, references=references) diff --git a/api/models/app_asset.py b/api/models/app_asset.py index 4c03cc3195..d2937a0238 100644 --- a/api/models/app_asset.py +++ b/api/models/app_asset.py @@ -46,13 +46,5 @@ class AppAssets(Base): def asset_tree(self, value: AppAssetFileTree) -> None: self._asset_tree = value.model_dump_json() - @staticmethod - def get_storage_key(tenant_id: str, app_id: str, node_id: str) -> str: - return f"app_assets/{tenant_id}/{app_id}/draft/{node_id}" - - @staticmethod - def get_published_storage_key(tenant_id: str, app_id: str, assets_id: str) -> str: - return f"app_assets/{tenant_id}/{app_id}/published/{assets_id}.zip" - def __repr__(self) -> str: return f"" diff --git a/api/services/app_asset_service.py b/api/services/app_asset_service.py index 7d736ba080..b33188ee6d 100644 --- a/api/services/app_asset_service.py +++ b/api/services/app_asset_service.py @@ -14,6 +14,12 @@ from core.app.entities.app_asset_entities import ( TreeParentNotFoundError, TreePathConflictError, ) +from core.app_assets.packager.zip_packager import ZipPackager +from core.app_assets.parser.asset_parser import AssetParser +from core.app_assets.parser.skill_parser import SkillAssetParser +from core.app_assets.paths import AssetPaths +from core.app_assets.skill import SkillAsset +from core.skill.skill_manager import SkillManager from extensions.ext_database import db from extensions.ext_storage import storage from extensions.storage.file_presign_storage import FilePresignStorage @@ -112,7 +118,7 @@ class AppAssetService: except TreePathConflictError as e: raise AppAssetPathConflictError(str(e)) from e - storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, node_id) + storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, node_id) storage.save(storage_key, content) assets.asset_tree = tree @@ -135,7 +141,7 @@ class AppAssetService: max_size_mb = AppAssetService.MAX_PREVIEW_CONTENT_SIZE / 1024 / 1024 raise AppAssetNodeTooLargeError(f"File node {node_id} size exceeded the limit: {max_size_mb} MB") - storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, node_id) + storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, node_id) return storage.load_once(storage_key) @staticmethod @@ -156,7 +162,7 @@ class AppAssetService: except TreeNodeNotFoundError as e: raise AppAssetNodeNotFoundError(str(e)) from e - storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, node_id) + storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, node_id) storage.save(storage_key, content) assets.asset_tree = tree @@ -249,7 +255,7 @@ class AppAssetService: raise AppAssetNodeNotFoundError(str(e)) from e for nid in removed_ids: - storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, nid) + storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, nid) try: storage.delete(storage_key) except Exception: @@ -261,23 +267,18 @@ class AppAssetService: @staticmethod def publish(app_model: App, account_id: str) -> AppAssets: + tenant_id = app_model.tenant_id + app_id = app_model.id with Session(db.engine, expire_on_commit=False) as session: assets = AppAssetService.get_or_create_assets(session, app_model, account_id) tree = assets.asset_tree - # TODO: use sandbox virtual environment to create zip file - zip_buffer = io.BytesIO() - with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf: - for file_node in tree.walk_files(): - storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, file_node.id) - content = storage.load_once(storage_key) - archive_path = tree.get_path(file_node.id).lstrip("/") - zf.writestr(archive_path, content) + publish_id = str(uuid4()) published = AppAssets( - id=str(uuid4()), - tenant_id=app_model.tenant_id, - app_id=app_model.id, + id=publish_id, + tenant_id=tenant_id, + app_id=app_id, version=str(naive_utc_now()), created_by=account_id, ) @@ -285,8 +286,30 @@ class AppAssetService: session.add(published) session.flush() - zip_key = AppAssets.get_published_storage_key(app_model.tenant_id, app_model.id, published.id) - storage.save(zip_key, zip_buffer.getvalue()) + parser = AssetParser(tree, tenant_id, app_id, storage) + parser.register( + "md", + SkillAssetParser(tenant_id, app_id, publish_id, storage), + ) + + assets = parser.parse() + manifest = SkillManager.generate_tool_manifest( + assets=[asset for asset in assets if isinstance(asset, SkillAsset)] + ) + + SkillManager.save_tool_manifest( + tenant_id, + app_id, + publish_id, + manifest, + ) + + # TODO: use VM zip packager and make this process async + packager = ZipPackager(storage) + + zip_bytes = packager.package(assets) + zip_key = AssetPaths.published_zip(tenant_id, app_id, publish_id) + storage.save(zip_key, zip_bytes) session.commit() @@ -311,7 +334,7 @@ class AppAssetService: if not published or published.version == AppAssets.VERSION_DRAFT: raise AppAssetNodeNotFoundError(f"Published version {assets_id} not found") - zip_key = AppAssets.get_published_storage_key(app_model.tenant_id, app_model.id, assets_id) + zip_key = AssetPaths.published_zip(app_model.tenant_id, app_model.id, assets_id) zip_data = storage.load_once(zip_key) archive_path = file_path.lstrip("/") @@ -335,6 +358,6 @@ class AppAssetService: if not node or node.node_type != AssetNodeType.FILE: raise AppAssetNodeNotFoundError(f"File node {node_id} not found") - storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, node_id) + storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, node_id) presign_storage = FilePresignStorage(storage.storage_runner) return presign_storage.get_download_url(storage_key, expires_in)