feat(skill): skill parser & packager

This commit is contained in:
Harry 2026-01-19 12:40:44 +08:00
parent 245567118c
commit 0de32f682a
18 changed files with 535 additions and 28 deletions

View File

@ -0,0 +1,34 @@
from .assets import AssetItem, FileAsset
from .packager import AssetPackager, ZipPackager
from .parser import AssetItemParser, AssetParser, FileAssetParser, SkillAssetParser
from .paths import AssetPaths
from .skill import (
FileReference,
SkillAsset,
SkillMetadata,
ToolConfiguration,
ToolDefinition,
ToolFieldConfig,
ToolReference,
ToolType,
)
__all__ = [
"AssetItem",
"AssetItemParser",
"AssetPackager",
"AssetParser",
"AssetPaths",
"FileAsset",
"FileAssetParser",
"FileReference",
"SkillAsset",
"SkillAssetParser",
"SkillMetadata",
"ToolConfiguration",
"ToolDefinition",
"ToolFieldConfig",
"ToolReference",
"ToolType",
"ZipPackager",
]

View File

@ -0,0 +1,22 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
@dataclass
class AssetItem(ABC):
node_id: str
path: str
file_name: str
extension: str
@abstractmethod
def get_storage_key(self) -> str:
raise NotImplementedError
@dataclass
class FileAsset(AssetItem):
storage_key: str
def get_storage_key(self) -> str:
return self.storage_key

View File

@ -0,0 +1,7 @@
from .base import AssetPackager
from .zip_packager import ZipPackager
__all__ = [
"AssetPackager",
"ZipPackager",
]

View File

@ -0,0 +1,9 @@
from abc import ABC, abstractmethod
from core.app_assets.assets import AssetItem
class AssetPackager(ABC):
@abstractmethod
def package(self, assets: list[AssetItem]) -> bytes:
raise NotImplementedError

View File

@ -0,0 +1,27 @@
import io
import zipfile
from typing import TYPE_CHECKING
from core.app_assets.assets import AssetItem
from .base import AssetPackager
if TYPE_CHECKING:
from extensions.ext_storage import Storage
class ZipPackager(AssetPackager):
_storage: "Storage"
def __init__(self, storage: "Storage") -> None:
self._storage = storage
def package(self, assets: list[AssetItem]) -> bytes:
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf:
for asset in assets:
content = self._storage.load_once(asset.get_storage_key())
zf.writestr(asset.path, content)
return zip_buffer.getvalue()

View File

@ -0,0 +1,10 @@
from .asset_parser import AssetParser
from .base import AssetItemParser, FileAssetParser
from .skill_parser import SkillAssetParser
__all__ = [
"AssetItemParser",
"AssetParser",
"FileAssetParser",
"SkillAssetParser",
]

View File

@ -0,0 +1,51 @@
from typing import TYPE_CHECKING
from core.app.entities.app_asset_entities import AppAssetFileTree
from core.app_assets.assets import AssetItem
from core.app_assets.paths import AssetPaths
from .base import AssetItemParser, FileAssetParser
if TYPE_CHECKING:
from extensions.ext_storage import Storage
class AssetParser:
_tree: AppAssetFileTree
_tenant_id: str
_app_id: str
_storage: "Storage"
_parsers: dict[str, AssetItemParser]
_default_parser: AssetItemParser
def __init__(
self,
tree: AppAssetFileTree,
tenant_id: str,
app_id: str,
storage: "Storage",
) -> None:
self._tree = tree
self._tenant_id = tenant_id
self._app_id = app_id
self._storage = storage
self._parsers = {}
self._default_parser = FileAssetParser()
def register(self, extension: str, parser: AssetItemParser) -> None:
self._parsers[extension] = parser
def parse(self) -> list[AssetItem]:
assets: list[AssetItem] = []
for node in self._tree.walk_files():
path = self._tree.get_path(node.id).lstrip("/")
storage_key = AssetPaths.draft_file(self._tenant_id, self._app_id, node.id)
raw_bytes = self._storage.load_once(storage_key)
extension = node.extension or ""
parser = self._parsers.get(extension, self._default_parser)
asset = parser.parse(node.id, path, node.name, extension, storage_key, raw_bytes)
assets.append(asset)
return assets

View File

@ -0,0 +1,36 @@
from abc import ABC, abstractmethod
from core.app_assets.assets import AssetItem, FileAsset
class AssetItemParser(ABC):
@abstractmethod
def parse(
self,
node_id: str,
path: str,
file_name: str,
extension: str,
storage_key: str,
raw_bytes: bytes,
) -> AssetItem:
raise NotImplementedError
class FileAssetParser(AssetItemParser):
def parse(
self,
node_id: str,
path: str,
file_name: str,
extension: str,
storage_key: str,
raw_bytes: bytes,
) -> FileAsset:
return FileAsset(
node_id=node_id,
path=path,
file_name=file_name,
extension=extension,
storage_key=storage_key,
)

View File

@ -0,0 +1,120 @@
import json
import re
from typing import TYPE_CHECKING, Any
from core.app_assets.paths import AssetPaths
from core.app_assets.skill import (
FileReference,
SkillAsset,
SkillMetadata,
ToolReference,
)
from .base import AssetItemParser
if TYPE_CHECKING:
from extensions.ext_storage import Storage
TOOL_REFERENCE_PATTERN = re.compile(r"§\[tool\]\.\[([^\]]+)\]\.\[([^\]]+)\]\.\[([^\]]+)\")
FILE_REFERENCE_PATTERN = re.compile(r"§\[file\]\.\[([^\]]+)\]\.\[([^\]]+)\")
class SkillAssetParser(AssetItemParser):
_tenant_id: str
_app_id: str
_publish_id: str
_storage: "Storage"
def __init__(
self,
tenant_id: str,
app_id: str,
publish_id: str,
storage: "Storage",
) -> None:
self._tenant_id = tenant_id
self._app_id = app_id
self._publish_id = publish_id
self._storage = storage
def _get_resolved_key(self, node_id: str) -> str:
return AssetPaths.published_resolved_file(self._tenant_id, self._app_id, self._publish_id, node_id)
def parse(
self,
node_id: str,
path: str,
file_name: str,
extension: str,
storage_key: str,
raw_bytes: bytes,
) -> SkillAsset:
try:
data = json.loads(raw_bytes.decode("utf-8"))
except (json.JSONDecodeError, UnicodeDecodeError) as e:
raise ValueError(f"Invalid skill document JSON for {node_id}: {e}") from e
if not isinstance(data, dict):
raise ValueError(f"Skill document {node_id} must be a JSON object")
data_dict: dict[str, Any] = data
metadata_raw = data_dict.get("metadata", {})
content = data_dict.get("content", "")
if not isinstance(content, str):
raise ValueError(f"Skill document {node_id} 'content' must be a string")
metadata = SkillMetadata.model_validate(metadata_raw)
tool_references: list[ToolReference] = []
for match in TOOL_REFERENCE_PATTERN.finditer(content):
tool_references.append(
ToolReference(
provider=match.group(1),
tool_name=match.group(2),
uuid=match.group(3),
raw=match.group(0),
)
)
file_references: list[FileReference] = []
for match in FILE_REFERENCE_PATTERN.finditer(content):
file_references.append(
FileReference(
source=match.group(1),
uuid=match.group(2),
raw=match.group(0),
)
)
resolved_content = self._resolve_content(content, tool_references, file_references)
resolved_key = self._get_resolved_key(node_id)
self._storage.save(resolved_key, resolved_content.encode("utf-8"))
return SkillAsset(
node_id=node_id,
path=path,
file_name=file_name,
extension=extension,
storage_key=resolved_key,
metadata=metadata,
content=resolved_content,
tool_references=tool_references,
file_references=file_references,
)
def _resolve_content(
self,
content: str,
tool_references: list[ToolReference],
file_references: list[FileReference],
) -> str:
for ref in tool_references:
replacement = f"{ref.provider}/{ref.tool_name}"
content = content.replace(ref.raw, replacement)
for ref in file_references:
replacement = f"[file:{ref.uuid}]"
content = content.replace(ref.raw, replacement)
return content

View File

@ -0,0 +1,18 @@
class AssetPaths:
_BASE = "app_assets"
@staticmethod
def draft_file(tenant_id: str, app_id: str, node_id: str) -> str:
return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/draft/{node_id}"
@staticmethod
def published_zip(tenant_id: str, app_id: str, publish_id: str) -> str:
return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/published/{publish_id}.zip"
@staticmethod
def published_resolved_file(tenant_id: str, app_id: str, publish_id: str, node_id: str) -> str:
return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/published/{publish_id}/resolved/{node_id}"
@staticmethod
def published_tool_manifest(tenant_id: str, app_id: str, publish_id: str) -> str:
return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/published/{publish_id}/tools.json"

View File

@ -0,0 +1,69 @@
from dataclasses import dataclass, field
from enum import StrEnum
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from .assets import AssetItem
class ToolType(StrEnum):
MCP = "mcp"
BUILTIN = "builtin"
class ToolFieldConfig(BaseModel):
model_config = ConfigDict(extra="forbid")
id: str
value: Any
auto: bool = False
class ToolConfiguration(BaseModel):
model_config = ConfigDict(extra="forbid")
fields: list[ToolFieldConfig] = Field(default_factory=list)
class ToolDefinition(BaseModel):
model_config = ConfigDict(extra="forbid")
type: ToolType
credential_id: str | None = None
configuration: ToolConfiguration = Field(default_factory=ToolConfiguration)
class ToolReference(BaseModel):
model_config = ConfigDict(extra="forbid")
provider: str
tool_name: str
uuid: str
raw: str
class FileReference(BaseModel):
model_config = ConfigDict(extra="forbid")
source: str
uuid: str
raw: str
class SkillMetadata(BaseModel):
model_config = ConfigDict(extra="allow")
tools: dict[str, ToolDefinition] = Field(default_factory=dict)
@dataclass
class SkillAsset(AssetItem):
storage_key: str
metadata: SkillMetadata
content: str
tool_references: list[ToolReference] = field(default_factory=list)
file_references: list[FileReference] = field(default_factory=list)
def get_storage_key(self) -> str:
return self.storage_key

View File

@ -3,6 +3,7 @@ from io import BytesIO
from sqlalchemy.orm import Session
from core.app_assets.paths import AssetPaths
from core.virtual_environment.__base.helpers import execute, with_connection
from core.virtual_environment.__base.virtual_environment import VirtualEnvironment
from extensions.ext_database import db
@ -26,7 +27,7 @@ class AppAssetsInitializer(SandboxInitializer):
logger.debug("No published assets for app_id=%s, skipping", self._app_id)
return
zip_key = AppAssets.get_published_storage_key(self._tenant_id, self._app_id, published.id)
zip_key = AssetPaths.published_zip(self._tenant_id, self._app_id, published.id)
try:
zip_data = storage.load_once(zip_key)
except Exception:

View File

@ -0,0 +1,8 @@
from .entities import ToolManifest, ToolManifestEntry
from .skill_manager import SkillManager
__all__ = [
"SkillManager",
"ToolManifest",
"ToolManifestEntry",
]

View File

@ -0,0 +1,6 @@
from .tool_manifest import ToolManifest, ToolManifestEntry
__all__ = [
"ToolManifest",
"ToolManifestEntry",
]

View File

@ -0,0 +1,23 @@
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from core.app_assets.skill import ToolType
class ToolManifestEntry(BaseModel):
model_config = ConfigDict(extra="forbid")
uuid: str
type: ToolType
provider: str | None = None
tool_name: str | None = None
credential_id: str | None = None
configuration: dict[str, Any] | None = None
class ToolManifest(BaseModel):
model_config = ConfigDict(extra="forbid")
tools: dict[str, ToolManifestEntry] = Field(default_factory=dict)
references: list[str] = Field(default_factory=list)

View File

@ -0,0 +1,51 @@
from core.app_assets.paths import AssetPaths
from core.app_assets.skill import SkillAsset
from extensions.ext_storage import storage
from .entities import ToolManifest, ToolManifestEntry
class SkillManager:
@staticmethod
def generate_tool_manifest(assets: list[SkillAsset]) -> ToolManifest:
tools: dict[str, ToolManifestEntry] = {}
references: list[str] = []
for asset in assets:
manifest = SkillManager._collect_asset_manifest(asset)
tools.update(manifest.tools)
references.extend(manifest.references)
return ToolManifest(tools=tools, references=references)
@staticmethod
def save_tool_manifest(
tenant_id: str,
app_id: str,
publish_id: str,
manifest: ToolManifest,
) -> None:
if not manifest.tools:
return
key = AssetPaths.published_tool_manifest(tenant_id, app_id, publish_id)
storage.save(key, manifest.model_dump_json(indent=2).encode("utf-8"))
@staticmethod
def _collect_asset_manifest(asset: SkillAsset) -> ToolManifest:
tools: dict[str, ToolManifestEntry] = {}
for uuid, tool_def in asset.metadata.tools.items():
ref = next((r for r in asset.tool_references if r.uuid == uuid), None)
tools[uuid] = ToolManifestEntry(
uuid=uuid,
type=tool_def.type,
provider=ref.provider if ref else None,
tool_name=ref.tool_name if ref else None,
credential_id=tool_def.credential_id,
configuration=tool_def.configuration.model_dump() if tool_def.configuration.fields else None,
)
references = [ref.raw for ref in asset.tool_references]
return ToolManifest(tools=tools, references=references)

View File

@ -46,13 +46,5 @@ class AppAssets(Base):
def asset_tree(self, value: AppAssetFileTree) -> None:
self._asset_tree = value.model_dump_json()
@staticmethod
def get_storage_key(tenant_id: str, app_id: str, node_id: str) -> str:
return f"app_assets/{tenant_id}/{app_id}/draft/{node_id}"
@staticmethod
def get_published_storage_key(tenant_id: str, app_id: str, assets_id: str) -> str:
return f"app_assets/{tenant_id}/{app_id}/published/{assets_id}.zip"
def __repr__(self) -> str:
return f"<AppAssets(id={self.id}, app_id={self.app_id}, version={self.version})>"

View File

@ -14,6 +14,12 @@ from core.app.entities.app_asset_entities import (
TreeParentNotFoundError,
TreePathConflictError,
)
from core.app_assets.packager.zip_packager import ZipPackager
from core.app_assets.parser.asset_parser import AssetParser
from core.app_assets.parser.skill_parser import SkillAssetParser
from core.app_assets.paths import AssetPaths
from core.app_assets.skill import SkillAsset
from core.skill.skill_manager import SkillManager
from extensions.ext_database import db
from extensions.ext_storage import storage
from extensions.storage.file_presign_storage import FilePresignStorage
@ -112,7 +118,7 @@ class AppAssetService:
except TreePathConflictError as e:
raise AppAssetPathConflictError(str(e)) from e
storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, node_id)
storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, node_id)
storage.save(storage_key, content)
assets.asset_tree = tree
@ -135,7 +141,7 @@ class AppAssetService:
max_size_mb = AppAssetService.MAX_PREVIEW_CONTENT_SIZE / 1024 / 1024
raise AppAssetNodeTooLargeError(f"File node {node_id} size exceeded the limit: {max_size_mb} MB")
storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, node_id)
storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, node_id)
return storage.load_once(storage_key)
@staticmethod
@ -156,7 +162,7 @@ class AppAssetService:
except TreeNodeNotFoundError as e:
raise AppAssetNodeNotFoundError(str(e)) from e
storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, node_id)
storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, node_id)
storage.save(storage_key, content)
assets.asset_tree = tree
@ -249,7 +255,7 @@ class AppAssetService:
raise AppAssetNodeNotFoundError(str(e)) from e
for nid in removed_ids:
storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, nid)
storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, nid)
try:
storage.delete(storage_key)
except Exception:
@ -261,23 +267,18 @@ class AppAssetService:
@staticmethod
def publish(app_model: App, account_id: str) -> AppAssets:
tenant_id = app_model.tenant_id
app_id = app_model.id
with Session(db.engine, expire_on_commit=False) as session:
assets = AppAssetService.get_or_create_assets(session, app_model, account_id)
tree = assets.asset_tree
# TODO: use sandbox virtual environment to create zip file
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf:
for file_node in tree.walk_files():
storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, file_node.id)
content = storage.load_once(storage_key)
archive_path = tree.get_path(file_node.id).lstrip("/")
zf.writestr(archive_path, content)
publish_id = str(uuid4())
published = AppAssets(
id=str(uuid4()),
tenant_id=app_model.tenant_id,
app_id=app_model.id,
id=publish_id,
tenant_id=tenant_id,
app_id=app_id,
version=str(naive_utc_now()),
created_by=account_id,
)
@ -285,8 +286,30 @@ class AppAssetService:
session.add(published)
session.flush()
zip_key = AppAssets.get_published_storage_key(app_model.tenant_id, app_model.id, published.id)
storage.save(zip_key, zip_buffer.getvalue())
parser = AssetParser(tree, tenant_id, app_id, storage)
parser.register(
"md",
SkillAssetParser(tenant_id, app_id, publish_id, storage),
)
assets = parser.parse()
manifest = SkillManager.generate_tool_manifest(
assets=[asset for asset in assets if isinstance(asset, SkillAsset)]
)
SkillManager.save_tool_manifest(
tenant_id,
app_id,
publish_id,
manifest,
)
# TODO: use VM zip packager and make this process async
packager = ZipPackager(storage)
zip_bytes = packager.package(assets)
zip_key = AssetPaths.published_zip(tenant_id, app_id, publish_id)
storage.save(zip_key, zip_bytes)
session.commit()
@ -311,7 +334,7 @@ class AppAssetService:
if not published or published.version == AppAssets.VERSION_DRAFT:
raise AppAssetNodeNotFoundError(f"Published version {assets_id} not found")
zip_key = AppAssets.get_published_storage_key(app_model.tenant_id, app_model.id, assets_id)
zip_key = AssetPaths.published_zip(app_model.tenant_id, app_model.id, assets_id)
zip_data = storage.load_once(zip_key)
archive_path = file_path.lstrip("/")
@ -335,6 +358,6 @@ class AppAssetService:
if not node or node.node_type != AssetNodeType.FILE:
raise AppAssetNodeNotFoundError(f"File node {node_id} not found")
storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, node_id)
storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, node_id)
presign_storage = FilePresignStorage(storage.storage_runner)
return presign_storage.get_download_url(storage_key, expires_in)