mirror of
https://github.com/langgenius/dify.git
synced 2026-06-26 14:51:13 +08:00
Merge commit 'd5c9f2d5b1' into feat/agent-v2
# Conflicts: # api/tests/unit_tests/services/agent/test_skill_standardize_service.py
This commit is contained in:
commit
15c4dbc19b
@ -1,15 +1,17 @@
|
||||
"""Validate + extract metadata from an uploaded Skill package (ENG-370).
|
||||
"""Validate and normalize uploaded Skill packages for drive standardization.
|
||||
|
||||
A Skill is a ``.zip`` / ``.skill`` archive that must contain a ``SKILL.md`` entry
|
||||
file (Anthropic Skills convention: YAML frontmatter with ``name`` + ``description``,
|
||||
followed by markdown instructions). This service validates the archive (extension,
|
||||
size, zip integrity, zip-slip safety, SKILL.md presence/encoding/fields) and
|
||||
extracts a manifest consumed by drive standardization.
|
||||
size, zip integrity, zip-slip safety, SKILL.md presence/encoding/fields),
|
||||
normalizes retained member paths relative to the selected skill root, rebuilds
|
||||
canonical archive bytes, and returns normalized metadata together with the
|
||||
archive-root ``SKILL.md`` bytes.
|
||||
|
||||
It does NOT execute or load the skill — the agent backend owns execution. It also
|
||||
does not persist anything into Agent Soul or bind anything to config versions;
|
||||
``SkillStandardizeService`` consumes the manifest and commits the canonical drive
|
||||
rows instead.
|
||||
``SkillStandardizeService`` consumes the normalized package and commits the
|
||||
canonical drive rows instead.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@ -19,6 +21,7 @@ import io
|
||||
import posixpath
|
||||
import re
|
||||
import zipfile
|
||||
import zlib
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel
|
||||
@ -58,10 +61,57 @@ class SkillManifest(BaseModel):
|
||||
hash: str # sha256 of the archive bytes
|
||||
|
||||
|
||||
class SkillPackageService:
|
||||
"""Validate Skill archives and extract their manifest."""
|
||||
class NormalizedSkillPackage(BaseModel):
|
||||
"""Canonical skill package bytes and metadata ready to store in agent drive."""
|
||||
|
||||
def validate_and_extract(self, *, content: bytes, filename: str) -> SkillManifest:
|
||||
manifest: SkillManifest
|
||||
archive_bytes: bytes
|
||||
skill_md_bytes: bytes
|
||||
strip_prefix: str | None
|
||||
|
||||
|
||||
class SkillPackageService:
|
||||
"""Validate Skill archives and produce the normalized package stored in drive."""
|
||||
|
||||
def validate_and_normalize(self, *, content: bytes, filename: str) -> NormalizedSkillPackage:
|
||||
"""Return the canonical drive package for an uploaded skill archive.
|
||||
|
||||
The shallowest ``SKILL.md`` defines the skill root. The returned manifest
|
||||
is normalized to archive-root ``SKILL.md`` and its hash describes the
|
||||
rebuilt archive bytes. Member read/decompression failures while consuming
|
||||
the archive are mapped to ``invalid_archive``.
|
||||
"""
|
||||
archive = self._open_archive(content=content, filename=filename)
|
||||
with archive:
|
||||
members, total_uncompressed = self._collect_file_members(archive)
|
||||
entry_path = self._find_skill_md([safe_path for _, safe_path in members])
|
||||
strip_prefix = self._skill_root_prefix(entry_path)
|
||||
normalized_members = self._normalize_members(members=members, skill_root_prefix=strip_prefix)
|
||||
skill_md_member = normalized_members[_SKILL_MD_NAME]
|
||||
self._validate_skill_md_size(skill_md_member)
|
||||
skill_md_bytes = self._read_member_bytes_from_archive(archive, member_info=skill_md_member)
|
||||
skill_md = self._decode_skill_md(skill_md_bytes)
|
||||
normalized_archive_bytes = self._build_normalized_archive(
|
||||
archive=archive, normalized_members=normalized_members
|
||||
)
|
||||
|
||||
name, description = self._parse_skill_md(skill_md)
|
||||
manifest = SkillManifest(
|
||||
name=name,
|
||||
description=description,
|
||||
entry_path=_SKILL_MD_NAME,
|
||||
files=sorted(normalized_members),
|
||||
size=total_uncompressed,
|
||||
hash=hashlib.sha256(normalized_archive_bytes).hexdigest(),
|
||||
)
|
||||
return NormalizedSkillPackage(
|
||||
manifest=manifest,
|
||||
archive_bytes=normalized_archive_bytes,
|
||||
skill_md_bytes=skill_md_bytes,
|
||||
strip_prefix=strip_prefix,
|
||||
)
|
||||
|
||||
def _open_archive(self, *, content: bytes, filename: str) -> zipfile.ZipFile:
|
||||
self._check_extension(filename)
|
||||
if not content:
|
||||
raise SkillPackageError("empty_archive", "skill archive is empty", status_code=400)
|
||||
@ -69,52 +119,87 @@ class SkillPackageService:
|
||||
raise SkillPackageError("archive_too_large", "skill archive exceeds size limit", status_code=400)
|
||||
|
||||
try:
|
||||
archive = zipfile.ZipFile(io.BytesIO(content))
|
||||
return zipfile.ZipFile(io.BytesIO(content))
|
||||
except zipfile.BadZipFile as exc:
|
||||
raise SkillPackageError("invalid_archive", "skill archive is not a valid zip", status_code=400) from exc
|
||||
|
||||
with archive:
|
||||
infos = [info for info in archive.infolist() if not info.is_dir()]
|
||||
if len(infos) > _MAX_ENTRIES:
|
||||
raise SkillPackageError("too_many_entries", "skill archive has too many files", status_code=400)
|
||||
def _collect_file_members(self, archive: zipfile.ZipFile) -> tuple[list[tuple[zipfile.ZipInfo, str]], int]:
|
||||
infos = [info for info in archive.infolist() if not info.is_dir()]
|
||||
if len(infos) > _MAX_ENTRIES:
|
||||
raise SkillPackageError("too_many_entries", "skill archive has too many files", status_code=400)
|
||||
|
||||
safe_paths: list[str] = []
|
||||
total_uncompressed = 0
|
||||
for info in infos:
|
||||
safe_paths.append(self._safe_member_path(info.filename))
|
||||
total_uncompressed += max(info.file_size, 0)
|
||||
if total_uncompressed > _MAX_UNCOMPRESSED_BYTES:
|
||||
raise SkillPackageError(
|
||||
"archive_too_large", "skill archive uncompressed size exceeds limit", status_code=400
|
||||
)
|
||||
|
||||
entry_path = self._find_skill_md(safe_paths)
|
||||
skill_md = self._read_skill_md(archive, entry_path)
|
||||
|
||||
name, description = self._parse_skill_md(skill_md)
|
||||
return SkillManifest(
|
||||
name=name,
|
||||
description=description,
|
||||
entry_path=entry_path,
|
||||
files=sorted(safe_paths),
|
||||
size=total_uncompressed,
|
||||
hash=hashlib.sha256(content).hexdigest(),
|
||||
)
|
||||
|
||||
def read_member_bytes(self, *, content: bytes, member_path: str) -> bytes:
|
||||
"""Read a single archive member's bytes (used by standardization, ENG-594)."""
|
||||
try:
|
||||
archive = zipfile.ZipFile(io.BytesIO(content))
|
||||
except zipfile.BadZipFile as exc:
|
||||
raise SkillPackageError("invalid_archive", "skill archive is not a valid zip", status_code=400) from exc
|
||||
with archive:
|
||||
member = next(
|
||||
(info for info in archive.infolist() if posixpath.normpath(info.filename) == member_path),
|
||||
None,
|
||||
members: list[tuple[zipfile.ZipInfo, str]] = []
|
||||
total_uncompressed = 0
|
||||
for info in infos:
|
||||
members.append((info, self._safe_member_path(info.filename)))
|
||||
total_uncompressed += max(info.file_size, 0)
|
||||
if total_uncompressed > _MAX_UNCOMPRESSED_BYTES:
|
||||
raise SkillPackageError(
|
||||
"archive_too_large",
|
||||
"skill archive uncompressed size exceeds limit",
|
||||
status_code=400,
|
||||
)
|
||||
if member is None:
|
||||
raise SkillPackageError("member_not_found", f"{member_path} not found in archive", status_code=400)
|
||||
return archive.read(member)
|
||||
return members, total_uncompressed
|
||||
|
||||
@staticmethod
|
||||
def _skill_root_prefix(entry_path: str) -> str | None:
|
||||
skill_root = posixpath.dirname(entry_path)
|
||||
if not skill_root:
|
||||
return None
|
||||
return f"{skill_root}/"
|
||||
|
||||
def _normalize_members(
|
||||
self,
|
||||
*,
|
||||
members: list[tuple[zipfile.ZipInfo, str]],
|
||||
skill_root_prefix: str | None,
|
||||
) -> dict[str, zipfile.ZipInfo]:
|
||||
normalized_members: dict[str, zipfile.ZipInfo] = {}
|
||||
for info, safe_path in members:
|
||||
if skill_root_prefix is not None:
|
||||
if not safe_path.startswith(skill_root_prefix):
|
||||
raise SkillPackageError(
|
||||
"files_outside_skill_root",
|
||||
"skill archive contains files outside the selected skill root",
|
||||
status_code=400,
|
||||
)
|
||||
normalized_path = safe_path.removeprefix(skill_root_prefix)
|
||||
else:
|
||||
normalized_path = safe_path
|
||||
|
||||
if (
|
||||
not normalized_path
|
||||
or normalized_path in {".", ".."}
|
||||
or normalized_path.startswith("/")
|
||||
or "\\" in normalized_path
|
||||
):
|
||||
raise SkillPackageError("unsafe_path", "skill archive contains an unsafe path", status_code=400)
|
||||
if normalized_path in normalized_members:
|
||||
raise SkillPackageError(
|
||||
"duplicate_member_path",
|
||||
"skill archive contains duplicate normalized paths",
|
||||
status_code=400,
|
||||
)
|
||||
normalized_members[normalized_path] = info
|
||||
|
||||
if _SKILL_MD_NAME not in normalized_members:
|
||||
raise SkillPackageError("missing_skill_md", "skill archive must contain a SKILL.md", status_code=400)
|
||||
return normalized_members
|
||||
|
||||
def _build_normalized_archive(
|
||||
self,
|
||||
*,
|
||||
archive: zipfile.ZipFile,
|
||||
normalized_members: dict[str, zipfile.ZipInfo],
|
||||
) -> bytes:
|
||||
output = io.BytesIO()
|
||||
with zipfile.ZipFile(output, "w", compression=zipfile.ZIP_DEFLATED) as normalized_archive:
|
||||
for normalized_path in sorted(normalized_members):
|
||||
normalized_archive.writestr(
|
||||
normalized_path,
|
||||
self._read_member_bytes_from_archive(archive, member_info=normalized_members[normalized_path]),
|
||||
)
|
||||
return output.getvalue()
|
||||
|
||||
@staticmethod
|
||||
def _check_extension(filename: str) -> None:
|
||||
@ -145,17 +230,19 @@ class SkillPackageService:
|
||||
return min(candidates, key=lambda p: (p.count("/"), len(p)))
|
||||
|
||||
@staticmethod
|
||||
def _read_skill_md(archive: zipfile.ZipFile, entry_path: str) -> str:
|
||||
# Look the member up by its original name (normpath may differ from the stored name).
|
||||
member = next(
|
||||
(info for info in archive.infolist() if posixpath.normpath(info.filename) == entry_path),
|
||||
None,
|
||||
)
|
||||
if member is None:
|
||||
raise SkillPackageError("missing_skill_md", "skill archive must contain a SKILL.md", status_code=400)
|
||||
if member.file_size > _MAX_SKILL_MD_BYTES:
|
||||
def _read_member_bytes_from_archive(archive: zipfile.ZipFile, *, member_info: zipfile.ZipInfo) -> bytes:
|
||||
try:
|
||||
return archive.read(member_info)
|
||||
except (zipfile.BadZipFile, EOFError, OSError, RuntimeError, ValueError, zlib.error) as exc:
|
||||
raise SkillPackageError("invalid_archive", "skill archive is not a valid zip", status_code=400) from exc
|
||||
|
||||
@staticmethod
|
||||
def _validate_skill_md_size(member_info: zipfile.ZipInfo) -> None:
|
||||
if member_info.file_size > _MAX_SKILL_MD_BYTES:
|
||||
raise SkillPackageError("skill_md_too_large", "SKILL.md exceeds size limit", status_code=400)
|
||||
raw = archive.read(member)
|
||||
|
||||
@staticmethod
|
||||
def _decode_skill_md(raw: bytes) -> str:
|
||||
try:
|
||||
return raw.decode("utf-8")
|
||||
except UnicodeDecodeError as exc:
|
||||
@ -193,4 +280,4 @@ class SkillPackageService:
|
||||
return loaded if isinstance(loaded, dict) else {}
|
||||
|
||||
|
||||
__all__ = ["SkillManifest", "SkillPackageError", "SkillPackageService"]
|
||||
__all__ = ["NormalizedSkillPackage", "SkillManifest", "SkillPackageError", "SkillPackageService"]
|
||||
|
||||
@ -33,7 +33,11 @@ def slugify_skill_name(name: str) -> str:
|
||||
|
||||
|
||||
class SkillStandardizeService:
|
||||
"""Validate + standardize a Skill package into a per-agent drive upload result."""
|
||||
"""Persist a normalized skill package into drive-owned files for one agent.
|
||||
|
||||
Instances are intentionally stateful: ``standardize()`` updates
|
||||
``last_committed_items`` with the drive commit result for the most recent call.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@ -45,6 +49,7 @@ class SkillStandardizeService:
|
||||
self._package = package_service or SkillPackageService()
|
||||
self._drive = drive_service or AgentDriveService()
|
||||
self._tool_files = tool_file_manager or ToolFileManager()
|
||||
self.last_committed_items: list[dict[str, Any]] = []
|
||||
|
||||
def standardize(
|
||||
self,
|
||||
@ -55,8 +60,14 @@ class SkillStandardizeService:
|
||||
user_id: str,
|
||||
agent_id: str,
|
||||
) -> dict[str, Any]:
|
||||
manifest = self._package.validate_and_extract(content=content, filename=filename)
|
||||
skill_md_bytes = self._package.read_member_bytes(content=content, member_path=manifest.entry_path)
|
||||
"""Create two ToolFiles, commit two drive-owned keys, and return skill metadata.
|
||||
|
||||
This writes ``<slug>/SKILL.md`` and ``<slug>/.DIFY-SKILL-FULL.zip``,
|
||||
stores the drive commit rows in ``last_committed_items``, and returns the
|
||||
console response shape ``{"skill": ..., "manifest": ...}``.
|
||||
"""
|
||||
package = self._package.validate_and_normalize(content=content, filename=filename)
|
||||
manifest = package.manifest
|
||||
slug = slugify_skill_name(manifest.name)
|
||||
|
||||
# Drive-owned files: canonical SKILL.md and the full archive. The
|
||||
@ -65,7 +76,7 @@ class SkillStandardizeService:
|
||||
user_id=user_id,
|
||||
tenant_id=tenant_id,
|
||||
conversation_id=None,
|
||||
file_binary=skill_md_bytes,
|
||||
file_binary=package.skill_md_bytes,
|
||||
mimetype="text/markdown",
|
||||
filename=_SKILL_MD_NAME,
|
||||
)
|
||||
@ -73,38 +84,14 @@ class SkillStandardizeService:
|
||||
user_id=user_id,
|
||||
tenant_id=tenant_id,
|
||||
conversation_id=None,
|
||||
file_binary=content,
|
||||
file_binary=package.archive_bytes,
|
||||
mimetype="application/zip",
|
||||
filename=_FULL_ARCHIVE_NAME,
|
||||
)
|
||||
|
||||
skill_md_key = f"{slug}/{_SKILL_MD_NAME}"
|
||||
archive_key = f"{slug}/{_FULL_ARCHIVE_NAME}"
|
||||
member_items: list[DriveCommitItem] = []
|
||||
for member_path in sorted(set(manifest.files)):
|
||||
member_key = f"{slug}/{member_path}"
|
||||
if member_key in {skill_md_key, archive_key}:
|
||||
continue
|
||||
|
||||
member_bytes = self._package.read_member_bytes(content=content, member_path=member_path)
|
||||
mimetype = mimetypes.guess_type(member_path)[0] or "application/octet-stream"
|
||||
member_tool_file = self._tool_files.create_file_by_raw(
|
||||
user_id=user_id,
|
||||
tenant_id=tenant_id,
|
||||
conversation_id=None,
|
||||
file_binary=member_bytes,
|
||||
mimetype=mimetype,
|
||||
filename=posixpath.basename(member_path),
|
||||
)
|
||||
member_items.append(
|
||||
DriveCommitItem(
|
||||
key=member_key,
|
||||
file_ref=DriveFileRef(kind="tool_file", id=member_tool_file.id),
|
||||
value_owned_by_drive=True,
|
||||
)
|
||||
)
|
||||
|
||||
self._drive.commit(
|
||||
committed_items = self._drive.commit(
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
agent_id=agent_id,
|
||||
@ -125,23 +112,17 @@ class SkillStandardizeService:
|
||||
file_ref=DriveFileRef(kind="tool_file", id=archive_tool_file.id),
|
||||
value_owned_by_drive=True,
|
||||
),
|
||||
*member_items,
|
||||
],
|
||||
)
|
||||
|
||||
drive_skill = next(
|
||||
skill
|
||||
for skill in self._drive.list_skills(tenant_id=tenant_id, agent_id=agent_id)
|
||||
if skill["skill_md_key"] == skill_md_key
|
||||
)
|
||||
self.last_committed_items = committed_items
|
||||
|
||||
return {
|
||||
"skill": {
|
||||
"name": drive_skill["name"],
|
||||
"description": drive_skill["description"],
|
||||
"path": drive_skill["path"],
|
||||
"skill_md_key": drive_skill["skill_md_key"],
|
||||
"archive_key": drive_skill["archive_key"],
|
||||
"name": manifest.name,
|
||||
"description": manifest.description,
|
||||
"path": slug,
|
||||
"skill_md_key": skill_md_key,
|
||||
"archive_key": archive_key,
|
||||
},
|
||||
"manifest": manifest.model_dump(),
|
||||
}
|
||||
|
||||
@ -1,13 +1,16 @@
|
||||
"""Unit tests for the Skill package validator/extractor (ENG-370)."""
|
||||
"""Unit tests for the Skill package validator/normalizer (ENG-370)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import io
|
||||
import zipfile
|
||||
import zlib
|
||||
|
||||
import pytest
|
||||
|
||||
from services.agent.skill_package_service import SkillPackageError, SkillPackageService
|
||||
from services.agent import skill_package_service as skill_package_service_module
|
||||
from services.agent.skill_package_service import NormalizedSkillPackage, SkillPackageError, SkillPackageService
|
||||
|
||||
_SKILL_MD = """---
|
||||
name: PDF Toolkit
|
||||
@ -28,12 +31,17 @@ def _zip(members: dict[str, bytes], *, compression: int = zipfile.ZIP_DEFLATED)
|
||||
return buffer.getvalue()
|
||||
|
||||
|
||||
def _extract(members: dict[str, bytes], *, filename: str = "skill.zip"):
|
||||
return SkillPackageService().validate_and_extract(content=_zip(members), filename=filename)
|
||||
def _normalize(members: dict[str, bytes], *, filename: str = "skill.zip") -> NormalizedSkillPackage:
|
||||
return SkillPackageService().validate_and_normalize(content=_zip(members), filename=filename)
|
||||
|
||||
|
||||
def test_valid_skill_extracts_manifest():
|
||||
manifest = _extract({"SKILL.md": _SKILL_MD.encode(), "scripts/run.py": b"print('hi')\n"})
|
||||
def _archive_members(content: bytes) -> list[str]:
|
||||
with zipfile.ZipFile(io.BytesIO(content)) as archive:
|
||||
return sorted(info.filename for info in archive.infolist() if not info.is_dir())
|
||||
|
||||
|
||||
def test_valid_skill_normalizes_manifest():
|
||||
manifest = _normalize({"SKILL.md": _SKILL_MD.encode(), "scripts/run.py": b"print('hi')\n"}).manifest
|
||||
|
||||
assert manifest.name == "PDF Toolkit"
|
||||
assert manifest.description == "Tools for working with PDF files."
|
||||
@ -44,19 +52,57 @@ def test_valid_skill_extracts_manifest():
|
||||
|
||||
|
||||
def test_name_falls_back_to_heading_without_frontmatter():
|
||||
manifest = _extract({"SKILL.md": b"# Heading Name\n\nbody"})
|
||||
manifest = _normalize({"SKILL.md": b"# Heading Name\n\nbody"}).manifest
|
||||
assert manifest.name == "Heading Name"
|
||||
assert manifest.description == ""
|
||||
|
||||
|
||||
def test_nested_skill_md_is_found():
|
||||
manifest = _extract({"pdf-toolkit/SKILL.md": _SKILL_MD.encode()})
|
||||
assert manifest.entry_path == "pdf-toolkit/SKILL.md"
|
||||
|
||||
|
||||
def test_shallowest_skill_md_preferred():
|
||||
manifest = _extract({"SKILL.md": _SKILL_MD.encode(), "nested/SKILL.md": _SKILL_MD.encode()})
|
||||
def test_shallowest_skill_md_preferred_during_normalization():
|
||||
manifest = _normalize({"SKILL.md": _SKILL_MD.encode(), "nested/SKILL.md": _SKILL_MD.encode()}).manifest
|
||||
assert manifest.entry_path == "SKILL.md"
|
||||
assert manifest.files == ["SKILL.md", "nested/SKILL.md"]
|
||||
|
||||
|
||||
def test_validate_and_normalize_keeps_root_skill_unchanged():
|
||||
package = _normalize({"SKILL.md": _SKILL_MD.encode(), "scripts/run.py": b"print('hi')\n"})
|
||||
|
||||
assert package.manifest.entry_path == "SKILL.md"
|
||||
assert package.manifest.files == ["SKILL.md", "scripts/run.py"]
|
||||
assert package.skill_md_bytes == _SKILL_MD.encode()
|
||||
assert package.strip_prefix is None
|
||||
assert _archive_members(package.archive_bytes) == ["SKILL.md", "scripts/run.py"]
|
||||
assert len(package.manifest.hash) == 64
|
||||
|
||||
|
||||
def test_validate_and_normalize_strips_single_top_level_folder():
|
||||
package = _normalize(
|
||||
{
|
||||
"pdf-toolkit/SKILL.md": _SKILL_MD.encode(),
|
||||
"pdf-toolkit/scripts/run.py": b"print('hi')\n",
|
||||
}
|
||||
)
|
||||
|
||||
assert package.manifest.entry_path == "SKILL.md"
|
||||
assert package.manifest.files == ["SKILL.md", "scripts/run.py"]
|
||||
assert package.skill_md_bytes == _SKILL_MD.encode()
|
||||
assert package.strip_prefix == "pdf-toolkit/"
|
||||
assert _archive_members(package.archive_bytes) == ["SKILL.md", "scripts/run.py"]
|
||||
|
||||
|
||||
def test_validate_and_normalize_strips_deeper_selected_skill_root():
|
||||
members = {
|
||||
"bundle/pdf-toolkit/SKILL.md": _SKILL_MD.encode(),
|
||||
"bundle/pdf-toolkit/scripts/run.py": b"print('hi')\n",
|
||||
}
|
||||
original_upload_bytes = _zip(members)
|
||||
package = SkillPackageService().validate_and_normalize(content=original_upload_bytes, filename="skill.zip")
|
||||
|
||||
assert package.manifest.entry_path == "SKILL.md"
|
||||
assert package.manifest.files == ["SKILL.md", "scripts/run.py"]
|
||||
assert package.strip_prefix == "bundle/pdf-toolkit/"
|
||||
assert _archive_members(package.archive_bytes) == ["SKILL.md", "scripts/run.py"]
|
||||
assert package.manifest.hash == hashlib.sha256(package.archive_bytes).hexdigest()
|
||||
assert package.manifest.hash != hashlib.sha256(original_upload_bytes).hexdigest()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@ -71,53 +117,105 @@ def test_shallowest_skill_md_preferred():
|
||||
)
|
||||
def test_invalid_packages_rejected(members: dict[str, bytes], filename: str, code: str):
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
_extract(members, filename=filename)
|
||||
_normalize(members, filename=filename)
|
||||
assert exc_info.value.code == code
|
||||
assert exc_info.value.status_code == 400
|
||||
|
||||
|
||||
def test_non_zip_content_rejected():
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
SkillPackageService().validate_and_extract(content=b"not a zip", filename="skill.zip")
|
||||
SkillPackageService().validate_and_normalize(content=b"not a zip", filename="skill.zip")
|
||||
assert exc_info.value.code == "invalid_archive"
|
||||
|
||||
|
||||
def test_zip_slip_member_rejected():
|
||||
payload = _zip({"../evil.txt": b"x", "SKILL.md": _SKILL_MD.encode()})
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
SkillPackageService().validate_and_extract(content=payload, filename="skill.zip")
|
||||
SkillPackageService().validate_and_normalize(content=payload, filename="skill.zip")
|
||||
assert exc_info.value.code == "unsafe_path"
|
||||
|
||||
|
||||
def test_empty_archive_rejected():
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
SkillPackageService().validate_and_extract(content=b"", filename="skill.zip")
|
||||
SkillPackageService().validate_and_normalize(content=b"", filename="skill.zip")
|
||||
assert exc_info.value.code == "empty_archive"
|
||||
|
||||
|
||||
def test_validate_and_normalize_rejects_skill_md_too_large(monkeypatch: pytest.MonkeyPatch):
|
||||
monkeypatch.setattr(skill_package_service_module, "_MAX_SKILL_MD_BYTES", 8)
|
||||
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
_normalize({"SKILL.md": _SKILL_MD.encode()})
|
||||
assert exc_info.value.code == "skill_md_too_large"
|
||||
|
||||
|
||||
def test_validate_and_normalize_rejects_too_many_entries(monkeypatch: pytest.MonkeyPatch):
|
||||
monkeypatch.setattr(skill_package_service_module, "_MAX_ENTRIES", 1)
|
||||
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
_normalize({"SKILL.md": _SKILL_MD.encode(), "scripts/run.py": b"print('x')\n"})
|
||||
assert exc_info.value.code == "too_many_entries"
|
||||
|
||||
|
||||
def test_validate_and_normalize_rejects_archive_too_large_uncompressed(monkeypatch: pytest.MonkeyPatch):
|
||||
monkeypatch.setattr(skill_package_service_module, "_MAX_UNCOMPRESSED_BYTES", 32)
|
||||
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
_normalize({"SKILL.md": _SKILL_MD.encode(), "scripts/run.py": b"x" * 33})
|
||||
assert exc_info.value.code == "archive_too_large"
|
||||
|
||||
|
||||
def test_validate_and_normalize_rejects_archive_too_large_uploaded_bytes(monkeypatch: pytest.MonkeyPatch):
|
||||
monkeypatch.setattr(skill_package_service_module, "_MAX_ARCHIVE_BYTES", 8)
|
||||
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
SkillPackageService().validate_and_normalize(content=b"x" * 9, filename="skill.zip")
|
||||
assert exc_info.value.code == "archive_too_large"
|
||||
|
||||
|
||||
def test_bad_frontmatter_yaml_rejected():
|
||||
bad = b"---\n: : : not yaml\n---\n# x\n"
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
_extract({"SKILL.md": bad})
|
||||
_normalize({"SKILL.md": bad})
|
||||
assert exc_info.value.code == "invalid_frontmatter"
|
||||
|
||||
|
||||
def test_unterminated_frontmatter_falls_back_to_heading():
|
||||
# leading '---' with no closing fence -> no frontmatter, use the heading
|
||||
manifest = _extract({"SKILL.md": b"---\n# Heading Wins\nbody"})
|
||||
manifest = _normalize({"SKILL.md": b"---\n# Heading Wins\nbody"}).manifest
|
||||
assert manifest.name == "Heading Wins"
|
||||
|
||||
|
||||
def test_read_member_bytes_roundtrip_and_errors():
|
||||
service = SkillPackageService()
|
||||
payload = _zip({"SKILL.md": _SKILL_MD.encode(), "scripts/run.py": b"print('x')\n"})
|
||||
def test_validate_and_normalize_rejects_files_outside_selected_skill_root():
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
_normalize({"pdf-toolkit/SKILL.md": _SKILL_MD.encode(), "README.md": b"x"})
|
||||
assert exc_info.value.code == "files_outside_skill_root"
|
||||
|
||||
assert service.read_member_bytes(content=payload, member_path="scripts/run.py") == b"print('x')\n"
|
||||
|
||||
with pytest.raises(SkillPackageError) as missing:
|
||||
service.read_member_bytes(content=payload, member_path="nope.txt")
|
||||
assert missing.value.code == "member_not_found"
|
||||
def test_validate_and_normalize_rejects_duplicate_normalized_paths():
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
_normalize(
|
||||
{
|
||||
"pdf-toolkit/SKILL.md": _SKILL_MD.encode(),
|
||||
"pdf-toolkit/scripts/run.py": b"print('x')\n",
|
||||
"pdf-toolkit/scripts/./run.py": b"print('y')\n",
|
||||
}
|
||||
)
|
||||
assert exc_info.value.code == "duplicate_member_path"
|
||||
|
||||
with pytest.raises(SkillPackageError) as bad_zip:
|
||||
service.read_member_bytes(content=b"not a zip", member_path="SKILL.md")
|
||||
assert bad_zip.value.code == "invalid_archive"
|
||||
|
||||
def test_validate_and_normalize_maps_member_decompression_failures_to_invalid_archive(monkeypatch: pytest.MonkeyPatch):
|
||||
original_read = zipfile.ZipFile.read
|
||||
|
||||
def corrupted_read(self: zipfile.ZipFile, member: str | zipfile.ZipInfo, *args: object, **kwargs: object) -> bytes:
|
||||
filename = member.filename if isinstance(member, zipfile.ZipInfo) else member
|
||||
if filename == "scripts/run.py":
|
||||
raise zlib.error("invalid distance too far back")
|
||||
return original_read(self, member, *args, **kwargs)
|
||||
|
||||
monkeypatch.setattr(zipfile.ZipFile, "read", corrupted_read)
|
||||
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
_normalize({"SKILL.md": _SKILL_MD.encode(), "scripts/run.py": b"print('x')\n"})
|
||||
assert exc_info.value.code == "invalid_archive"
|
||||
assert exc_info.value.message == "skill archive is not a valid zip"
|
||||
|
||||
@ -33,7 +33,7 @@ def test_slugify_skill_name():
|
||||
|
||||
|
||||
def test_standardize_creates_drive_owned_toolfiles_and_commits_archive_manifest():
|
||||
content = _zip({"SKILL.md": _SKILL_MD, "scripts/run.py": b"print('x')\n"})
|
||||
content = _zip({"pdf-toolkit/SKILL.md": _SKILL_MD, "pdf-toolkit/scripts/run.py": b"print('x')\n"})
|
||||
|
||||
tool_files = MagicMock()
|
||||
tool_files.create_file_by_raw.side_effect = [
|
||||
@ -42,19 +42,6 @@ def test_standardize_creates_drive_owned_toolfiles_and_commits_archive_manifest(
|
||||
]
|
||||
drive = MagicMock()
|
||||
drive.commit.return_value = []
|
||||
drive.list_skills.return_value = [
|
||||
{
|
||||
"path": "pdf-toolkit",
|
||||
"skill_md_key": "pdf-toolkit/SKILL.md",
|
||||
"archive_key": "pdf-toolkit/.DIFY-SKILL-FULL.zip",
|
||||
"name": "PDF Toolkit",
|
||||
"description": "Work with PDFs.",
|
||||
"size": len(_SKILL_MD),
|
||||
"mime_type": "text/markdown",
|
||||
"hash": None,
|
||||
"created_at": None,
|
||||
},
|
||||
]
|
||||
|
||||
service = SkillStandardizeService(tool_file_manager=tool_files, drive_service=drive)
|
||||
result = service.standardize(
|
||||
@ -71,7 +58,12 @@ def test_standardize_creates_drive_owned_toolfiles_and_commits_archive_manifest(
|
||||
assert md_call.kwargs["mimetype"] == "text/markdown"
|
||||
assert md_call.kwargs["file_binary"] == _SKILL_MD
|
||||
assert zip_call.kwargs["mimetype"] == "application/zip"
|
||||
assert zip_call.kwargs["file_binary"] == content
|
||||
assert zip_call.kwargs["file_binary"] != content
|
||||
with zipfile.ZipFile(io.BytesIO(zip_call.kwargs["file_binary"])) as archive:
|
||||
assert sorted(info.filename for info in archive.infolist() if not info.is_dir()) == [
|
||||
"SKILL.md",
|
||||
"scripts/run.py",
|
||||
]
|
||||
|
||||
# Committed as drive-owned with the standardized keys. Member paths are
|
||||
# carried in metadata for inspect/preview/runtime lazy resolution.
|
||||
@ -96,4 +88,7 @@ def test_standardize_creates_drive_owned_toolfiles_and_commits_archive_manifest(
|
||||
assert skill["name"] == "PDF Toolkit"
|
||||
assert skill["archive_key"] == "pdf-toolkit/.DIFY-SKILL-FULL.zip"
|
||||
assert skill["skill_md_key"] == "pdf-toolkit/SKILL.md"
|
||||
assert result["manifest"]["entry_path"] == "SKILL.md"
|
||||
assert result["manifest"]["files"] == ["SKILL.md", "scripts/run.py"]
|
||||
drive.list_skills.assert_not_called()
|
||||
assert "_committed_items" not in result
|
||||
|
||||
Loading…
Reference in New Issue
Block a user