mirror of
https://github.com/langgenius/dify.git
synced 2026-06-26 23:01:11 +08:00
Relax skill package top-level normalization
This commit is contained in:
parent
c55629c84d
commit
b9c6ad82f9
@ -76,17 +76,28 @@ class SkillPackageService:
|
||||
def validate_and_normalize(self, *, content: bytes, filename: str) -> NormalizedSkillPackage:
|
||||
"""Return the canonical drive package for an uploaded skill archive.
|
||||
|
||||
The shallowest ``SKILL.md`` defines the skill root. The returned manifest
|
||||
is normalized to archive-root ``SKILL.md`` and its hash describes the
|
||||
rebuilt archive bytes. Member read/decompression failures while consuming
|
||||
the archive are mapped to ``invalid_archive``.
|
||||
The shallowest ``SKILL.md`` defines the skill root. When exactly one
|
||||
depth-2 ``<folder>/SKILL.md`` exists, normalization strips that top-level
|
||||
folder and silently discards all members outside it, including nested
|
||||
foreign paths. When that unique depth-2 condition does not apply, files
|
||||
outside the selected skill root still raise ``files_outside_skill_root``.
|
||||
The returned manifest is normalized to archive-root ``SKILL.md`` and its
|
||||
hash describes the rebuilt archive bytes. Member read/decompression
|
||||
failures while consuming the archive are mapped to ``invalid_archive``.
|
||||
"""
|
||||
archive = self._open_archive(content=content, filename=filename)
|
||||
with archive:
|
||||
members, total_uncompressed = self._collect_file_members(archive)
|
||||
entry_path = self._find_skill_md([safe_path for _, safe_path in members])
|
||||
members = self._collect_file_members(archive)
|
||||
member_paths = [safe_path for _, safe_path in members]
|
||||
entry_path = self._find_skill_md(member_paths)
|
||||
strip_prefix = self._skill_root_prefix(entry_path)
|
||||
normalized_members = self._normalize_members(members=members, skill_root_prefix=strip_prefix)
|
||||
normalized_members = self._normalize_members(
|
||||
members=members,
|
||||
skill_root_prefix=strip_prefix,
|
||||
ignore_outside_selected_root=self._can_strip_single_top_level_folder(
|
||||
paths=member_paths, entry_path=entry_path
|
||||
),
|
||||
)
|
||||
skill_md_member = normalized_members[_SKILL_MD_NAME]
|
||||
self._validate_skill_md_size(skill_md_member)
|
||||
skill_md_bytes = self._read_member_bytes_from_archive(archive, member_info=skill_md_member)
|
||||
@ -94,6 +105,7 @@ class SkillPackageService:
|
||||
normalized_archive_bytes = self._build_normalized_archive(
|
||||
archive=archive, normalized_members=normalized_members
|
||||
)
|
||||
normalized_size = sum(max(info.file_size, 0) for info in normalized_members.values())
|
||||
|
||||
name, description = self._parse_skill_md(skill_md)
|
||||
manifest = SkillManifest(
|
||||
@ -101,7 +113,7 @@ class SkillPackageService:
|
||||
description=description,
|
||||
entry_path=_SKILL_MD_NAME,
|
||||
files=sorted(normalized_members),
|
||||
size=total_uncompressed,
|
||||
size=normalized_size,
|
||||
hash=hashlib.sha256(normalized_archive_bytes).hexdigest(),
|
||||
)
|
||||
return NormalizedSkillPackage(
|
||||
@ -123,7 +135,7 @@ class SkillPackageService:
|
||||
except zipfile.BadZipFile as exc:
|
||||
raise SkillPackageError("invalid_archive", "skill archive is not a valid zip", status_code=400) from exc
|
||||
|
||||
def _collect_file_members(self, archive: zipfile.ZipFile) -> tuple[list[tuple[zipfile.ZipInfo, str]], int]:
|
||||
def _collect_file_members(self, archive: zipfile.ZipFile) -> list[tuple[zipfile.ZipInfo, str]]:
|
||||
infos = [info for info in archive.infolist() if not info.is_dir()]
|
||||
if len(infos) > _MAX_ENTRIES:
|
||||
raise SkillPackageError("too_many_entries", "skill archive has too many files", status_code=400)
|
||||
@ -139,7 +151,7 @@ class SkillPackageService:
|
||||
"skill archive uncompressed size exceeds limit",
|
||||
status_code=400,
|
||||
)
|
||||
return members, total_uncompressed
|
||||
return members
|
||||
|
||||
@staticmethod
|
||||
def _skill_root_prefix(entry_path: str) -> str | None:
|
||||
@ -153,11 +165,14 @@ class SkillPackageService:
|
||||
*,
|
||||
members: list[tuple[zipfile.ZipInfo, str]],
|
||||
skill_root_prefix: str | None,
|
||||
ignore_outside_selected_root: bool = False,
|
||||
) -> dict[str, zipfile.ZipInfo]:
|
||||
normalized_members: dict[str, zipfile.ZipInfo] = {}
|
||||
for info, safe_path in members:
|
||||
if skill_root_prefix is not None:
|
||||
if not safe_path.startswith(skill_root_prefix):
|
||||
if ignore_outside_selected_root:
|
||||
continue
|
||||
raise SkillPackageError(
|
||||
"files_outside_skill_root",
|
||||
"skill archive contains files outside the selected skill root",
|
||||
@ -229,6 +244,13 @@ class SkillPackageService:
|
||||
# Prefer the shallowest SKILL.md (skill root).
|
||||
return min(candidates, key=lambda p: (p.count("/"), len(p)))
|
||||
|
||||
@staticmethod
|
||||
def _can_strip_single_top_level_folder(*, paths: list[str], entry_path: str) -> bool:
|
||||
if entry_path.count("/") != 1:
|
||||
return False
|
||||
candidates = [path for path in paths if path.count("/") == 1 and posixpath.basename(path) == _SKILL_MD_NAME]
|
||||
return len(candidates) == 1 and candidates[0] == entry_path
|
||||
|
||||
@staticmethod
|
||||
def _read_member_bytes_from_archive(archive: zipfile.ZipFile, *, member_info: zipfile.ZipInfo) -> bytes:
|
||||
try:
|
||||
|
||||
@ -89,6 +89,50 @@ def test_validate_and_normalize_strips_single_top_level_folder():
|
||||
assert _archive_members(package.archive_bytes) == ["SKILL.md", "scripts/run.py"]
|
||||
|
||||
|
||||
def test_validate_and_normalize_strips_single_top_level_folder_ignoring_other_root_entries():
|
||||
package = _normalize(
|
||||
{
|
||||
"pdf-toolkit/SKILL.md": _SKILL_MD.encode(),
|
||||
"pdf-toolkit/scripts/run.py": b"print('hi')\n",
|
||||
"README.md": b"bundle notes\n",
|
||||
}
|
||||
)
|
||||
|
||||
assert package.manifest.entry_path == "SKILL.md"
|
||||
assert package.manifest.files == ["SKILL.md", "scripts/run.py"]
|
||||
assert package.skill_md_bytes == _SKILL_MD.encode()
|
||||
assert package.strip_prefix == "pdf-toolkit/"
|
||||
assert _archive_members(package.archive_bytes) == ["SKILL.md", "scripts/run.py"]
|
||||
|
||||
|
||||
def test_validate_and_normalize_strips_single_top_level_folder_dropping_nested_foreign_paths():
|
||||
package = _normalize(
|
||||
{
|
||||
"pdf-toolkit/SKILL.md": _SKILL_MD.encode(),
|
||||
"pdf-toolkit/scripts/run.py": b"print('hi')\n",
|
||||
"bundle/other.txt": b"x",
|
||||
}
|
||||
)
|
||||
|
||||
assert package.manifest.entry_path == "SKILL.md"
|
||||
assert package.manifest.files == ["SKILL.md", "scripts/run.py"]
|
||||
assert package.skill_md_bytes == _SKILL_MD.encode()
|
||||
assert package.strip_prefix == "pdf-toolkit/"
|
||||
assert _archive_members(package.archive_bytes) == ["SKILL.md", "scripts/run.py"]
|
||||
|
||||
|
||||
def test_validate_and_normalize_rejects_multiple_depth_2_skill_roots_with_sibling_skill_tree():
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
_normalize(
|
||||
{
|
||||
"pdf-toolkit/SKILL.md": _SKILL_MD.encode(),
|
||||
"pdf-toolkit/scripts/run.py": b"print('hi')\n",
|
||||
"other-tool/SKILL.md": _SKILL_MD.encode(),
|
||||
}
|
||||
)
|
||||
assert exc_info.value.code == "files_outside_skill_root"
|
||||
|
||||
|
||||
def test_validate_and_normalize_strips_deeper_selected_skill_root():
|
||||
members = {
|
||||
"bundle/pdf-toolkit/SKILL.md": _SKILL_MD.encode(),
|
||||
@ -188,7 +232,7 @@ def test_unterminated_frontmatter_falls_back_to_heading():
|
||||
|
||||
def test_validate_and_normalize_rejects_files_outside_selected_skill_root():
|
||||
with pytest.raises(SkillPackageError) as exc_info:
|
||||
_normalize({"pdf-toolkit/SKILL.md": _SKILL_MD.encode(), "README.md": b"x"})
|
||||
_normalize({"bundle/pdf-toolkit/SKILL.md": _SKILL_MD.encode(), "README.md": b"x"})
|
||||
assert exc_info.value.code == "files_outside_skill_root"
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user