fix(agent-v2): lazily resolve skill archive members (#37914)

This commit is contained in:
zyssyz123 2026-06-25 12:14:12 +08:00 committed by GitHub
parent 4139d0c4fb
commit 70fbbbf2c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 512 additions and 88 deletions

View File

@ -208,6 +208,30 @@ def _file_ref_for_active_soul(*, tenant_id: str, agent_id: str, key: str):
return file_ref
def _archive_member_ref_for_active_soul(
*, tenant_id: str, agent_id: str, key: str
) -> tuple[AgentDriveFileKind, str, str] | None:
agent_soul = AgentSoulFilesService.active_agent_soul(session=db.session, tenant_id=tenant_id, agent_id=agent_id)
normalized_key = key.strip().lstrip("/")
for skill in agent_soul.files.skills:
skill_path = skill.path or (
AgentSoulFilesService.skill_path_from_key(skill.skill_md_key) if skill.skill_md_key else None
)
if not skill_path or not normalized_key.startswith(f"{skill_path}/"):
continue
member_path = normalized_key.removeprefix(f"{skill_path}/")
if member_path == ".DIFY-SKILL-FULL.zip":
return None
manifest_files = {str(path).strip().lstrip("/") for path in (skill.manifest_files or [])}
if manifest_files and member_path not in manifest_files:
return None
archive_file_id = skill.full_archive_file_id
if not archive_file_id:
return None
return AgentDriveFileKind.TOOL_FILE, archive_file_id, member_path
return None
def _file_kind_from_ref(file_ref) -> AgentDriveFileKind | None:
raw = file_ref.transfer_method or ("upload_file" if file_ref.upload_file_id else None)
if raw is None:
@ -221,6 +245,17 @@ def _file_kind_from_ref(file_ref) -> AgentDriveFileKind | None:
def _preview_versioned_file(*, tenant_id: str, agent_id: str, key: str) -> dict[str, Any]:
file_ref = _file_ref_for_active_soul(tenant_id=tenant_id, agent_id=agent_id, key=key)
if file_ref is None:
archive_member = _archive_member_ref_for_active_soul(tenant_id=tenant_id, agent_id=agent_id, key=key)
if archive_member is not None:
archive_file_kind, archive_file_id, member_path = archive_member
return AgentDriveService().preview_archive_member_for_ref(
tenant_id=tenant_id,
agent_id=agent_id,
key=key,
archive_file_kind=archive_file_kind,
archive_file_id=archive_file_id,
member_path=member_path,
)
return AgentDriveService().preview(tenant_id=tenant_id, agent_id=agent_id, key=key)
file_kind = _file_kind_from_ref(file_ref)
file_id = file_ref.file_id or file_ref.upload_file_id
@ -239,6 +274,17 @@ def _preview_versioned_file(*, tenant_id: str, agent_id: str, key: str) -> dict[
def _download_versioned_file(*, tenant_id: str, agent_id: str, key: str) -> str:
file_ref = _file_ref_for_active_soul(tenant_id=tenant_id, agent_id=agent_id, key=key)
if file_ref is None:
archive_member = _archive_member_ref_for_active_soul(tenant_id=tenant_id, agent_id=agent_id, key=key)
if archive_member is not None:
archive_file_kind, archive_file_id, member_path = archive_member
return AgentDriveService().download_url_archive_member_for_ref(
tenant_id=tenant_id,
agent_id=agent_id,
key=key,
archive_file_kind=archive_file_kind,
archive_file_id=archive_file_id,
member_path=member_path,
)
return AgentDriveService().download_url(tenant_id=tenant_id, agent_id=agent_id, key=key)
file_kind = _file_kind_from_ref(file_ref)
file_id = file_ref.file_id or file_ref.upload_file_id

View File

@ -14,11 +14,12 @@ api = ExternalApi(
files_ns = Namespace("files", description="File operations", path="/")
from . import image_preview, tool_files, upload
from . import agent_drive_archive, image_preview, tool_files, upload
api.add_namespace(files_ns)
__all__ = [
"agent_drive_archive",
"api",
"bp",
"files_ns",

View File

@ -0,0 +1,67 @@
from urllib.parse import quote
from flask import Response, request
from flask_restx import Resource
from pydantic import BaseModel, Field
from werkzeug.exceptions import Forbidden, NotFound
from controllers.common.file_response import enforce_download_for_html
from controllers.common.schema import register_schema_models
from controllers.files import files_ns
from models.agent import AgentDriveFileKind
from services.agent_drive_service import AgentDriveError, AgentDriveService
class AgentDriveArchiveMemberQuery(BaseModel):
tenant_id: str = Field(..., description="Tenant ID")
agent_id: str = Field(..., description="Agent ID")
key: str = Field(..., description="Virtual drive key")
archive_file_kind: AgentDriveFileKind = Field(..., description="Archive file kind")
archive_file_id: str = Field(..., description="Archive file id")
member_path: str = Field(..., description="Zip member path")
timestamp: str = Field(..., description="Unix timestamp")
nonce: str = Field(..., description="Random nonce")
sign: str = Field(..., description="HMAC signature")
as_attachment: bool = Field(default=False, description="Download as attachment")
register_schema_models(files_ns, AgentDriveArchiveMemberQuery)
@files_ns.route("/agent-drive/archive-member")
class AgentDriveArchiveMemberApi(Resource):
@files_ns.doc("get_agent_drive_archive_member")
@files_ns.doc(description="Download a lazily resolved Agent Skill archive member by signed parameters")
def get(self):
args = AgentDriveArchiveMemberQuery.model_validate(request.args.to_dict(flat=True))
if not AgentDriveService.verify_archive_member_signature(
tenant_id=args.tenant_id,
agent_id=args.agent_id,
key=args.key,
archive_file_kind=args.archive_file_kind,
archive_file_id=args.archive_file_id,
member_path=args.member_path,
timestamp=args.timestamp,
nonce=args.nonce,
sign=args.sign,
):
raise Forbidden("Invalid request.")
try:
payload, mime_type, filename = AgentDriveService().load_archive_member_for_signed_request(
tenant_id=args.tenant_id,
agent_id=args.agent_id,
key=args.key,
archive_file_kind=args.archive_file_kind,
archive_file_id=args.archive_file_id,
member_path=args.member_path,
)
except AgentDriveError as exc:
raise NotFound(exc.message) from exc
response = Response(payload, mimetype=mime_type, direct_passthrough=True, headers={})
response.headers["Content-Length"] = str(len(payload))
if args.as_attachment and filename:
encoded_filename = quote(filename)
response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}"
enforce_download_for_html(response, mime_type=mime_type, filename=filename, extension="")
return response

View File

@ -7,19 +7,14 @@ to the agent drive (Agent Files §5.4 / §4):
* ``<slug>/.DIFY-SKILL-FULL.zip`` the full archive, kept only to restore the
complete skill contents.
Both are stored as ``ToolFile`` records and bound via ``AgentDriveService.commit``
with ``value_owned_by_drive=True`` (the drive owns their lifecycle). The returned
payload is the slim drive-derived skill DTO the UI needs to work with the drive
catalog ``name``, ``description``, ``path``, ``skill_md_key``, and
``archive_key`` plus the extracted manifest for upload feedback. The console
``/skills/upload`` endpoints delegate to this service so "upload" now always means
drive-backed skill normalization rather than Agent Soul binding.
The archive's member list is stored in skill metadata and resolved lazily for
inspect/preview/runtime. Upload must not eagerly materialize every archive member
as a separate ToolFile; small archives with many files would otherwise perform
hundreds of storage writes and DB commits inside the request.
"""
from __future__ import annotations
import mimetypes
import posixpath
import re
from typing import Any
@ -65,8 +60,8 @@ class SkillStandardizeService:
skill_md_bytes = self._package.read_member_bytes(content=content, member_path=manifest.entry_path)
slug = slugify_skill_name(manifest.name)
# Drive-owned files: canonical SKILL.md, every inspectable archive file,
# and the full archive for future restore/export.
# Drive-owned files: canonical SKILL.md and the full archive. The
# archive member tree is preserved in metadata and resolved lazily.
md_tool_file = self._tool_files.create_file_by_raw(
user_id=user_id,
tenant_id=tenant_id,
@ -86,30 +81,6 @@ class SkillStandardizeService:
skill_md_key = f"{slug}/{_SKILL_MD_NAME}"
archive_key = f"{slug}/{_FULL_ARCHIVE_NAME}"
member_items: list[DriveCommitItem] = []
for member_path in sorted(set(manifest.files)):
member_key = f"{slug}/{member_path}"
if member_key in {skill_md_key, archive_key}:
continue
member_bytes = self._package.read_member_bytes(content=content, member_path=member_path)
mimetype = mimetypes.guess_type(member_path)[0] or "application/octet-stream"
member_tool_file = self._tool_files.create_file_by_raw(
user_id=user_id,
tenant_id=tenant_id,
conversation_id=None,
file_binary=member_bytes,
mimetype=mimetype,
filename=posixpath.basename(member_path),
)
member_items.append(
DriveCommitItem(
key=member_key,
file_ref=DriveFileRef(kind="tool_file", id=member_tool_file.id),
value_owned_by_drive=True,
)
)
committed_items = self._drive.commit(
tenant_id=tenant_id,
user_id=user_id,
@ -131,7 +102,6 @@ class SkillStandardizeService:
file_ref=DriveFileRef(kind="tool_file", id=archive_tool_file.id),
value_owned_by_drive=True,
),
*member_items,
],
)
self.last_committed_items = committed_items

View File

@ -19,10 +19,18 @@ ToolFile records (see ``AgentDriveFile``). This service is the control plane:
from __future__ import annotations
import base64
import hashlib
import hmac
import io
import json
import logging
import mimetypes
import os
import re
import time
import urllib.parse
import zipfile
from typing import Any, Literal, TypedDict
from urllib.parse import unquote
@ -31,6 +39,7 @@ from sqlalchemy import func, select
from sqlalchemy.exc import DataError, SQLAlchemyError
from sqlalchemy.orm import Session
from configs import dify_config
from core.app.file_access.controller import DatabaseFileAccessController
from core.db.session_factory import session_factory
from extensions.ext_storage import storage
@ -46,6 +55,7 @@ _MAX_KEY_LENGTH = 512
_DRIVE_REF_PREFIX = "agent-"
_SKILL_MD_SUFFIX = "/SKILL.md"
_SKILL_ARCHIVE_NAME = ".DIFY-SKILL-FULL.zip"
_ARCHIVE_MEMBER_DOWNLOAD_PURPOSE = "agent-drive-archive-member"
class AgentDriveError(Exception):
@ -365,6 +375,7 @@ class AgentDriveService:
skill_md_key=skill_md_key,
manifest_files=manifest_files,
drive_keys=drive_keys,
archive_available=catalog["archive_key"] in drive_keys if catalog["archive_key"] else False,
)
return {
**catalog,
@ -598,6 +609,7 @@ class AgentDriveService:
skill_md_key: str,
manifest_files: list[str] | None,
drive_keys: set[str],
archive_available: bool = False,
) -> tuple[list[AgentDriveSkillFileInfo], list[str]]:
warnings: list[str] = []
if manifest_files:
@ -617,13 +629,14 @@ class AgentDriveService:
if path == _SKILL_ARCHIVE_NAME:
continue
drive_key = f"{skill_path}/{path}"
available_in_drive = drive_key in drive_keys or (archive_available and path != _SKILL_ARCHIVE_NAME)
files.append(
{
"path": path,
"name": path.rsplit("/", 1)[-1],
"type": "file",
"drive_key": drive_key if drive_key in drive_keys else None,
"available_in_drive": drive_key in drive_keys,
"drive_key": drive_key if available_in_drive else None,
"available_in_drive": available_in_drive,
}
)
if "SKILL.md" not in {file["path"] for file in files}:
@ -891,36 +904,138 @@ class AgentDriveService:
raise AgentDriveError("drive_key_not_found", "drive value record is missing", status_code=404)
return upload_file.key
def preview(self, *, tenant_id: str, agent_id: str, key: str) -> dict[str, Any]:
"""Truncated text preview of one drive value (binary-safe, never 500s on size)."""
with session_factory.create_session() as session:
self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id)
row = self._require_row(session, tenant_id=tenant_id, agent_id=agent_id, key=key)
storage_key = self._storage_key_for_row(session, tenant_id=tenant_id, row=row)
size = row.size
def _archive_member_for_key(
self,
session: Session,
*,
tenant_id: str,
agent_id: str,
key: str,
) -> tuple[AgentDriveFile, str]:
normalized_key = normalize_drive_key(key)
if "/" not in normalized_key:
raise AgentDriveError("drive_key_not_found", "no drive entry for this key", status_code=404)
skill_path, member_path = normalized_key.split("/", 1)
if member_path in {_SKILL_ARCHIVE_NAME, ""}:
raise AgentDriveError("drive_key_not_found", "no archive member for this key", status_code=404)
data = bytearray()
for chunk in storage.load_stream(storage_key):
data.extend(chunk)
if len(data) > self.PREVIEW_MAX_BYTES:
break
truncated = len(data) > self.PREVIEW_MAX_BYTES
sample = bytes(data[: self.PREVIEW_MAX_BYTES])
# Same semantics as the sandbox read endpoint: NUL or undecodable -> binary.
skill_md_key = f"{skill_path}{_SKILL_MD_SUFFIX}"
skill_row = session.scalar(
select(AgentDriveFile).where(
AgentDriveFile.tenant_id == tenant_id,
AgentDriveFile.agent_id == agent_id,
AgentDriveFile.key == skill_md_key,
AgentDriveFile.is_skill.is_(True),
)
)
if skill_row is None:
raise AgentDriveError("drive_key_not_found", "no drive entry for this key", status_code=404)
metadata = self._parse_skill_metadata(skill_row.key, skill_row.skill_metadata)
manifest_files = {normalize_drive_key(path) for path in (metadata.manifest_files or [])}
if member_path not in manifest_files:
raise AgentDriveError("drive_key_not_found", "archive member is not part of this skill", status_code=404)
archive_row = session.scalar(
select(AgentDriveFile).where(
AgentDriveFile.tenant_id == tenant_id,
AgentDriveFile.agent_id == agent_id,
AgentDriveFile.key == self._skill_archive_key(skill_md_key),
)
)
if archive_row is None:
raise AgentDriveError("drive_key_not_found", "skill archive is missing", status_code=404)
return archive_row, member_path
def _load_archive_member_bytes(
self,
*,
tenant_id: str,
archive_file_kind: AgentDriveFileKind,
archive_file_id: str,
member_path: str,
) -> bytes:
member_path = normalize_drive_key(member_path)
with session_factory.create_session() as session:
storage_key = self._storage_key_for_ref(
session,
tenant_id=tenant_id,
file_kind=archive_file_kind,
file_id=archive_file_id,
)
archive_bytes = b"".join(storage.load_stream(storage_key))
try:
with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
member = next(
(
info
for info in archive.infolist()
if not info.is_dir() and normalize_drive_key(info.filename) == member_path
),
None,
)
if member is None:
raise AgentDriveError(
"drive_key_not_found", "archive member is missing from the skill archive", status_code=404
)
return archive.read(member)
except zipfile.BadZipFile as exc:
raise AgentDriveError("invalid_skill_archive", "skill archive is not a valid zip", status_code=500) from exc
@classmethod
def _preview_bytes(cls, *, key: str, size: int | None, payload: bytes) -> dict[str, Any]:
truncated = len(payload) > cls.PREVIEW_MAX_BYTES
sample = payload[: cls.PREVIEW_MAX_BYTES]
if b"\x00" in sample:
return {"key": row.key, "size": size, "truncated": truncated, "binary": True, "text": None}
return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None}
try:
text = sample.decode("utf-8")
except UnicodeDecodeError:
if truncated:
# A multi-byte char may sit on the cut point; retry without the tail.
try:
text = sample[:-3].decode("utf-8", errors="strict")
except UnicodeDecodeError:
return {"key": row.key, "size": size, "truncated": truncated, "binary": True, "text": None}
return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None}
else:
return {"key": row.key, "size": size, "truncated": truncated, "binary": True, "text": None}
return {"key": row.key, "size": size, "truncated": truncated, "binary": False, "text": text}
return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None}
return {"key": key, "size": size, "truncated": truncated, "binary": False, "text": text}
def preview(self, *, tenant_id: str, agent_id: str, key: str) -> dict[str, Any]:
"""Truncated text preview of one drive value (binary-safe, never 500s on size)."""
with session_factory.create_session() as session:
self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id)
try:
row = self._require_row(session, tenant_id=tenant_id, agent_id=agent_id, key=key)
storage_key = self._storage_key_for_row(session, tenant_id=tenant_id, row=row)
size = row.size
response_key = row.key
archive_ref: tuple[AgentDriveFile, str] | None = None
except AgentDriveError:
archive_ref = self._archive_member_for_key(
session,
tenant_id=tenant_id,
agent_id=agent_id,
key=key,
)
storage_key = None
size = None
response_key = normalize_drive_key(key)
if archive_ref is not None:
archive_row, member_path = archive_ref
payload = self._load_archive_member_bytes(
tenant_id=tenant_id,
archive_file_kind=archive_row.file_kind,
archive_file_id=archive_row.file_id,
member_path=member_path,
)
return self._preview_bytes(key=response_key, size=len(payload), payload=payload)
data = bytearray()
assert storage_key is not None
for chunk in storage.load_stream(storage_key):
data.extend(chunk)
if len(data) > self.PREVIEW_MAX_BYTES:
break
return self._preview_bytes(key=response_key, size=size, payload=bytes(data))
def preview_file_ref(
self,
@ -947,27 +1062,51 @@ class AgentDriveService:
data.extend(chunk)
if len(data) > self.PREVIEW_MAX_BYTES:
break
truncated = len(data) > self.PREVIEW_MAX_BYTES
sample = bytes(data[: self.PREVIEW_MAX_BYTES])
if b"\x00" in sample:
return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None}
try:
text = sample.decode("utf-8")
except UnicodeDecodeError:
if truncated:
try:
text = sample[:-3].decode("utf-8", errors="strict")
except UnicodeDecodeError:
return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None}
else:
return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None}
return {"key": key, "size": size, "truncated": truncated, "binary": False, "text": text}
return self._preview_bytes(key=key, size=size, payload=bytes(data))
def preview_archive_member_for_ref(
self,
*,
tenant_id: str,
agent_id: str,
key: str,
archive_file_kind: AgentDriveFileKind,
archive_file_id: str,
member_path: str,
) -> dict[str, Any]:
with session_factory.create_session() as session:
self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id)
payload = self._load_archive_member_bytes(
tenant_id=tenant_id,
archive_file_kind=archive_file_kind,
archive_file_id=archive_file_id,
member_path=member_path,
)
return self._preview_bytes(key=normalize_drive_key(key), size=len(payload), payload=payload)
def download_url(self, *, tenant_id: str, agent_id: str, key: str) -> str:
"""External signed URL for a browser download of one drive value."""
with session_factory.create_session() as session:
self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id)
row = self._require_row(session, tenant_id=tenant_id, agent_id=agent_id, key=key)
try:
row = self._require_row(session, tenant_id=tenant_id, agent_id=agent_id, key=key)
except AgentDriveError:
archive_row, member_path = self._archive_member_for_key(
session,
tenant_id=tenant_id,
agent_id=agent_id,
key=key,
)
return self.sign_archive_member_url(
tenant_id=tenant_id,
agent_id=agent_id,
key=key,
archive_file_kind=archive_row.file_kind,
archive_file_id=archive_row.file_id,
member_path=member_path,
for_external=True,
as_attachment=True,
)
url = self._resolve_download_url(
tenant_id=tenant_id,
file_kind=row.file_kind,
@ -979,6 +1118,30 @@ class AgentDriveService:
raise AgentDriveError("drive_key_not_found", "drive value cannot be resolved", status_code=404)
return url
def download_url_archive_member_for_ref(
self,
*,
tenant_id: str,
agent_id: str,
key: str,
archive_file_kind: AgentDriveFileKind,
archive_file_id: str,
member_path: str,
for_external: bool = True,
) -> str:
with session_factory.create_session() as session:
self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id)
return self.sign_archive_member_url(
tenant_id=tenant_id,
agent_id=agent_id,
key=key,
archive_file_kind=archive_file_kind,
archive_file_id=archive_file_id,
member_path=member_path,
for_external=for_external,
as_attachment=True,
)
def download_url_for_ref(
self,
*,
@ -1000,6 +1163,135 @@ class AgentDriveService:
raise AgentDriveError("drive_key_not_found", "drive value cannot be resolved", status_code=404)
return url
@staticmethod
def _secret_key() -> bytes:
return dify_config.SECRET_KEY.encode()
@classmethod
def _archive_member_signature_payload(
cls,
*,
tenant_id: str,
agent_id: str,
key: str,
archive_file_kind: AgentDriveFileKind,
archive_file_id: str,
member_path: str,
timestamp: str,
nonce: str,
) -> str:
return "|".join(
[
_ARCHIVE_MEMBER_DOWNLOAD_PURPOSE,
tenant_id,
agent_id,
normalize_drive_key(key),
archive_file_kind.value,
archive_file_id,
normalize_drive_key(member_path),
timestamp,
nonce,
]
)
@classmethod
def _sign_archive_member_payload(cls, payload: str) -> str:
digest = hmac.new(cls._secret_key(), payload.encode(), hashlib.sha256).digest()
return base64.urlsafe_b64encode(digest).decode()
@classmethod
def sign_archive_member_url(
cls,
*,
tenant_id: str,
agent_id: str,
key: str,
archive_file_kind: AgentDriveFileKind,
archive_file_id: str,
member_path: str,
for_external: bool,
as_attachment: bool = False,
) -> str:
base_url = dify_config.FILES_URL if for_external else (dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL)
timestamp = str(int(time.time()))
nonce = os.urandom(16).hex()
payload = cls._archive_member_signature_payload(
tenant_id=tenant_id,
agent_id=agent_id,
key=key,
archive_file_kind=archive_file_kind,
archive_file_id=archive_file_id,
member_path=member_path,
timestamp=timestamp,
nonce=nonce,
)
query = urllib.parse.urlencode(
{
"tenant_id": tenant_id,
"agent_id": agent_id,
"key": normalize_drive_key(key),
"archive_file_kind": archive_file_kind.value,
"archive_file_id": archive_file_id,
"member_path": normalize_drive_key(member_path),
"timestamp": timestamp,
"nonce": nonce,
"sign": cls._sign_archive_member_payload(payload),
"as_attachment": str(as_attachment).lower(),
}
)
return f"{base_url}/files/agent-drive/archive-member?{query}"
@classmethod
def verify_archive_member_signature(
cls,
*,
tenant_id: str,
agent_id: str,
key: str,
archive_file_kind: AgentDriveFileKind,
archive_file_id: str,
member_path: str,
timestamp: str,
nonce: str,
sign: str,
) -> bool:
payload = cls._archive_member_signature_payload(
tenant_id=tenant_id,
agent_id=agent_id,
key=key,
archive_file_kind=archive_file_kind,
archive_file_id=archive_file_id,
member_path=member_path,
timestamp=timestamp,
nonce=nonce,
)
if sign != cls._sign_archive_member_payload(payload):
return False
current_time = int(time.time())
return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT
def load_archive_member_for_signed_request(
self,
*,
tenant_id: str,
agent_id: str,
key: str,
archive_file_kind: AgentDriveFileKind,
archive_file_id: str,
member_path: str,
) -> tuple[bytes, str, str]:
with session_factory.create_session() as session:
self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id)
payload = self._load_archive_member_bytes(
tenant_id=tenant_id,
archive_file_kind=archive_file_kind,
archive_file_id=archive_file_id,
member_path=member_path,
)
mime_type = mimetypes.guess_type(member_path)[0] or "application/octet-stream"
filename = normalize_drive_key(key).rsplit("/", 1)[-1]
return payload, mime_type, filename
__all__ = [
"AgentDriveError",

View File

@ -32,14 +32,13 @@ def test_slugify_skill_name():
assert slugify_skill_name("") == "skill"
def test_standardize_creates_drive_owned_toolfiles_and_commits_archive_members():
def test_standardize_creates_drive_owned_toolfiles_and_commits_archive_manifest():
content = _zip({"SKILL.md": _SKILL_MD, "scripts/run.py": b"print('x')\n"})
tool_files = MagicMock()
tool_files.create_file_by_raw.side_effect = [
SimpleNamespace(id="md-tool-file"),
SimpleNamespace(id="zip-tool-file"),
SimpleNamespace(id="script-tool-file"),
]
drive = MagicMock()
drive.commit.return_value = []
@ -67,34 +66,30 @@ def test_standardize_creates_drive_owned_toolfiles_and_commits_archive_members()
)
assert service.last_committed_items == []
# ToolFiles: SKILL.md, full archive, and each inspectable package member.
assert tool_files.create_file_by_raw.call_count == 3
md_call, zip_call, script_call = tool_files.create_file_by_raw.call_args_list
# ToolFiles: SKILL.md and the full archive. Archive members stay lazy.
assert tool_files.create_file_by_raw.call_count == 2
md_call, zip_call = tool_files.create_file_by_raw.call_args_list
assert md_call.kwargs["mimetype"] == "text/markdown"
assert md_call.kwargs["file_binary"] == _SKILL_MD
assert zip_call.kwargs["mimetype"] == "application/zip"
assert zip_call.kwargs["file_binary"] == content
assert script_call.kwargs["mimetype"] in {"text/x-python", "text/plain", "application/octet-stream"}
assert script_call.kwargs["file_binary"] == b"print('x')\n"
assert script_call.kwargs["filename"] == "run.py"
# Committed as drive-owned with the standardized keys.
# Committed as drive-owned with the standardized keys. Member paths are
# carried in metadata for inspect/preview/runtime lazy resolution.
commit_kwargs = drive.commit.call_args.kwargs
assert commit_kwargs["agent_id"] == "agent-1"
items = commit_kwargs["items"]
assert [item.key for item in items] == [
"pdf-toolkit/SKILL.md",
"pdf-toolkit/.DIFY-SKILL-FULL.zip",
"pdf-toolkit/scripts/run.py",
]
assert all(item.value_owned_by_drive for item in items)
assert [item.file_ref.id for item in items] == ["md-tool-file", "zip-tool-file", "script-tool-file"]
assert [item.file_ref.id for item in items] == ["md-tool-file", "zip-tool-file"]
assert items[0].is_skill is True
assert items[0].skill_metadata is not None
assert items[0].skill_metadata.name == "PDF Toolkit"
assert items[0].skill_metadata.manifest_files == ["SKILL.md", "scripts/run.py"]
assert items[1].is_skill is False
assert items[2].is_skill is False
# The returned upload response carries only the drive-derived fields the UI needs.
skill = result["skill"]

View File

@ -7,6 +7,8 @@ exercised against the project's in-memory SQLite engine with seeded ToolFiles.
from __future__ import annotations
import datetime
import io
import zipfile
from collections.abc import Generator
from unittest.mock import patch
@ -103,6 +105,14 @@ def _seed_tool_file(*, user_id: str = USER, name: str = "f.txt") -> str:
return tool_file.id
def _zip_bytes(members: dict[str, bytes]) -> bytes:
buffer = io.BytesIO()
with zipfile.ZipFile(buffer, "w") as archive:
for name, data in members.items():
archive.writestr(name, data)
return buffer.getvalue()
def _commit(key: str, tool_file_id: str, *, owned: bool = True):
return AgentDriveService().commit(
tenant_id=TENANT,
@ -769,7 +779,8 @@ def test_inspect_skill_returns_manifest_files_and_file_tree():
assert result["warnings"] == []
assert [file["path"] for file in result["files"]] == ["SKILL.md", "references/guide.md", "scripts/run.py"]
assert result["files"][0]["available_in_drive"] is True
assert result["files"][1]["available_in_drive"] is False
assert result["files"][1]["available_in_drive"] is True
assert result["files"][1]["drive_key"] == "pdf-toolkit/references/guide.md"
assert result["file_tree"][0]["name"] == "references"
assert result["file_tree"][1]["name"] == "scripts"
assert result["file_tree"][2]["name"] == "SKILL.md"
@ -787,6 +798,48 @@ def test_inspect_skill_falls_back_to_drive_keys_when_manifest_missing():
assert [file["path"] for file in result["files"]] == ["SKILL.md"]
def test_preview_skill_archive_member_from_manifest_without_drive_row():
_commit_skill(manifest_files=["SKILL.md", "references/guide.md"])
archive = _zip_bytes({"SKILL.md": b"# PDF Toolkit\n", "references/guide.md": b"Guide content\n"})
with patch("services.agent_drive_service.storage") as storage_mock:
storage_mock.load_stream.return_value = iter([archive])
result = AgentDriveService().preview(
tenant_id=TENANT,
agent_id=AGENT,
key="pdf-toolkit/references/guide.md",
)
assert result == {
"key": "pdf-toolkit/references/guide.md",
"size": len(b"Guide content\n"),
"truncated": False,
"binary": False,
"text": "Guide content\n",
}
def test_download_url_signs_skill_archive_member_from_manifest_without_drive_row():
_commit_skill(manifest_files=["SKILL.md", "references/guide.md"])
with patch.object(
AgentDriveService,
"sign_archive_member_url",
return_value="https://signed.example/member",
) as sign:
url = AgentDriveService().download_url(
tenant_id=TENANT,
agent_id=AGENT,
key="pdf-toolkit/references/guide.md",
)
assert url == "https://signed.example/member"
kwargs = sign.call_args.kwargs
assert kwargs["key"] == "pdf-toolkit/references/guide.md"
assert kwargs["member_path"] == "references/guide.md"
assert kwargs["for_external"] is True
def test_skill_metadata_rejects_non_canonical_rows():
tf = _seed_tool_file(name="not-skill.md")
with pytest.raises(AgentDriveError) as exc_info: