From 70fbbbf2c549e76026347891ee387999bb902809 Mon Sep 17 00:00:00 2001 From: zyssyz123 <916125788@qq.com> Date: Thu, 25 Jun 2026 12:14:12 +0800 Subject: [PATCH] fix(agent-v2): lazily resolve skill archive members (#37914) --- .../console/app/agent_drive_inspector.py | 46 +++ api/controllers/files/__init__.py | 3 +- api/controllers/files/agent_drive_archive.py | 67 ++++ .../agent/skill_standardize_service.py | 42 +- api/services/agent_drive_service.py | 368 ++++++++++++++++-- .../agent/test_skill_standardize_service.py | 19 +- .../services/test_agent_drive_service.py | 55 ++- 7 files changed, 512 insertions(+), 88 deletions(-) create mode 100644 api/controllers/files/agent_drive_archive.py diff --git a/api/controllers/console/app/agent_drive_inspector.py b/api/controllers/console/app/agent_drive_inspector.py index 1ae4addffd5..dc8a2d27d26 100644 --- a/api/controllers/console/app/agent_drive_inspector.py +++ b/api/controllers/console/app/agent_drive_inspector.py @@ -208,6 +208,30 @@ def _file_ref_for_active_soul(*, tenant_id: str, agent_id: str, key: str): return file_ref +def _archive_member_ref_for_active_soul( + *, tenant_id: str, agent_id: str, key: str +) -> tuple[AgentDriveFileKind, str, str] | None: + agent_soul = AgentSoulFilesService.active_agent_soul(session=db.session, tenant_id=tenant_id, agent_id=agent_id) + normalized_key = key.strip().lstrip("/") + for skill in agent_soul.files.skills: + skill_path = skill.path or ( + AgentSoulFilesService.skill_path_from_key(skill.skill_md_key) if skill.skill_md_key else None + ) + if not skill_path or not normalized_key.startswith(f"{skill_path}/"): + continue + member_path = normalized_key.removeprefix(f"{skill_path}/") + if member_path == ".DIFY-SKILL-FULL.zip": + return None + manifest_files = {str(path).strip().lstrip("/") for path in (skill.manifest_files or [])} + if manifest_files and member_path not in manifest_files: + return None + archive_file_id = skill.full_archive_file_id + if not archive_file_id: + return None + return AgentDriveFileKind.TOOL_FILE, archive_file_id, member_path + return None + + def _file_kind_from_ref(file_ref) -> AgentDriveFileKind | None: raw = file_ref.transfer_method or ("upload_file" if file_ref.upload_file_id else None) if raw is None: @@ -221,6 +245,17 @@ def _file_kind_from_ref(file_ref) -> AgentDriveFileKind | None: def _preview_versioned_file(*, tenant_id: str, agent_id: str, key: str) -> dict[str, Any]: file_ref = _file_ref_for_active_soul(tenant_id=tenant_id, agent_id=agent_id, key=key) if file_ref is None: + archive_member = _archive_member_ref_for_active_soul(tenant_id=tenant_id, agent_id=agent_id, key=key) + if archive_member is not None: + archive_file_kind, archive_file_id, member_path = archive_member + return AgentDriveService().preview_archive_member_for_ref( + tenant_id=tenant_id, + agent_id=agent_id, + key=key, + archive_file_kind=archive_file_kind, + archive_file_id=archive_file_id, + member_path=member_path, + ) return AgentDriveService().preview(tenant_id=tenant_id, agent_id=agent_id, key=key) file_kind = _file_kind_from_ref(file_ref) file_id = file_ref.file_id or file_ref.upload_file_id @@ -239,6 +274,17 @@ def _preview_versioned_file(*, tenant_id: str, agent_id: str, key: str) -> dict[ def _download_versioned_file(*, tenant_id: str, agent_id: str, key: str) -> str: file_ref = _file_ref_for_active_soul(tenant_id=tenant_id, agent_id=agent_id, key=key) if file_ref is None: + archive_member = _archive_member_ref_for_active_soul(tenant_id=tenant_id, agent_id=agent_id, key=key) + if archive_member is not None: + archive_file_kind, archive_file_id, member_path = archive_member + return AgentDriveService().download_url_archive_member_for_ref( + tenant_id=tenant_id, + agent_id=agent_id, + key=key, + archive_file_kind=archive_file_kind, + archive_file_id=archive_file_id, + member_path=member_path, + ) return AgentDriveService().download_url(tenant_id=tenant_id, agent_id=agent_id, key=key) file_kind = _file_kind_from_ref(file_ref) file_id = file_ref.file_id or file_ref.upload_file_id diff --git a/api/controllers/files/__init__.py b/api/controllers/files/__init__.py index f8976b86b9f..5d26308e430 100644 --- a/api/controllers/files/__init__.py +++ b/api/controllers/files/__init__.py @@ -14,11 +14,12 @@ api = ExternalApi( files_ns = Namespace("files", description="File operations", path="/") -from . import image_preview, tool_files, upload +from . import agent_drive_archive, image_preview, tool_files, upload api.add_namespace(files_ns) __all__ = [ + "agent_drive_archive", "api", "bp", "files_ns", diff --git a/api/controllers/files/agent_drive_archive.py b/api/controllers/files/agent_drive_archive.py new file mode 100644 index 00000000000..afa6ac79483 --- /dev/null +++ b/api/controllers/files/agent_drive_archive.py @@ -0,0 +1,67 @@ +from urllib.parse import quote + +from flask import Response, request +from flask_restx import Resource +from pydantic import BaseModel, Field +from werkzeug.exceptions import Forbidden, NotFound + +from controllers.common.file_response import enforce_download_for_html +from controllers.common.schema import register_schema_models +from controllers.files import files_ns +from models.agent import AgentDriveFileKind +from services.agent_drive_service import AgentDriveError, AgentDriveService + + +class AgentDriveArchiveMemberQuery(BaseModel): + tenant_id: str = Field(..., description="Tenant ID") + agent_id: str = Field(..., description="Agent ID") + key: str = Field(..., description="Virtual drive key") + archive_file_kind: AgentDriveFileKind = Field(..., description="Archive file kind") + archive_file_id: str = Field(..., description="Archive file id") + member_path: str = Field(..., description="Zip member path") + timestamp: str = Field(..., description="Unix timestamp") + nonce: str = Field(..., description="Random nonce") + sign: str = Field(..., description="HMAC signature") + as_attachment: bool = Field(default=False, description="Download as attachment") + + +register_schema_models(files_ns, AgentDriveArchiveMemberQuery) + + +@files_ns.route("/agent-drive/archive-member") +class AgentDriveArchiveMemberApi(Resource): + @files_ns.doc("get_agent_drive_archive_member") + @files_ns.doc(description="Download a lazily resolved Agent Skill archive member by signed parameters") + def get(self): + args = AgentDriveArchiveMemberQuery.model_validate(request.args.to_dict(flat=True)) + if not AgentDriveService.verify_archive_member_signature( + tenant_id=args.tenant_id, + agent_id=args.agent_id, + key=args.key, + archive_file_kind=args.archive_file_kind, + archive_file_id=args.archive_file_id, + member_path=args.member_path, + timestamp=args.timestamp, + nonce=args.nonce, + sign=args.sign, + ): + raise Forbidden("Invalid request.") + try: + payload, mime_type, filename = AgentDriveService().load_archive_member_for_signed_request( + tenant_id=args.tenant_id, + agent_id=args.agent_id, + key=args.key, + archive_file_kind=args.archive_file_kind, + archive_file_id=args.archive_file_id, + member_path=args.member_path, + ) + except AgentDriveError as exc: + raise NotFound(exc.message) from exc + + response = Response(payload, mimetype=mime_type, direct_passthrough=True, headers={}) + response.headers["Content-Length"] = str(len(payload)) + if args.as_attachment and filename: + encoded_filename = quote(filename) + response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}" + enforce_download_for_html(response, mime_type=mime_type, filename=filename, extension="") + return response diff --git a/api/services/agent/skill_standardize_service.py b/api/services/agent/skill_standardize_service.py index c6090411f37..579c9591682 100644 --- a/api/services/agent/skill_standardize_service.py +++ b/api/services/agent/skill_standardize_service.py @@ -7,19 +7,14 @@ to the agent drive (Agent Files §5.4 / §4): * ``/.DIFY-SKILL-FULL.zip`` — the full archive, kept only to restore the complete skill contents. -Both are stored as ``ToolFile`` records and bound via ``AgentDriveService.commit`` -with ``value_owned_by_drive=True`` (the drive owns their lifecycle). The returned -payload is the slim drive-derived skill DTO the UI needs to work with the drive -catalog — ``name``, ``description``, ``path``, ``skill_md_key``, and -``archive_key`` — plus the extracted manifest for upload feedback. The console -``/skills/upload`` endpoints delegate to this service so "upload" now always means -drive-backed skill normalization rather than Agent Soul binding. +The archive's member list is stored in skill metadata and resolved lazily for +inspect/preview/runtime. Upload must not eagerly materialize every archive member +as a separate ToolFile; small archives with many files would otherwise perform +hundreds of storage writes and DB commits inside the request. """ from __future__ import annotations -import mimetypes -import posixpath import re from typing import Any @@ -65,8 +60,8 @@ class SkillStandardizeService: skill_md_bytes = self._package.read_member_bytes(content=content, member_path=manifest.entry_path) slug = slugify_skill_name(manifest.name) - # Drive-owned files: canonical SKILL.md, every inspectable archive file, - # and the full archive for future restore/export. + # Drive-owned files: canonical SKILL.md and the full archive. The + # archive member tree is preserved in metadata and resolved lazily. md_tool_file = self._tool_files.create_file_by_raw( user_id=user_id, tenant_id=tenant_id, @@ -86,30 +81,6 @@ class SkillStandardizeService: skill_md_key = f"{slug}/{_SKILL_MD_NAME}" archive_key = f"{slug}/{_FULL_ARCHIVE_NAME}" - member_items: list[DriveCommitItem] = [] - for member_path in sorted(set(manifest.files)): - member_key = f"{slug}/{member_path}" - if member_key in {skill_md_key, archive_key}: - continue - - member_bytes = self._package.read_member_bytes(content=content, member_path=member_path) - mimetype = mimetypes.guess_type(member_path)[0] or "application/octet-stream" - member_tool_file = self._tool_files.create_file_by_raw( - user_id=user_id, - tenant_id=tenant_id, - conversation_id=None, - file_binary=member_bytes, - mimetype=mimetype, - filename=posixpath.basename(member_path), - ) - member_items.append( - DriveCommitItem( - key=member_key, - file_ref=DriveFileRef(kind="tool_file", id=member_tool_file.id), - value_owned_by_drive=True, - ) - ) - committed_items = self._drive.commit( tenant_id=tenant_id, user_id=user_id, @@ -131,7 +102,6 @@ class SkillStandardizeService: file_ref=DriveFileRef(kind="tool_file", id=archive_tool_file.id), value_owned_by_drive=True, ), - *member_items, ], ) self.last_committed_items = committed_items diff --git a/api/services/agent_drive_service.py b/api/services/agent_drive_service.py index 6cf7a68996b..0009fb8845c 100644 --- a/api/services/agent_drive_service.py +++ b/api/services/agent_drive_service.py @@ -19,10 +19,18 @@ ToolFile records (see ``AgentDriveFile``). This service is the control plane: from __future__ import annotations +import base64 +import hashlib +import hmac +import io import json import logging +import mimetypes +import os import re +import time import urllib.parse +import zipfile from typing import Any, Literal, TypedDict from urllib.parse import unquote @@ -31,6 +39,7 @@ from sqlalchemy import func, select from sqlalchemy.exc import DataError, SQLAlchemyError from sqlalchemy.orm import Session +from configs import dify_config from core.app.file_access.controller import DatabaseFileAccessController from core.db.session_factory import session_factory from extensions.ext_storage import storage @@ -46,6 +55,7 @@ _MAX_KEY_LENGTH = 512 _DRIVE_REF_PREFIX = "agent-" _SKILL_MD_SUFFIX = "/SKILL.md" _SKILL_ARCHIVE_NAME = ".DIFY-SKILL-FULL.zip" +_ARCHIVE_MEMBER_DOWNLOAD_PURPOSE = "agent-drive-archive-member" class AgentDriveError(Exception): @@ -365,6 +375,7 @@ class AgentDriveService: skill_md_key=skill_md_key, manifest_files=manifest_files, drive_keys=drive_keys, + archive_available=catalog["archive_key"] in drive_keys if catalog["archive_key"] else False, ) return { **catalog, @@ -598,6 +609,7 @@ class AgentDriveService: skill_md_key: str, manifest_files: list[str] | None, drive_keys: set[str], + archive_available: bool = False, ) -> tuple[list[AgentDriveSkillFileInfo], list[str]]: warnings: list[str] = [] if manifest_files: @@ -617,13 +629,14 @@ class AgentDriveService: if path == _SKILL_ARCHIVE_NAME: continue drive_key = f"{skill_path}/{path}" + available_in_drive = drive_key in drive_keys or (archive_available and path != _SKILL_ARCHIVE_NAME) files.append( { "path": path, "name": path.rsplit("/", 1)[-1], "type": "file", - "drive_key": drive_key if drive_key in drive_keys else None, - "available_in_drive": drive_key in drive_keys, + "drive_key": drive_key if available_in_drive else None, + "available_in_drive": available_in_drive, } ) if "SKILL.md" not in {file["path"] for file in files}: @@ -891,36 +904,138 @@ class AgentDriveService: raise AgentDriveError("drive_key_not_found", "drive value record is missing", status_code=404) return upload_file.key - def preview(self, *, tenant_id: str, agent_id: str, key: str) -> dict[str, Any]: - """Truncated text preview of one drive value (binary-safe, never 500s on size).""" - with session_factory.create_session() as session: - self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id) - row = self._require_row(session, tenant_id=tenant_id, agent_id=agent_id, key=key) - storage_key = self._storage_key_for_row(session, tenant_id=tenant_id, row=row) - size = row.size + def _archive_member_for_key( + self, + session: Session, + *, + tenant_id: str, + agent_id: str, + key: str, + ) -> tuple[AgentDriveFile, str]: + normalized_key = normalize_drive_key(key) + if "/" not in normalized_key: + raise AgentDriveError("drive_key_not_found", "no drive entry for this key", status_code=404) + skill_path, member_path = normalized_key.split("/", 1) + if member_path in {_SKILL_ARCHIVE_NAME, ""}: + raise AgentDriveError("drive_key_not_found", "no archive member for this key", status_code=404) - data = bytearray() - for chunk in storage.load_stream(storage_key): - data.extend(chunk) - if len(data) > self.PREVIEW_MAX_BYTES: - break - truncated = len(data) > self.PREVIEW_MAX_BYTES - sample = bytes(data[: self.PREVIEW_MAX_BYTES]) - # Same semantics as the sandbox read endpoint: NUL or undecodable -> binary. + skill_md_key = f"{skill_path}{_SKILL_MD_SUFFIX}" + skill_row = session.scalar( + select(AgentDriveFile).where( + AgentDriveFile.tenant_id == tenant_id, + AgentDriveFile.agent_id == agent_id, + AgentDriveFile.key == skill_md_key, + AgentDriveFile.is_skill.is_(True), + ) + ) + if skill_row is None: + raise AgentDriveError("drive_key_not_found", "no drive entry for this key", status_code=404) + metadata = self._parse_skill_metadata(skill_row.key, skill_row.skill_metadata) + manifest_files = {normalize_drive_key(path) for path in (metadata.manifest_files or [])} + if member_path not in manifest_files: + raise AgentDriveError("drive_key_not_found", "archive member is not part of this skill", status_code=404) + archive_row = session.scalar( + select(AgentDriveFile).where( + AgentDriveFile.tenant_id == tenant_id, + AgentDriveFile.agent_id == agent_id, + AgentDriveFile.key == self._skill_archive_key(skill_md_key), + ) + ) + if archive_row is None: + raise AgentDriveError("drive_key_not_found", "skill archive is missing", status_code=404) + return archive_row, member_path + + def _load_archive_member_bytes( + self, + *, + tenant_id: str, + archive_file_kind: AgentDriveFileKind, + archive_file_id: str, + member_path: str, + ) -> bytes: + member_path = normalize_drive_key(member_path) + with session_factory.create_session() as session: + storage_key = self._storage_key_for_ref( + session, + tenant_id=tenant_id, + file_kind=archive_file_kind, + file_id=archive_file_id, + ) + archive_bytes = b"".join(storage.load_stream(storage_key)) + try: + with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive: + member = next( + ( + info + for info in archive.infolist() + if not info.is_dir() and normalize_drive_key(info.filename) == member_path + ), + None, + ) + if member is None: + raise AgentDriveError( + "drive_key_not_found", "archive member is missing from the skill archive", status_code=404 + ) + return archive.read(member) + except zipfile.BadZipFile as exc: + raise AgentDriveError("invalid_skill_archive", "skill archive is not a valid zip", status_code=500) from exc + + @classmethod + def _preview_bytes(cls, *, key: str, size: int | None, payload: bytes) -> dict[str, Any]: + truncated = len(payload) > cls.PREVIEW_MAX_BYTES + sample = payload[: cls.PREVIEW_MAX_BYTES] if b"\x00" in sample: - return {"key": row.key, "size": size, "truncated": truncated, "binary": True, "text": None} + return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None} try: text = sample.decode("utf-8") except UnicodeDecodeError: if truncated: - # A multi-byte char may sit on the cut point; retry without the tail. try: text = sample[:-3].decode("utf-8", errors="strict") except UnicodeDecodeError: - return {"key": row.key, "size": size, "truncated": truncated, "binary": True, "text": None} + return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None} else: - return {"key": row.key, "size": size, "truncated": truncated, "binary": True, "text": None} - return {"key": row.key, "size": size, "truncated": truncated, "binary": False, "text": text} + return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None} + return {"key": key, "size": size, "truncated": truncated, "binary": False, "text": text} + + def preview(self, *, tenant_id: str, agent_id: str, key: str) -> dict[str, Any]: + """Truncated text preview of one drive value (binary-safe, never 500s on size).""" + with session_factory.create_session() as session: + self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id) + try: + row = self._require_row(session, tenant_id=tenant_id, agent_id=agent_id, key=key) + storage_key = self._storage_key_for_row(session, tenant_id=tenant_id, row=row) + size = row.size + response_key = row.key + archive_ref: tuple[AgentDriveFile, str] | None = None + except AgentDriveError: + archive_ref = self._archive_member_for_key( + session, + tenant_id=tenant_id, + agent_id=agent_id, + key=key, + ) + storage_key = None + size = None + response_key = normalize_drive_key(key) + + if archive_ref is not None: + archive_row, member_path = archive_ref + payload = self._load_archive_member_bytes( + tenant_id=tenant_id, + archive_file_kind=archive_row.file_kind, + archive_file_id=archive_row.file_id, + member_path=member_path, + ) + return self._preview_bytes(key=response_key, size=len(payload), payload=payload) + + data = bytearray() + assert storage_key is not None + for chunk in storage.load_stream(storage_key): + data.extend(chunk) + if len(data) > self.PREVIEW_MAX_BYTES: + break + return self._preview_bytes(key=response_key, size=size, payload=bytes(data)) def preview_file_ref( self, @@ -947,27 +1062,51 @@ class AgentDriveService: data.extend(chunk) if len(data) > self.PREVIEW_MAX_BYTES: break - truncated = len(data) > self.PREVIEW_MAX_BYTES - sample = bytes(data[: self.PREVIEW_MAX_BYTES]) - if b"\x00" in sample: - return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None} - try: - text = sample.decode("utf-8") - except UnicodeDecodeError: - if truncated: - try: - text = sample[:-3].decode("utf-8", errors="strict") - except UnicodeDecodeError: - return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None} - else: - return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None} - return {"key": key, "size": size, "truncated": truncated, "binary": False, "text": text} + return self._preview_bytes(key=key, size=size, payload=bytes(data)) + + def preview_archive_member_for_ref( + self, + *, + tenant_id: str, + agent_id: str, + key: str, + archive_file_kind: AgentDriveFileKind, + archive_file_id: str, + member_path: str, + ) -> dict[str, Any]: + with session_factory.create_session() as session: + self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id) + payload = self._load_archive_member_bytes( + tenant_id=tenant_id, + archive_file_kind=archive_file_kind, + archive_file_id=archive_file_id, + member_path=member_path, + ) + return self._preview_bytes(key=normalize_drive_key(key), size=len(payload), payload=payload) def download_url(self, *, tenant_id: str, agent_id: str, key: str) -> str: """External signed URL for a browser download of one drive value.""" with session_factory.create_session() as session: self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id) - row = self._require_row(session, tenant_id=tenant_id, agent_id=agent_id, key=key) + try: + row = self._require_row(session, tenant_id=tenant_id, agent_id=agent_id, key=key) + except AgentDriveError: + archive_row, member_path = self._archive_member_for_key( + session, + tenant_id=tenant_id, + agent_id=agent_id, + key=key, + ) + return self.sign_archive_member_url( + tenant_id=tenant_id, + agent_id=agent_id, + key=key, + archive_file_kind=archive_row.file_kind, + archive_file_id=archive_row.file_id, + member_path=member_path, + for_external=True, + as_attachment=True, + ) url = self._resolve_download_url( tenant_id=tenant_id, file_kind=row.file_kind, @@ -979,6 +1118,30 @@ class AgentDriveService: raise AgentDriveError("drive_key_not_found", "drive value cannot be resolved", status_code=404) return url + def download_url_archive_member_for_ref( + self, + *, + tenant_id: str, + agent_id: str, + key: str, + archive_file_kind: AgentDriveFileKind, + archive_file_id: str, + member_path: str, + for_external: bool = True, + ) -> str: + with session_factory.create_session() as session: + self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id) + return self.sign_archive_member_url( + tenant_id=tenant_id, + agent_id=agent_id, + key=key, + archive_file_kind=archive_file_kind, + archive_file_id=archive_file_id, + member_path=member_path, + for_external=for_external, + as_attachment=True, + ) + def download_url_for_ref( self, *, @@ -1000,6 +1163,135 @@ class AgentDriveService: raise AgentDriveError("drive_key_not_found", "drive value cannot be resolved", status_code=404) return url + @staticmethod + def _secret_key() -> bytes: + return dify_config.SECRET_KEY.encode() + + @classmethod + def _archive_member_signature_payload( + cls, + *, + tenant_id: str, + agent_id: str, + key: str, + archive_file_kind: AgentDriveFileKind, + archive_file_id: str, + member_path: str, + timestamp: str, + nonce: str, + ) -> str: + return "|".join( + [ + _ARCHIVE_MEMBER_DOWNLOAD_PURPOSE, + tenant_id, + agent_id, + normalize_drive_key(key), + archive_file_kind.value, + archive_file_id, + normalize_drive_key(member_path), + timestamp, + nonce, + ] + ) + + @classmethod + def _sign_archive_member_payload(cls, payload: str) -> str: + digest = hmac.new(cls._secret_key(), payload.encode(), hashlib.sha256).digest() + return base64.urlsafe_b64encode(digest).decode() + + @classmethod + def sign_archive_member_url( + cls, + *, + tenant_id: str, + agent_id: str, + key: str, + archive_file_kind: AgentDriveFileKind, + archive_file_id: str, + member_path: str, + for_external: bool, + as_attachment: bool = False, + ) -> str: + base_url = dify_config.FILES_URL if for_external else (dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL) + timestamp = str(int(time.time())) + nonce = os.urandom(16).hex() + payload = cls._archive_member_signature_payload( + tenant_id=tenant_id, + agent_id=agent_id, + key=key, + archive_file_kind=archive_file_kind, + archive_file_id=archive_file_id, + member_path=member_path, + timestamp=timestamp, + nonce=nonce, + ) + query = urllib.parse.urlencode( + { + "tenant_id": tenant_id, + "agent_id": agent_id, + "key": normalize_drive_key(key), + "archive_file_kind": archive_file_kind.value, + "archive_file_id": archive_file_id, + "member_path": normalize_drive_key(member_path), + "timestamp": timestamp, + "nonce": nonce, + "sign": cls._sign_archive_member_payload(payload), + "as_attachment": str(as_attachment).lower(), + } + ) + return f"{base_url}/files/agent-drive/archive-member?{query}" + + @classmethod + def verify_archive_member_signature( + cls, + *, + tenant_id: str, + agent_id: str, + key: str, + archive_file_kind: AgentDriveFileKind, + archive_file_id: str, + member_path: str, + timestamp: str, + nonce: str, + sign: str, + ) -> bool: + payload = cls._archive_member_signature_payload( + tenant_id=tenant_id, + agent_id=agent_id, + key=key, + archive_file_kind=archive_file_kind, + archive_file_id=archive_file_id, + member_path=member_path, + timestamp=timestamp, + nonce=nonce, + ) + if sign != cls._sign_archive_member_payload(payload): + return False + current_time = int(time.time()) + return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT + + def load_archive_member_for_signed_request( + self, + *, + tenant_id: str, + agent_id: str, + key: str, + archive_file_kind: AgentDriveFileKind, + archive_file_id: str, + member_path: str, + ) -> tuple[bytes, str, str]: + with session_factory.create_session() as session: + self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id) + payload = self._load_archive_member_bytes( + tenant_id=tenant_id, + archive_file_kind=archive_file_kind, + archive_file_id=archive_file_id, + member_path=member_path, + ) + mime_type = mimetypes.guess_type(member_path)[0] or "application/octet-stream" + filename = normalize_drive_key(key).rsplit("/", 1)[-1] + return payload, mime_type, filename + __all__ = [ "AgentDriveError", diff --git a/api/tests/unit_tests/services/agent/test_skill_standardize_service.py b/api/tests/unit_tests/services/agent/test_skill_standardize_service.py index c5647e8d7f7..6a8ec4391fd 100644 --- a/api/tests/unit_tests/services/agent/test_skill_standardize_service.py +++ b/api/tests/unit_tests/services/agent/test_skill_standardize_service.py @@ -32,14 +32,13 @@ def test_slugify_skill_name(): assert slugify_skill_name("") == "skill" -def test_standardize_creates_drive_owned_toolfiles_and_commits_archive_members(): +def test_standardize_creates_drive_owned_toolfiles_and_commits_archive_manifest(): content = _zip({"SKILL.md": _SKILL_MD, "scripts/run.py": b"print('x')\n"}) tool_files = MagicMock() tool_files.create_file_by_raw.side_effect = [ SimpleNamespace(id="md-tool-file"), SimpleNamespace(id="zip-tool-file"), - SimpleNamespace(id="script-tool-file"), ] drive = MagicMock() drive.commit.return_value = [] @@ -67,34 +66,30 @@ def test_standardize_creates_drive_owned_toolfiles_and_commits_archive_members() ) assert service.last_committed_items == [] - # ToolFiles: SKILL.md, full archive, and each inspectable package member. - assert tool_files.create_file_by_raw.call_count == 3 - md_call, zip_call, script_call = tool_files.create_file_by_raw.call_args_list + # ToolFiles: SKILL.md and the full archive. Archive members stay lazy. + assert tool_files.create_file_by_raw.call_count == 2 + md_call, zip_call = tool_files.create_file_by_raw.call_args_list assert md_call.kwargs["mimetype"] == "text/markdown" assert md_call.kwargs["file_binary"] == _SKILL_MD assert zip_call.kwargs["mimetype"] == "application/zip" assert zip_call.kwargs["file_binary"] == content - assert script_call.kwargs["mimetype"] in {"text/x-python", "text/plain", "application/octet-stream"} - assert script_call.kwargs["file_binary"] == b"print('x')\n" - assert script_call.kwargs["filename"] == "run.py" - # Committed as drive-owned with the standardized keys. + # Committed as drive-owned with the standardized keys. Member paths are + # carried in metadata for inspect/preview/runtime lazy resolution. commit_kwargs = drive.commit.call_args.kwargs assert commit_kwargs["agent_id"] == "agent-1" items = commit_kwargs["items"] assert [item.key for item in items] == [ "pdf-toolkit/SKILL.md", "pdf-toolkit/.DIFY-SKILL-FULL.zip", - "pdf-toolkit/scripts/run.py", ] assert all(item.value_owned_by_drive for item in items) - assert [item.file_ref.id for item in items] == ["md-tool-file", "zip-tool-file", "script-tool-file"] + assert [item.file_ref.id for item in items] == ["md-tool-file", "zip-tool-file"] assert items[0].is_skill is True assert items[0].skill_metadata is not None assert items[0].skill_metadata.name == "PDF Toolkit" assert items[0].skill_metadata.manifest_files == ["SKILL.md", "scripts/run.py"] assert items[1].is_skill is False - assert items[2].is_skill is False # The returned upload response carries only the drive-derived fields the UI needs. skill = result["skill"] diff --git a/api/tests/unit_tests/services/test_agent_drive_service.py b/api/tests/unit_tests/services/test_agent_drive_service.py index e4a22b64a48..ad72e142522 100644 --- a/api/tests/unit_tests/services/test_agent_drive_service.py +++ b/api/tests/unit_tests/services/test_agent_drive_service.py @@ -7,6 +7,8 @@ exercised against the project's in-memory SQLite engine with seeded ToolFiles. from __future__ import annotations import datetime +import io +import zipfile from collections.abc import Generator from unittest.mock import patch @@ -103,6 +105,14 @@ def _seed_tool_file(*, user_id: str = USER, name: str = "f.txt") -> str: return tool_file.id +def _zip_bytes(members: dict[str, bytes]) -> bytes: + buffer = io.BytesIO() + with zipfile.ZipFile(buffer, "w") as archive: + for name, data in members.items(): + archive.writestr(name, data) + return buffer.getvalue() + + def _commit(key: str, tool_file_id: str, *, owned: bool = True): return AgentDriveService().commit( tenant_id=TENANT, @@ -769,7 +779,8 @@ def test_inspect_skill_returns_manifest_files_and_file_tree(): assert result["warnings"] == [] assert [file["path"] for file in result["files"]] == ["SKILL.md", "references/guide.md", "scripts/run.py"] assert result["files"][0]["available_in_drive"] is True - assert result["files"][1]["available_in_drive"] is False + assert result["files"][1]["available_in_drive"] is True + assert result["files"][1]["drive_key"] == "pdf-toolkit/references/guide.md" assert result["file_tree"][0]["name"] == "references" assert result["file_tree"][1]["name"] == "scripts" assert result["file_tree"][2]["name"] == "SKILL.md" @@ -787,6 +798,48 @@ def test_inspect_skill_falls_back_to_drive_keys_when_manifest_missing(): assert [file["path"] for file in result["files"]] == ["SKILL.md"] +def test_preview_skill_archive_member_from_manifest_without_drive_row(): + _commit_skill(manifest_files=["SKILL.md", "references/guide.md"]) + archive = _zip_bytes({"SKILL.md": b"# PDF Toolkit\n", "references/guide.md": b"Guide content\n"}) + + with patch("services.agent_drive_service.storage") as storage_mock: + storage_mock.load_stream.return_value = iter([archive]) + result = AgentDriveService().preview( + tenant_id=TENANT, + agent_id=AGENT, + key="pdf-toolkit/references/guide.md", + ) + + assert result == { + "key": "pdf-toolkit/references/guide.md", + "size": len(b"Guide content\n"), + "truncated": False, + "binary": False, + "text": "Guide content\n", + } + + +def test_download_url_signs_skill_archive_member_from_manifest_without_drive_row(): + _commit_skill(manifest_files=["SKILL.md", "references/guide.md"]) + + with patch.object( + AgentDriveService, + "sign_archive_member_url", + return_value="https://signed.example/member", + ) as sign: + url = AgentDriveService().download_url( + tenant_id=TENANT, + agent_id=AGENT, + key="pdf-toolkit/references/guide.md", + ) + + assert url == "https://signed.example/member" + kwargs = sign.call_args.kwargs + assert kwargs["key"] == "pdf-toolkit/references/guide.md" + assert kwargs["member_path"] == "references/guide.md" + assert kwargs["for_external"] is True + + def test_skill_metadata_rejects_non_canonical_rows(): tf = _seed_tool_file(name="not-skill.md") with pytest.raises(AgentDriveError) as exc_info: