mirror of
https://github.com/langgenius/dify.git
synced 2026-06-26 14:51:13 +08:00
fix(agent-v2): lazily resolve skill archive members (#37914)
This commit is contained in:
parent
4139d0c4fb
commit
70fbbbf2c5
@ -208,6 +208,30 @@ def _file_ref_for_active_soul(*, tenant_id: str, agent_id: str, key: str):
|
||||
return file_ref
|
||||
|
||||
|
||||
def _archive_member_ref_for_active_soul(
|
||||
*, tenant_id: str, agent_id: str, key: str
|
||||
) -> tuple[AgentDriveFileKind, str, str] | None:
|
||||
agent_soul = AgentSoulFilesService.active_agent_soul(session=db.session, tenant_id=tenant_id, agent_id=agent_id)
|
||||
normalized_key = key.strip().lstrip("/")
|
||||
for skill in agent_soul.files.skills:
|
||||
skill_path = skill.path or (
|
||||
AgentSoulFilesService.skill_path_from_key(skill.skill_md_key) if skill.skill_md_key else None
|
||||
)
|
||||
if not skill_path or not normalized_key.startswith(f"{skill_path}/"):
|
||||
continue
|
||||
member_path = normalized_key.removeprefix(f"{skill_path}/")
|
||||
if member_path == ".DIFY-SKILL-FULL.zip":
|
||||
return None
|
||||
manifest_files = {str(path).strip().lstrip("/") for path in (skill.manifest_files or [])}
|
||||
if manifest_files and member_path not in manifest_files:
|
||||
return None
|
||||
archive_file_id = skill.full_archive_file_id
|
||||
if not archive_file_id:
|
||||
return None
|
||||
return AgentDriveFileKind.TOOL_FILE, archive_file_id, member_path
|
||||
return None
|
||||
|
||||
|
||||
def _file_kind_from_ref(file_ref) -> AgentDriveFileKind | None:
|
||||
raw = file_ref.transfer_method or ("upload_file" if file_ref.upload_file_id else None)
|
||||
if raw is None:
|
||||
@ -221,6 +245,17 @@ def _file_kind_from_ref(file_ref) -> AgentDriveFileKind | None:
|
||||
def _preview_versioned_file(*, tenant_id: str, agent_id: str, key: str) -> dict[str, Any]:
|
||||
file_ref = _file_ref_for_active_soul(tenant_id=tenant_id, agent_id=agent_id, key=key)
|
||||
if file_ref is None:
|
||||
archive_member = _archive_member_ref_for_active_soul(tenant_id=tenant_id, agent_id=agent_id, key=key)
|
||||
if archive_member is not None:
|
||||
archive_file_kind, archive_file_id, member_path = archive_member
|
||||
return AgentDriveService().preview_archive_member_for_ref(
|
||||
tenant_id=tenant_id,
|
||||
agent_id=agent_id,
|
||||
key=key,
|
||||
archive_file_kind=archive_file_kind,
|
||||
archive_file_id=archive_file_id,
|
||||
member_path=member_path,
|
||||
)
|
||||
return AgentDriveService().preview(tenant_id=tenant_id, agent_id=agent_id, key=key)
|
||||
file_kind = _file_kind_from_ref(file_ref)
|
||||
file_id = file_ref.file_id or file_ref.upload_file_id
|
||||
@ -239,6 +274,17 @@ def _preview_versioned_file(*, tenant_id: str, agent_id: str, key: str) -> dict[
|
||||
def _download_versioned_file(*, tenant_id: str, agent_id: str, key: str) -> str:
|
||||
file_ref = _file_ref_for_active_soul(tenant_id=tenant_id, agent_id=agent_id, key=key)
|
||||
if file_ref is None:
|
||||
archive_member = _archive_member_ref_for_active_soul(tenant_id=tenant_id, agent_id=agent_id, key=key)
|
||||
if archive_member is not None:
|
||||
archive_file_kind, archive_file_id, member_path = archive_member
|
||||
return AgentDriveService().download_url_archive_member_for_ref(
|
||||
tenant_id=tenant_id,
|
||||
agent_id=agent_id,
|
||||
key=key,
|
||||
archive_file_kind=archive_file_kind,
|
||||
archive_file_id=archive_file_id,
|
||||
member_path=member_path,
|
||||
)
|
||||
return AgentDriveService().download_url(tenant_id=tenant_id, agent_id=agent_id, key=key)
|
||||
file_kind = _file_kind_from_ref(file_ref)
|
||||
file_id = file_ref.file_id or file_ref.upload_file_id
|
||||
|
||||
@ -14,11 +14,12 @@ api = ExternalApi(
|
||||
|
||||
files_ns = Namespace("files", description="File operations", path="/")
|
||||
|
||||
from . import image_preview, tool_files, upload
|
||||
from . import agent_drive_archive, image_preview, tool_files, upload
|
||||
|
||||
api.add_namespace(files_ns)
|
||||
|
||||
__all__ = [
|
||||
"agent_drive_archive",
|
||||
"api",
|
||||
"bp",
|
||||
"files_ns",
|
||||
|
||||
67
api/controllers/files/agent_drive_archive.py
Normal file
67
api/controllers/files/agent_drive_archive.py
Normal file
@ -0,0 +1,67 @@
|
||||
from urllib.parse import quote
|
||||
|
||||
from flask import Response, request
|
||||
from flask_restx import Resource
|
||||
from pydantic import BaseModel, Field
|
||||
from werkzeug.exceptions import Forbidden, NotFound
|
||||
|
||||
from controllers.common.file_response import enforce_download_for_html
|
||||
from controllers.common.schema import register_schema_models
|
||||
from controllers.files import files_ns
|
||||
from models.agent import AgentDriveFileKind
|
||||
from services.agent_drive_service import AgentDriveError, AgentDriveService
|
||||
|
||||
|
||||
class AgentDriveArchiveMemberQuery(BaseModel):
|
||||
tenant_id: str = Field(..., description="Tenant ID")
|
||||
agent_id: str = Field(..., description="Agent ID")
|
||||
key: str = Field(..., description="Virtual drive key")
|
||||
archive_file_kind: AgentDriveFileKind = Field(..., description="Archive file kind")
|
||||
archive_file_id: str = Field(..., description="Archive file id")
|
||||
member_path: str = Field(..., description="Zip member path")
|
||||
timestamp: str = Field(..., description="Unix timestamp")
|
||||
nonce: str = Field(..., description="Random nonce")
|
||||
sign: str = Field(..., description="HMAC signature")
|
||||
as_attachment: bool = Field(default=False, description="Download as attachment")
|
||||
|
||||
|
||||
register_schema_models(files_ns, AgentDriveArchiveMemberQuery)
|
||||
|
||||
|
||||
@files_ns.route("/agent-drive/archive-member")
|
||||
class AgentDriveArchiveMemberApi(Resource):
|
||||
@files_ns.doc("get_agent_drive_archive_member")
|
||||
@files_ns.doc(description="Download a lazily resolved Agent Skill archive member by signed parameters")
|
||||
def get(self):
|
||||
args = AgentDriveArchiveMemberQuery.model_validate(request.args.to_dict(flat=True))
|
||||
if not AgentDriveService.verify_archive_member_signature(
|
||||
tenant_id=args.tenant_id,
|
||||
agent_id=args.agent_id,
|
||||
key=args.key,
|
||||
archive_file_kind=args.archive_file_kind,
|
||||
archive_file_id=args.archive_file_id,
|
||||
member_path=args.member_path,
|
||||
timestamp=args.timestamp,
|
||||
nonce=args.nonce,
|
||||
sign=args.sign,
|
||||
):
|
||||
raise Forbidden("Invalid request.")
|
||||
try:
|
||||
payload, mime_type, filename = AgentDriveService().load_archive_member_for_signed_request(
|
||||
tenant_id=args.tenant_id,
|
||||
agent_id=args.agent_id,
|
||||
key=args.key,
|
||||
archive_file_kind=args.archive_file_kind,
|
||||
archive_file_id=args.archive_file_id,
|
||||
member_path=args.member_path,
|
||||
)
|
||||
except AgentDriveError as exc:
|
||||
raise NotFound(exc.message) from exc
|
||||
|
||||
response = Response(payload, mimetype=mime_type, direct_passthrough=True, headers={})
|
||||
response.headers["Content-Length"] = str(len(payload))
|
||||
if args.as_attachment and filename:
|
||||
encoded_filename = quote(filename)
|
||||
response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}"
|
||||
enforce_download_for_html(response, mime_type=mime_type, filename=filename, extension="")
|
||||
return response
|
||||
@ -7,19 +7,14 @@ to the agent drive (Agent Files §5.4 / §4):
|
||||
* ``<slug>/.DIFY-SKILL-FULL.zip`` — the full archive, kept only to restore the
|
||||
complete skill contents.
|
||||
|
||||
Both are stored as ``ToolFile`` records and bound via ``AgentDriveService.commit``
|
||||
with ``value_owned_by_drive=True`` (the drive owns their lifecycle). The returned
|
||||
payload is the slim drive-derived skill DTO the UI needs to work with the drive
|
||||
catalog — ``name``, ``description``, ``path``, ``skill_md_key``, and
|
||||
``archive_key`` — plus the extracted manifest for upload feedback. The console
|
||||
``/skills/upload`` endpoints delegate to this service so "upload" now always means
|
||||
drive-backed skill normalization rather than Agent Soul binding.
|
||||
The archive's member list is stored in skill metadata and resolved lazily for
|
||||
inspect/preview/runtime. Upload must not eagerly materialize every archive member
|
||||
as a separate ToolFile; small archives with many files would otherwise perform
|
||||
hundreds of storage writes and DB commits inside the request.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import mimetypes
|
||||
import posixpath
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
@ -65,8 +60,8 @@ class SkillStandardizeService:
|
||||
skill_md_bytes = self._package.read_member_bytes(content=content, member_path=manifest.entry_path)
|
||||
slug = slugify_skill_name(manifest.name)
|
||||
|
||||
# Drive-owned files: canonical SKILL.md, every inspectable archive file,
|
||||
# and the full archive for future restore/export.
|
||||
# Drive-owned files: canonical SKILL.md and the full archive. The
|
||||
# archive member tree is preserved in metadata and resolved lazily.
|
||||
md_tool_file = self._tool_files.create_file_by_raw(
|
||||
user_id=user_id,
|
||||
tenant_id=tenant_id,
|
||||
@ -86,30 +81,6 @@ class SkillStandardizeService:
|
||||
|
||||
skill_md_key = f"{slug}/{_SKILL_MD_NAME}"
|
||||
archive_key = f"{slug}/{_FULL_ARCHIVE_NAME}"
|
||||
member_items: list[DriveCommitItem] = []
|
||||
for member_path in sorted(set(manifest.files)):
|
||||
member_key = f"{slug}/{member_path}"
|
||||
if member_key in {skill_md_key, archive_key}:
|
||||
continue
|
||||
|
||||
member_bytes = self._package.read_member_bytes(content=content, member_path=member_path)
|
||||
mimetype = mimetypes.guess_type(member_path)[0] or "application/octet-stream"
|
||||
member_tool_file = self._tool_files.create_file_by_raw(
|
||||
user_id=user_id,
|
||||
tenant_id=tenant_id,
|
||||
conversation_id=None,
|
||||
file_binary=member_bytes,
|
||||
mimetype=mimetype,
|
||||
filename=posixpath.basename(member_path),
|
||||
)
|
||||
member_items.append(
|
||||
DriveCommitItem(
|
||||
key=member_key,
|
||||
file_ref=DriveFileRef(kind="tool_file", id=member_tool_file.id),
|
||||
value_owned_by_drive=True,
|
||||
)
|
||||
)
|
||||
|
||||
committed_items = self._drive.commit(
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
@ -131,7 +102,6 @@ class SkillStandardizeService:
|
||||
file_ref=DriveFileRef(kind="tool_file", id=archive_tool_file.id),
|
||||
value_owned_by_drive=True,
|
||||
),
|
||||
*member_items,
|
||||
],
|
||||
)
|
||||
self.last_committed_items = committed_items
|
||||
|
||||
@ -19,10 +19,18 @@ ToolFile records (see ``AgentDriveFile``). This service is the control plane:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import hmac
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
import zipfile
|
||||
from typing import Any, Literal, TypedDict
|
||||
from urllib.parse import unquote
|
||||
|
||||
@ -31,6 +39,7 @@ from sqlalchemy import func, select
|
||||
from sqlalchemy.exc import DataError, SQLAlchemyError
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from configs import dify_config
|
||||
from core.app.file_access.controller import DatabaseFileAccessController
|
||||
from core.db.session_factory import session_factory
|
||||
from extensions.ext_storage import storage
|
||||
@ -46,6 +55,7 @@ _MAX_KEY_LENGTH = 512
|
||||
_DRIVE_REF_PREFIX = "agent-"
|
||||
_SKILL_MD_SUFFIX = "/SKILL.md"
|
||||
_SKILL_ARCHIVE_NAME = ".DIFY-SKILL-FULL.zip"
|
||||
_ARCHIVE_MEMBER_DOWNLOAD_PURPOSE = "agent-drive-archive-member"
|
||||
|
||||
|
||||
class AgentDriveError(Exception):
|
||||
@ -365,6 +375,7 @@ class AgentDriveService:
|
||||
skill_md_key=skill_md_key,
|
||||
manifest_files=manifest_files,
|
||||
drive_keys=drive_keys,
|
||||
archive_available=catalog["archive_key"] in drive_keys if catalog["archive_key"] else False,
|
||||
)
|
||||
return {
|
||||
**catalog,
|
||||
@ -598,6 +609,7 @@ class AgentDriveService:
|
||||
skill_md_key: str,
|
||||
manifest_files: list[str] | None,
|
||||
drive_keys: set[str],
|
||||
archive_available: bool = False,
|
||||
) -> tuple[list[AgentDriveSkillFileInfo], list[str]]:
|
||||
warnings: list[str] = []
|
||||
if manifest_files:
|
||||
@ -617,13 +629,14 @@ class AgentDriveService:
|
||||
if path == _SKILL_ARCHIVE_NAME:
|
||||
continue
|
||||
drive_key = f"{skill_path}/{path}"
|
||||
available_in_drive = drive_key in drive_keys or (archive_available and path != _SKILL_ARCHIVE_NAME)
|
||||
files.append(
|
||||
{
|
||||
"path": path,
|
||||
"name": path.rsplit("/", 1)[-1],
|
||||
"type": "file",
|
||||
"drive_key": drive_key if drive_key in drive_keys else None,
|
||||
"available_in_drive": drive_key in drive_keys,
|
||||
"drive_key": drive_key if available_in_drive else None,
|
||||
"available_in_drive": available_in_drive,
|
||||
}
|
||||
)
|
||||
if "SKILL.md" not in {file["path"] for file in files}:
|
||||
@ -891,36 +904,138 @@ class AgentDriveService:
|
||||
raise AgentDriveError("drive_key_not_found", "drive value record is missing", status_code=404)
|
||||
return upload_file.key
|
||||
|
||||
def preview(self, *, tenant_id: str, agent_id: str, key: str) -> dict[str, Any]:
|
||||
"""Truncated text preview of one drive value (binary-safe, never 500s on size)."""
|
||||
with session_factory.create_session() as session:
|
||||
self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id)
|
||||
row = self._require_row(session, tenant_id=tenant_id, agent_id=agent_id, key=key)
|
||||
storage_key = self._storage_key_for_row(session, tenant_id=tenant_id, row=row)
|
||||
size = row.size
|
||||
def _archive_member_for_key(
|
||||
self,
|
||||
session: Session,
|
||||
*,
|
||||
tenant_id: str,
|
||||
agent_id: str,
|
||||
key: str,
|
||||
) -> tuple[AgentDriveFile, str]:
|
||||
normalized_key = normalize_drive_key(key)
|
||||
if "/" not in normalized_key:
|
||||
raise AgentDriveError("drive_key_not_found", "no drive entry for this key", status_code=404)
|
||||
skill_path, member_path = normalized_key.split("/", 1)
|
||||
if member_path in {_SKILL_ARCHIVE_NAME, ""}:
|
||||
raise AgentDriveError("drive_key_not_found", "no archive member for this key", status_code=404)
|
||||
|
||||
data = bytearray()
|
||||
for chunk in storage.load_stream(storage_key):
|
||||
data.extend(chunk)
|
||||
if len(data) > self.PREVIEW_MAX_BYTES:
|
||||
break
|
||||
truncated = len(data) > self.PREVIEW_MAX_BYTES
|
||||
sample = bytes(data[: self.PREVIEW_MAX_BYTES])
|
||||
# Same semantics as the sandbox read endpoint: NUL or undecodable -> binary.
|
||||
skill_md_key = f"{skill_path}{_SKILL_MD_SUFFIX}"
|
||||
skill_row = session.scalar(
|
||||
select(AgentDriveFile).where(
|
||||
AgentDriveFile.tenant_id == tenant_id,
|
||||
AgentDriveFile.agent_id == agent_id,
|
||||
AgentDriveFile.key == skill_md_key,
|
||||
AgentDriveFile.is_skill.is_(True),
|
||||
)
|
||||
)
|
||||
if skill_row is None:
|
||||
raise AgentDriveError("drive_key_not_found", "no drive entry for this key", status_code=404)
|
||||
metadata = self._parse_skill_metadata(skill_row.key, skill_row.skill_metadata)
|
||||
manifest_files = {normalize_drive_key(path) for path in (metadata.manifest_files or [])}
|
||||
if member_path not in manifest_files:
|
||||
raise AgentDriveError("drive_key_not_found", "archive member is not part of this skill", status_code=404)
|
||||
archive_row = session.scalar(
|
||||
select(AgentDriveFile).where(
|
||||
AgentDriveFile.tenant_id == tenant_id,
|
||||
AgentDriveFile.agent_id == agent_id,
|
||||
AgentDriveFile.key == self._skill_archive_key(skill_md_key),
|
||||
)
|
||||
)
|
||||
if archive_row is None:
|
||||
raise AgentDriveError("drive_key_not_found", "skill archive is missing", status_code=404)
|
||||
return archive_row, member_path
|
||||
|
||||
def _load_archive_member_bytes(
|
||||
self,
|
||||
*,
|
||||
tenant_id: str,
|
||||
archive_file_kind: AgentDriveFileKind,
|
||||
archive_file_id: str,
|
||||
member_path: str,
|
||||
) -> bytes:
|
||||
member_path = normalize_drive_key(member_path)
|
||||
with session_factory.create_session() as session:
|
||||
storage_key = self._storage_key_for_ref(
|
||||
session,
|
||||
tenant_id=tenant_id,
|
||||
file_kind=archive_file_kind,
|
||||
file_id=archive_file_id,
|
||||
)
|
||||
archive_bytes = b"".join(storage.load_stream(storage_key))
|
||||
try:
|
||||
with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
|
||||
member = next(
|
||||
(
|
||||
info
|
||||
for info in archive.infolist()
|
||||
if not info.is_dir() and normalize_drive_key(info.filename) == member_path
|
||||
),
|
||||
None,
|
||||
)
|
||||
if member is None:
|
||||
raise AgentDriveError(
|
||||
"drive_key_not_found", "archive member is missing from the skill archive", status_code=404
|
||||
)
|
||||
return archive.read(member)
|
||||
except zipfile.BadZipFile as exc:
|
||||
raise AgentDriveError("invalid_skill_archive", "skill archive is not a valid zip", status_code=500) from exc
|
||||
|
||||
@classmethod
|
||||
def _preview_bytes(cls, *, key: str, size: int | None, payload: bytes) -> dict[str, Any]:
|
||||
truncated = len(payload) > cls.PREVIEW_MAX_BYTES
|
||||
sample = payload[: cls.PREVIEW_MAX_BYTES]
|
||||
if b"\x00" in sample:
|
||||
return {"key": row.key, "size": size, "truncated": truncated, "binary": True, "text": None}
|
||||
return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None}
|
||||
try:
|
||||
text = sample.decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
if truncated:
|
||||
# A multi-byte char may sit on the cut point; retry without the tail.
|
||||
try:
|
||||
text = sample[:-3].decode("utf-8", errors="strict")
|
||||
except UnicodeDecodeError:
|
||||
return {"key": row.key, "size": size, "truncated": truncated, "binary": True, "text": None}
|
||||
return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None}
|
||||
else:
|
||||
return {"key": row.key, "size": size, "truncated": truncated, "binary": True, "text": None}
|
||||
return {"key": row.key, "size": size, "truncated": truncated, "binary": False, "text": text}
|
||||
return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None}
|
||||
return {"key": key, "size": size, "truncated": truncated, "binary": False, "text": text}
|
||||
|
||||
def preview(self, *, tenant_id: str, agent_id: str, key: str) -> dict[str, Any]:
|
||||
"""Truncated text preview of one drive value (binary-safe, never 500s on size)."""
|
||||
with session_factory.create_session() as session:
|
||||
self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id)
|
||||
try:
|
||||
row = self._require_row(session, tenant_id=tenant_id, agent_id=agent_id, key=key)
|
||||
storage_key = self._storage_key_for_row(session, tenant_id=tenant_id, row=row)
|
||||
size = row.size
|
||||
response_key = row.key
|
||||
archive_ref: tuple[AgentDriveFile, str] | None = None
|
||||
except AgentDriveError:
|
||||
archive_ref = self._archive_member_for_key(
|
||||
session,
|
||||
tenant_id=tenant_id,
|
||||
agent_id=agent_id,
|
||||
key=key,
|
||||
)
|
||||
storage_key = None
|
||||
size = None
|
||||
response_key = normalize_drive_key(key)
|
||||
|
||||
if archive_ref is not None:
|
||||
archive_row, member_path = archive_ref
|
||||
payload = self._load_archive_member_bytes(
|
||||
tenant_id=tenant_id,
|
||||
archive_file_kind=archive_row.file_kind,
|
||||
archive_file_id=archive_row.file_id,
|
||||
member_path=member_path,
|
||||
)
|
||||
return self._preview_bytes(key=response_key, size=len(payload), payload=payload)
|
||||
|
||||
data = bytearray()
|
||||
assert storage_key is not None
|
||||
for chunk in storage.load_stream(storage_key):
|
||||
data.extend(chunk)
|
||||
if len(data) > self.PREVIEW_MAX_BYTES:
|
||||
break
|
||||
return self._preview_bytes(key=response_key, size=size, payload=bytes(data))
|
||||
|
||||
def preview_file_ref(
|
||||
self,
|
||||
@ -947,27 +1062,51 @@ class AgentDriveService:
|
||||
data.extend(chunk)
|
||||
if len(data) > self.PREVIEW_MAX_BYTES:
|
||||
break
|
||||
truncated = len(data) > self.PREVIEW_MAX_BYTES
|
||||
sample = bytes(data[: self.PREVIEW_MAX_BYTES])
|
||||
if b"\x00" in sample:
|
||||
return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None}
|
||||
try:
|
||||
text = sample.decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
if truncated:
|
||||
try:
|
||||
text = sample[:-3].decode("utf-8", errors="strict")
|
||||
except UnicodeDecodeError:
|
||||
return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None}
|
||||
else:
|
||||
return {"key": key, "size": size, "truncated": truncated, "binary": True, "text": None}
|
||||
return {"key": key, "size": size, "truncated": truncated, "binary": False, "text": text}
|
||||
return self._preview_bytes(key=key, size=size, payload=bytes(data))
|
||||
|
||||
def preview_archive_member_for_ref(
|
||||
self,
|
||||
*,
|
||||
tenant_id: str,
|
||||
agent_id: str,
|
||||
key: str,
|
||||
archive_file_kind: AgentDriveFileKind,
|
||||
archive_file_id: str,
|
||||
member_path: str,
|
||||
) -> dict[str, Any]:
|
||||
with session_factory.create_session() as session:
|
||||
self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id)
|
||||
payload = self._load_archive_member_bytes(
|
||||
tenant_id=tenant_id,
|
||||
archive_file_kind=archive_file_kind,
|
||||
archive_file_id=archive_file_id,
|
||||
member_path=member_path,
|
||||
)
|
||||
return self._preview_bytes(key=normalize_drive_key(key), size=len(payload), payload=payload)
|
||||
|
||||
def download_url(self, *, tenant_id: str, agent_id: str, key: str) -> str:
|
||||
"""External signed URL for a browser download of one drive value."""
|
||||
with session_factory.create_session() as session:
|
||||
self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id)
|
||||
row = self._require_row(session, tenant_id=tenant_id, agent_id=agent_id, key=key)
|
||||
try:
|
||||
row = self._require_row(session, tenant_id=tenant_id, agent_id=agent_id, key=key)
|
||||
except AgentDriveError:
|
||||
archive_row, member_path = self._archive_member_for_key(
|
||||
session,
|
||||
tenant_id=tenant_id,
|
||||
agent_id=agent_id,
|
||||
key=key,
|
||||
)
|
||||
return self.sign_archive_member_url(
|
||||
tenant_id=tenant_id,
|
||||
agent_id=agent_id,
|
||||
key=key,
|
||||
archive_file_kind=archive_row.file_kind,
|
||||
archive_file_id=archive_row.file_id,
|
||||
member_path=member_path,
|
||||
for_external=True,
|
||||
as_attachment=True,
|
||||
)
|
||||
url = self._resolve_download_url(
|
||||
tenant_id=tenant_id,
|
||||
file_kind=row.file_kind,
|
||||
@ -979,6 +1118,30 @@ class AgentDriveService:
|
||||
raise AgentDriveError("drive_key_not_found", "drive value cannot be resolved", status_code=404)
|
||||
return url
|
||||
|
||||
def download_url_archive_member_for_ref(
|
||||
self,
|
||||
*,
|
||||
tenant_id: str,
|
||||
agent_id: str,
|
||||
key: str,
|
||||
archive_file_kind: AgentDriveFileKind,
|
||||
archive_file_id: str,
|
||||
member_path: str,
|
||||
for_external: bool = True,
|
||||
) -> str:
|
||||
with session_factory.create_session() as session:
|
||||
self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id)
|
||||
return self.sign_archive_member_url(
|
||||
tenant_id=tenant_id,
|
||||
agent_id=agent_id,
|
||||
key=key,
|
||||
archive_file_kind=archive_file_kind,
|
||||
archive_file_id=archive_file_id,
|
||||
member_path=member_path,
|
||||
for_external=for_external,
|
||||
as_attachment=True,
|
||||
)
|
||||
|
||||
def download_url_for_ref(
|
||||
self,
|
||||
*,
|
||||
@ -1000,6 +1163,135 @@ class AgentDriveService:
|
||||
raise AgentDriveError("drive_key_not_found", "drive value cannot be resolved", status_code=404)
|
||||
return url
|
||||
|
||||
@staticmethod
|
||||
def _secret_key() -> bytes:
|
||||
return dify_config.SECRET_KEY.encode()
|
||||
|
||||
@classmethod
|
||||
def _archive_member_signature_payload(
|
||||
cls,
|
||||
*,
|
||||
tenant_id: str,
|
||||
agent_id: str,
|
||||
key: str,
|
||||
archive_file_kind: AgentDriveFileKind,
|
||||
archive_file_id: str,
|
||||
member_path: str,
|
||||
timestamp: str,
|
||||
nonce: str,
|
||||
) -> str:
|
||||
return "|".join(
|
||||
[
|
||||
_ARCHIVE_MEMBER_DOWNLOAD_PURPOSE,
|
||||
tenant_id,
|
||||
agent_id,
|
||||
normalize_drive_key(key),
|
||||
archive_file_kind.value,
|
||||
archive_file_id,
|
||||
normalize_drive_key(member_path),
|
||||
timestamp,
|
||||
nonce,
|
||||
]
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _sign_archive_member_payload(cls, payload: str) -> str:
|
||||
digest = hmac.new(cls._secret_key(), payload.encode(), hashlib.sha256).digest()
|
||||
return base64.urlsafe_b64encode(digest).decode()
|
||||
|
||||
@classmethod
|
||||
def sign_archive_member_url(
|
||||
cls,
|
||||
*,
|
||||
tenant_id: str,
|
||||
agent_id: str,
|
||||
key: str,
|
||||
archive_file_kind: AgentDriveFileKind,
|
||||
archive_file_id: str,
|
||||
member_path: str,
|
||||
for_external: bool,
|
||||
as_attachment: bool = False,
|
||||
) -> str:
|
||||
base_url = dify_config.FILES_URL if for_external else (dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL)
|
||||
timestamp = str(int(time.time()))
|
||||
nonce = os.urandom(16).hex()
|
||||
payload = cls._archive_member_signature_payload(
|
||||
tenant_id=tenant_id,
|
||||
agent_id=agent_id,
|
||||
key=key,
|
||||
archive_file_kind=archive_file_kind,
|
||||
archive_file_id=archive_file_id,
|
||||
member_path=member_path,
|
||||
timestamp=timestamp,
|
||||
nonce=nonce,
|
||||
)
|
||||
query = urllib.parse.urlencode(
|
||||
{
|
||||
"tenant_id": tenant_id,
|
||||
"agent_id": agent_id,
|
||||
"key": normalize_drive_key(key),
|
||||
"archive_file_kind": archive_file_kind.value,
|
||||
"archive_file_id": archive_file_id,
|
||||
"member_path": normalize_drive_key(member_path),
|
||||
"timestamp": timestamp,
|
||||
"nonce": nonce,
|
||||
"sign": cls._sign_archive_member_payload(payload),
|
||||
"as_attachment": str(as_attachment).lower(),
|
||||
}
|
||||
)
|
||||
return f"{base_url}/files/agent-drive/archive-member?{query}"
|
||||
|
||||
@classmethod
|
||||
def verify_archive_member_signature(
|
||||
cls,
|
||||
*,
|
||||
tenant_id: str,
|
||||
agent_id: str,
|
||||
key: str,
|
||||
archive_file_kind: AgentDriveFileKind,
|
||||
archive_file_id: str,
|
||||
member_path: str,
|
||||
timestamp: str,
|
||||
nonce: str,
|
||||
sign: str,
|
||||
) -> bool:
|
||||
payload = cls._archive_member_signature_payload(
|
||||
tenant_id=tenant_id,
|
||||
agent_id=agent_id,
|
||||
key=key,
|
||||
archive_file_kind=archive_file_kind,
|
||||
archive_file_id=archive_file_id,
|
||||
member_path=member_path,
|
||||
timestamp=timestamp,
|
||||
nonce=nonce,
|
||||
)
|
||||
if sign != cls._sign_archive_member_payload(payload):
|
||||
return False
|
||||
current_time = int(time.time())
|
||||
return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT
|
||||
|
||||
def load_archive_member_for_signed_request(
|
||||
self,
|
||||
*,
|
||||
tenant_id: str,
|
||||
agent_id: str,
|
||||
key: str,
|
||||
archive_file_kind: AgentDriveFileKind,
|
||||
archive_file_id: str,
|
||||
member_path: str,
|
||||
) -> tuple[bytes, str, str]:
|
||||
with session_factory.create_session() as session:
|
||||
self._assert_agent_belongs_to_tenant(session, tenant_id=tenant_id, agent_id=agent_id)
|
||||
payload = self._load_archive_member_bytes(
|
||||
tenant_id=tenant_id,
|
||||
archive_file_kind=archive_file_kind,
|
||||
archive_file_id=archive_file_id,
|
||||
member_path=member_path,
|
||||
)
|
||||
mime_type = mimetypes.guess_type(member_path)[0] or "application/octet-stream"
|
||||
filename = normalize_drive_key(key).rsplit("/", 1)[-1]
|
||||
return payload, mime_type, filename
|
||||
|
||||
|
||||
__all__ = [
|
||||
"AgentDriveError",
|
||||
|
||||
@ -32,14 +32,13 @@ def test_slugify_skill_name():
|
||||
assert slugify_skill_name("") == "skill"
|
||||
|
||||
|
||||
def test_standardize_creates_drive_owned_toolfiles_and_commits_archive_members():
|
||||
def test_standardize_creates_drive_owned_toolfiles_and_commits_archive_manifest():
|
||||
content = _zip({"SKILL.md": _SKILL_MD, "scripts/run.py": b"print('x')\n"})
|
||||
|
||||
tool_files = MagicMock()
|
||||
tool_files.create_file_by_raw.side_effect = [
|
||||
SimpleNamespace(id="md-tool-file"),
|
||||
SimpleNamespace(id="zip-tool-file"),
|
||||
SimpleNamespace(id="script-tool-file"),
|
||||
]
|
||||
drive = MagicMock()
|
||||
drive.commit.return_value = []
|
||||
@ -67,34 +66,30 @@ def test_standardize_creates_drive_owned_toolfiles_and_commits_archive_members()
|
||||
)
|
||||
assert service.last_committed_items == []
|
||||
|
||||
# ToolFiles: SKILL.md, full archive, and each inspectable package member.
|
||||
assert tool_files.create_file_by_raw.call_count == 3
|
||||
md_call, zip_call, script_call = tool_files.create_file_by_raw.call_args_list
|
||||
# ToolFiles: SKILL.md and the full archive. Archive members stay lazy.
|
||||
assert tool_files.create_file_by_raw.call_count == 2
|
||||
md_call, zip_call = tool_files.create_file_by_raw.call_args_list
|
||||
assert md_call.kwargs["mimetype"] == "text/markdown"
|
||||
assert md_call.kwargs["file_binary"] == _SKILL_MD
|
||||
assert zip_call.kwargs["mimetype"] == "application/zip"
|
||||
assert zip_call.kwargs["file_binary"] == content
|
||||
assert script_call.kwargs["mimetype"] in {"text/x-python", "text/plain", "application/octet-stream"}
|
||||
assert script_call.kwargs["file_binary"] == b"print('x')\n"
|
||||
assert script_call.kwargs["filename"] == "run.py"
|
||||
|
||||
# Committed as drive-owned with the standardized keys.
|
||||
# Committed as drive-owned with the standardized keys. Member paths are
|
||||
# carried in metadata for inspect/preview/runtime lazy resolution.
|
||||
commit_kwargs = drive.commit.call_args.kwargs
|
||||
assert commit_kwargs["agent_id"] == "agent-1"
|
||||
items = commit_kwargs["items"]
|
||||
assert [item.key for item in items] == [
|
||||
"pdf-toolkit/SKILL.md",
|
||||
"pdf-toolkit/.DIFY-SKILL-FULL.zip",
|
||||
"pdf-toolkit/scripts/run.py",
|
||||
]
|
||||
assert all(item.value_owned_by_drive for item in items)
|
||||
assert [item.file_ref.id for item in items] == ["md-tool-file", "zip-tool-file", "script-tool-file"]
|
||||
assert [item.file_ref.id for item in items] == ["md-tool-file", "zip-tool-file"]
|
||||
assert items[0].is_skill is True
|
||||
assert items[0].skill_metadata is not None
|
||||
assert items[0].skill_metadata.name == "PDF Toolkit"
|
||||
assert items[0].skill_metadata.manifest_files == ["SKILL.md", "scripts/run.py"]
|
||||
assert items[1].is_skill is False
|
||||
assert items[2].is_skill is False
|
||||
|
||||
# The returned upload response carries only the drive-derived fields the UI needs.
|
||||
skill = result["skill"]
|
||||
|
||||
@ -7,6 +7,8 @@ exercised against the project's in-memory SQLite engine with seeded ToolFiles.
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import io
|
||||
import zipfile
|
||||
from collections.abc import Generator
|
||||
from unittest.mock import patch
|
||||
|
||||
@ -103,6 +105,14 @@ def _seed_tool_file(*, user_id: str = USER, name: str = "f.txt") -> str:
|
||||
return tool_file.id
|
||||
|
||||
|
||||
def _zip_bytes(members: dict[str, bytes]) -> bytes:
|
||||
buffer = io.BytesIO()
|
||||
with zipfile.ZipFile(buffer, "w") as archive:
|
||||
for name, data in members.items():
|
||||
archive.writestr(name, data)
|
||||
return buffer.getvalue()
|
||||
|
||||
|
||||
def _commit(key: str, tool_file_id: str, *, owned: bool = True):
|
||||
return AgentDriveService().commit(
|
||||
tenant_id=TENANT,
|
||||
@ -769,7 +779,8 @@ def test_inspect_skill_returns_manifest_files_and_file_tree():
|
||||
assert result["warnings"] == []
|
||||
assert [file["path"] for file in result["files"]] == ["SKILL.md", "references/guide.md", "scripts/run.py"]
|
||||
assert result["files"][0]["available_in_drive"] is True
|
||||
assert result["files"][1]["available_in_drive"] is False
|
||||
assert result["files"][1]["available_in_drive"] is True
|
||||
assert result["files"][1]["drive_key"] == "pdf-toolkit/references/guide.md"
|
||||
assert result["file_tree"][0]["name"] == "references"
|
||||
assert result["file_tree"][1]["name"] == "scripts"
|
||||
assert result["file_tree"][2]["name"] == "SKILL.md"
|
||||
@ -787,6 +798,48 @@ def test_inspect_skill_falls_back_to_drive_keys_when_manifest_missing():
|
||||
assert [file["path"] for file in result["files"]] == ["SKILL.md"]
|
||||
|
||||
|
||||
def test_preview_skill_archive_member_from_manifest_without_drive_row():
|
||||
_commit_skill(manifest_files=["SKILL.md", "references/guide.md"])
|
||||
archive = _zip_bytes({"SKILL.md": b"# PDF Toolkit\n", "references/guide.md": b"Guide content\n"})
|
||||
|
||||
with patch("services.agent_drive_service.storage") as storage_mock:
|
||||
storage_mock.load_stream.return_value = iter([archive])
|
||||
result = AgentDriveService().preview(
|
||||
tenant_id=TENANT,
|
||||
agent_id=AGENT,
|
||||
key="pdf-toolkit/references/guide.md",
|
||||
)
|
||||
|
||||
assert result == {
|
||||
"key": "pdf-toolkit/references/guide.md",
|
||||
"size": len(b"Guide content\n"),
|
||||
"truncated": False,
|
||||
"binary": False,
|
||||
"text": "Guide content\n",
|
||||
}
|
||||
|
||||
|
||||
def test_download_url_signs_skill_archive_member_from_manifest_without_drive_row():
|
||||
_commit_skill(manifest_files=["SKILL.md", "references/guide.md"])
|
||||
|
||||
with patch.object(
|
||||
AgentDriveService,
|
||||
"sign_archive_member_url",
|
||||
return_value="https://signed.example/member",
|
||||
) as sign:
|
||||
url = AgentDriveService().download_url(
|
||||
tenant_id=TENANT,
|
||||
agent_id=AGENT,
|
||||
key="pdf-toolkit/references/guide.md",
|
||||
)
|
||||
|
||||
assert url == "https://signed.example/member"
|
||||
kwargs = sign.call_args.kwargs
|
||||
assert kwargs["key"] == "pdf-toolkit/references/guide.md"
|
||||
assert kwargs["member_path"] == "references/guide.md"
|
||||
assert kwargs["for_external"] is True
|
||||
|
||||
|
||||
def test_skill_metadata_rejects_non_canonical_rows():
|
||||
tf = _seed_tool_file(name="not-skill.md")
|
||||
with pytest.raises(AgentDriveError) as exc_info:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user