diff --git a/api/agent-notes/configs/feature/__init__.py.md b/api/agent-notes/configs/feature/__init__.py.md new file mode 100644 index 0000000000..2037edbb35 --- /dev/null +++ b/api/agent-notes/configs/feature/__init__.py.md @@ -0,0 +1,8 @@ +Summary: +- Application configuration definitions, including file access settings. + +Invariants: +- File access settings drive signed URL expiration and base URLs. + +Tests: +- Config parsing tests under tests/unit_tests/configs. diff --git a/api/agent-notes/controllers/files/__init__.py.md b/api/agent-notes/controllers/files/__init__.py.md new file mode 100644 index 0000000000..5e4fe3a108 --- /dev/null +++ b/api/agent-notes/controllers/files/__init__.py.md @@ -0,0 +1,9 @@ +Summary: +- Registers file-related API namespaces and routes for files service. +- Includes app-assets download proxy controller. + +Invariants: +- files_ns must include all file controller modules to register routes. + +Tests: +- Coverage via controller unit tests and route registration smoke checks. diff --git a/api/agent-notes/controllers/files/app_assets_download.py.md b/api/agent-notes/controllers/files/app_assets_download.py.md new file mode 100644 index 0000000000..4111dd5ccb --- /dev/null +++ b/api/agent-notes/controllers/files/app_assets_download.py.md @@ -0,0 +1,14 @@ +Summary: +- App assets download proxy endpoint (signed URL verification, stream from storage). + +Invariants: +- Validates AssetPath fields (UUIDs, asset_type allowlist). +- Verifies tenant-scoped signature and expiration before reading storage. +- URL uses expires_at/nonce/sign query params. + +Edge Cases: +- Missing files return NotFound. +- Invalid signature or expired link returns Forbidden. + +Tests: +- Verify signature validation and invalid/expired cases. 
diff --git a/api/agent-notes/core/app_assets/builder/file_builder.py.md b/api/agent-notes/core/app_assets/builder/file_builder.py.md new file mode 100644 index 0000000000..4aa3e83ffe --- /dev/null +++ b/api/agent-notes/core/app_assets/builder/file_builder.py.md @@ -0,0 +1,8 @@ +Summary: +- Collects file assets and emits FileAsset entries with storage keys. + +Invariants: +- Storage keys are derived via AppAssetStorage for draft files. + +Tests: +- Covered by asset build pipeline tests. diff --git a/api/agent-notes/core/app_assets/builder/skill_builder.py.md b/api/agent-notes/core/app_assets/builder/skill_builder.py.md new file mode 100644 index 0000000000..226182b726 --- /dev/null +++ b/api/agent-notes/core/app_assets/builder/skill_builder.py.md @@ -0,0 +1,13 @@ +Summary: +- Builds skill artifacts from markdown assets and uploads resolved outputs. + +Invariants: +- Reads draft asset content via AppAssetStorage refs. +- Writes resolved artifacts via AppAssetStorage refs. +- FileAsset storage keys are derived via AppAssetStorage. + +Edge Cases: +- Missing or invalid JSON content yields empty skill content/metadata. + +Tests: +- Build pipeline unit tests covering compile/upload paths. diff --git a/api/agent-notes/core/app_assets/converters.py.md b/api/agent-notes/core/app_assets/converters.py.md new file mode 100644 index 0000000000..476b09b870 --- /dev/null +++ b/api/agent-notes/core/app_assets/converters.py.md @@ -0,0 +1,8 @@ +Summary: +- Converts AppAssetFileTree to FileAsset items for packaging. + +Invariants: +- Storage keys for assets are derived via AppAssetStorage. + +Tests: +- Used in packaging/service tests for asset bundles. 
diff --git a/api/agent-notes/core/app_assets/parser/asset_parser.py.md b/api/agent-notes/core/app_assets/parser/asset_parser.py.md new file mode 100644 index 0000000000..85b9995b08 --- /dev/null +++ b/api/agent-notes/core/app_assets/parser/asset_parser.py.md @@ -0,0 +1,8 @@ +Summary: +- Builds AssetItem entries for asset trees using AssetPath-derived storage keys. + +Invariants: +- Uses AssetPath to compute draft storage keys. + +Tests: +- Covered by asset parsing and packaging tests. diff --git a/api/agent-notes/core/app_assets/storage.py.md b/api/agent-notes/core/app_assets/storage.py.md new file mode 100644 index 0000000000..0c073becd1 --- /dev/null +++ b/api/agent-notes/core/app_assets/storage.py.md @@ -0,0 +1,19 @@ +Summary: +- Defines AssetPath facade + typed asset path classes for app-asset storage access. +- Maps asset paths to storage keys and generates presigned or signed-proxy URLs. +- Signs proxy URLs using tenant private keys and enforces expiration. +- Exposes app_asset_storage singleton for reuse. + +Invariants: +- AssetPathBase fields (tenant_id/app_id/resource_id/node_id) must be UUIDs. +- AssetPath.from_components enforces valid types and resolved node_id presence. +- Storage keys are derived internally via AssetPathBase.get_storage_key; callers never supply raw paths. +- AppAssetStorage.storage returns the cached presign wrapper (not the raw storage). + +Edge Cases: +- Storage backends without presign support must fall back to signed proxy URLs. +- Signed proxy verification enforces expiration and tenant-scoped signing keys. +- load_or_none treats SilentStorage "File Not Found" bytes as missing. + +Tests: +- Unit tests for ref validation, storage key mapping, and signed URL verification. 
diff --git a/api/agent-notes/core/app_bundle/source_zip_extractor.py.md b/api/agent-notes/core/app_bundle/source_zip_extractor.py.md new file mode 100644 index 0000000000..05f08a73ff --- /dev/null +++ b/api/agent-notes/core/app_bundle/source_zip_extractor.py.md @@ -0,0 +1,9 @@ +Summary: +- Extracts asset files from a zip and persists them into app asset storage. + +Invariants: +- Rejects path traversal/absolute/backslash paths. +- Saves extracted files via AppAssetStorage draft refs. + +Tests: +- Zip security edge cases and tree construction tests. diff --git a/api/agent-notes/core/sandbox/initializer/app_assets_initializer.py.md b/api/agent-notes/core/sandbox/initializer/app_assets_initializer.py.md new file mode 100644 index 0000000000..1371d8b85c --- /dev/null +++ b/api/agent-notes/core/sandbox/initializer/app_assets_initializer.py.md @@ -0,0 +1,8 @@ +Summary: +- Downloads published app asset zip into sandbox and extracts it. + +Invariants: +- Uses AppAssetStorage to generate download URLs for build zips (internal URL). + +Tests: +- Sandbox initialization integration tests. diff --git a/api/agent-notes/core/sandbox/initializer/draft_app_assets_initializer.py.md b/api/agent-notes/core/sandbox/initializer/draft_app_assets_initializer.py.md new file mode 100644 index 0000000000..43e2aed41c --- /dev/null +++ b/api/agent-notes/core/sandbox/initializer/draft_app_assets_initializer.py.md @@ -0,0 +1,11 @@ +Summary: +- Downloads draft/resolved assets into sandbox for draft execution. + +Invariants: +- Uses AppAssetStorage to generate download URLs for draft/resolved refs (internal URL). + +Edge Cases: +- No nodes -> returns early. + +Tests: +- Sandbox draft initialization tests. 
diff --git a/api/agent-notes/core/skill/skill_manager.py.md b/api/agent-notes/core/skill/skill_manager.py.md new file mode 100644 index 0000000000..cf1586f86b --- /dev/null +++ b/api/agent-notes/core/skill/skill_manager.py.md @@ -0,0 +1,8 @@ +Summary: +- Loads/saves skill bundles to app asset storage. + +Invariants: +- Skill bundles use AppAssetStorage refs and JSON serialization. + +Tests: +- Covered by skill bundle build/load unit tests. diff --git a/api/agent-notes/services/app_asset_service.py.md b/api/agent-notes/services/app_asset_service.py.md new file mode 100644 index 0000000000..e176ec6e3f --- /dev/null +++ b/api/agent-notes/services/app_asset_service.py.md @@ -0,0 +1,14 @@ +Summary: +- App asset CRUD, publish/build pipeline, and presigned URL generation. + +Invariants: +- Asset storage access goes through AppAssetStorage + AssetPath, using app_asset_storage singleton. +- Tree operations require tenant/app scoping and lock for mutation. +- Asset zips are packaged via raw storage with storage keys from AppAssetStorage. + +Edge Cases: +- File nodes larger than preview limit are rejected. +- Deletion runs asynchronously; storage failures are logged. + +Tests: +- Unit tests for storage URL generation and publish/build flows. diff --git a/api/agent-notes/services/app_bundle_service.py.md b/api/agent-notes/services/app_bundle_service.py.md new file mode 100644 index 0000000000..e68891e2d7 --- /dev/null +++ b/api/agent-notes/services/app_bundle_service.py.md @@ -0,0 +1,9 @@ +Summary: +- Imports app bundles, including asset extraction into app asset storage. + +Invariants: +- Asset imports respect zip security checks and tenant/app scoping. +- Draft asset packaging uses AppAssetStorage for key mapping. + +Tests: +- Bundle import unit tests and zip validation coverage. 
diff --git a/api/agent-notes/tests/unit_tests/core/app_assets/test_storage.py.md b/api/agent-notes/tests/unit_tests/core/app_assets/test_storage.py.md new file mode 100644 index 0000000000..5ba4410605 --- /dev/null +++ b/api/agent-notes/tests/unit_tests/core/app_assets/test_storage.py.md @@ -0,0 +1,5 @@ +Summary: +- Unit tests for AppAssetStorage ref validation, key mapping, and signing. + +Tests: +- Covers valid/invalid refs, signature verify, expiration handling, and proxy URL generation. diff --git a/api/controllers/files/__init__.py b/api/controllers/files/__init__.py index 5a0fbf7b1d..854cf3cbf8 100644 --- a/api/controllers/files/__init__.py +++ b/api/controllers/files/__init__.py @@ -14,12 +14,13 @@ api = ExternalApi( files_ns = Namespace("files", description="File operations", path="/") -from . import image_preview, storage_download, tool_files, upload +from . import app_assets_download, image_preview, storage_download, tool_files, upload api.add_namespace(files_ns) __all__ = [ "api", + "app_assets_download", "bp", "files_ns", "image_preview", diff --git a/api/controllers/files/app_assets_download.py b/api/controllers/files/app_assets_download.py new file mode 100644 index 0000000000..205b7353a9 --- /dev/null +++ b/api/controllers/files/app_assets_download.py @@ -0,0 +1,77 @@ +from urllib.parse import quote + +from flask import Response, request +from flask_restx import Resource +from pydantic import BaseModel, Field +from werkzeug.exceptions import Forbidden, NotFound + +from controllers.files import files_ns +from core.app_assets.storage import AppAssetSigner, AssetPath, app_asset_storage +from extensions.ext_storage import storage + +DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}" + + +class AppAssetDownloadQuery(BaseModel): + expires_at: int = Field(..., description="Unix timestamp when the link expires") + nonce: str = Field(..., description="Random string for signature") + sign: str = Field(..., description="HMAC signature") + + 
+files_ns.schema_model( + AppAssetDownloadQuery.__name__, + AppAssetDownloadQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0), +) + + +@files_ns.route("/app-assets/<string:asset_type>/<string:tenant_id>/<string:app_id>/<string:resource_id>/download") +@files_ns.route( + "/app-assets/<string:asset_type>/<string:tenant_id>/<string:app_id>/<string:resource_id>/<string:sub_resource_id>/download" +) +class AppAssetDownloadApi(Resource): + def get( + self, + asset_type: str, + tenant_id: str, + app_id: str, + resource_id: str, + sub_resource_id: str | None = None, + ): + args = AppAssetDownloadQuery.model_validate(request.args.to_dict(flat=True)) + + try: + asset_path = AssetPath.from_components( + asset_type=asset_type, + tenant_id=tenant_id, + app_id=app_id, + resource_id=resource_id, + sub_resource_id=sub_resource_id, + ) + except ValueError as exc: + raise Forbidden(str(exc)) from exc + + if not AppAssetSigner.verify_download_signature( + asset_path=asset_path, + expires_at=args.expires_at, + nonce=args.nonce, + sign=args.sign, + ): + raise Forbidden("Invalid or expired download link") + + storage_key = app_asset_storage.get_storage_key(asset_path) + + try: + generator = storage.load_stream(storage_key) + except FileNotFoundError as exc: + raise NotFound("File not found") from exc + + encoded_filename = quote(storage_key.split("/")[-1]) + + return Response( + generator, + mimetype="application/octet-stream", + direct_passthrough=True, + headers={ + "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}", + }, + ) diff --git a/api/core/app_assets/__init__.py b/api/core/app_assets/__init__.py index 4490a8eda7..c81f5b2e55 100644 --- a/api/core/app_assets/__init__.py +++ b/api/core/app_assets/__init__.py @@ -6,7 +6,6 @@ from .entities import ( ) from .packager import AssetPackager, AssetZipPackager from .parser import AssetItemParser, AssetParser, FileAssetParser, SkillAssetParser -from .paths import AssetPaths __all__ = [ "AppAssetsAttrs", @@ -14,7 +13,6 @@ __all__ = [ "AssetItemParser", "AssetPackager", "AssetParser", - "AssetPaths", "AssetZipPackager", "FileAsset", "FileAssetParser", diff --git 
a/api/core/app_assets/builder/file_builder.py b/api/core/app_assets/builder/file_builder.py index d1ef0446b4..617c68bfbb 100644 --- a/api/core/app_assets/builder/file_builder.py +++ b/api/core/app_assets/builder/file_builder.py @@ -1,15 +1,17 @@ from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode from core.app_assets.entities import AssetItem, FileAsset -from core.app_assets.paths import AssetPaths +from core.app_assets.storage import AppAssetStorage, AssetPath from .base import BuildContext class FileBuilder: _nodes: list[tuple[AppAssetNode, str]] + _storage: AppAssetStorage - def __init__(self) -> None: + def __init__(self, storage: AppAssetStorage) -> None: self._nodes = [] + self._storage = storage def accept(self, node: AppAssetNode) -> bool: return True @@ -24,7 +26,7 @@ class FileBuilder: path=path, file_name=node.name, extension=node.extension or "", - storage_key=AssetPaths.draft_file(ctx.tenant_id, ctx.app_id, node.id), + storage_key=self._storage.get_storage_key(AssetPath.draft(ctx.tenant_id, ctx.app_id, node.id)), ) for node, path in self._nodes ] diff --git a/api/core/app_assets/builder/skill_builder.py b/api/core/app_assets/builder/skill_builder.py index ce72897cf5..9f75890b9c 100644 --- a/api/core/app_assets/builder/skill_builder.py +++ b/api/core/app_assets/builder/skill_builder.py @@ -1,15 +1,15 @@ import json from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass +from typing import Any, cast from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode from core.app_assets.entities import AssetItem, FileAsset -from core.app_assets.paths import AssetPaths +from core.app_assets.storage import AppAssetStorage, AssetPath, AssetPathBase from core.skill.entities.skill_bundle import SkillBundle from core.skill.entities.skill_document import SkillDocument from core.skill.skill_compiler import SkillCompiler from core.skill.skill_manager import SkillManager -from 
extensions.storage.base_storage import BaseStorage from .base import BuildContext @@ -19,23 +19,24 @@ class _LoadedSkill: node: AppAssetNode path: str content: str - metadata: dict + metadata: dict[str, Any] @dataclass class _CompiledSkill: node: AppAssetNode path: str - resolved_key: str + ref: AssetPathBase + storage_key: str content_bytes: bytes class SkillBuilder: _nodes: list[tuple[AppAssetNode, str]] _max_workers: int - _storage: BaseStorage + _storage: AppAssetStorage - def __init__(self, storage: BaseStorage, max_workers: int = 8) -> None: + def __init__(self, storage: AppAssetStorage, max_workers: int = 8) -> None: self._nodes = [] self._max_workers = max_workers self._storage = storage @@ -67,12 +68,13 @@ class SkillBuilder: artifact = artifact_set.get(skill.node.id) if artifact is None: continue - resolved_key = AssetPaths.build_resolved_file(ctx.tenant_id, ctx.app_id, ctx.build_id, skill.node.id) + resolved_ref = AssetPath.resolved(ctx.tenant_id, ctx.app_id, ctx.build_id, skill.node.id) to_upload.append( _CompiledSkill( node=skill.node, path=skill.path, - resolved_key=resolved_key, + ref=resolved_ref, + storage_key=self._storage.get_storage_key(resolved_ref), content_bytes=artifact.content.encode("utf-8"), ) ) @@ -87,19 +89,26 @@ class SkillBuilder: path=s.path, file_name=s.node.name, extension=s.node.extension or "", - storage_key=s.resolved_key, + storage_key=s.storage_key, ) for s in to_upload ] def _load_all(self, ctx: BuildContext) -> list[_LoadedSkill]: def load_one(node: AppAssetNode, path: str) -> _LoadedSkill: - draft_key = AssetPaths.draft_file(ctx.tenant_id, ctx.app_id, node.id) try: - data = json.loads(self._storage.load_once(draft_key)) - content = data.get("content", "") if isinstance(data, dict) else "" - metadata = data.get("metadata", {}) if isinstance(data, dict) else {} - except Exception: + draft_ref = AssetPath.draft(ctx.tenant_id, ctx.app_id, node.id) + data = json.loads(self._storage.load(draft_ref)) + content = "" + metadata: 
dict[str, Any] = {} + if isinstance(data, dict): + data_dict = cast(dict[str, Any], data) + content_value = data_dict.get("content", "") + content = content_value if isinstance(content_value, str) else str(content_value) + metadata_value = data_dict.get("metadata", {}) + if isinstance(metadata_value, dict): + metadata = cast(dict[str, Any], metadata_value) + except (FileNotFoundError, json.JSONDecodeError, TypeError, ValueError): content = "" metadata = {} return _LoadedSkill(node=node, path=path, content=content, metadata=metadata) @@ -110,7 +119,7 @@ class SkillBuilder: def _upload_all(self, skills: list[_CompiledSkill]) -> None: def upload_one(skill: _CompiledSkill) -> None: - self._storage.save(skill.resolved_key, skill.content_bytes) + self._storage.save(skill.ref, skill.content_bytes) with ThreadPoolExecutor(max_workers=self._max_workers) as executor: futures = [executor.submit(upload_one, skill) for skill in skills] diff --git a/api/core/app_assets/converters.py b/api/core/app_assets/converters.py index d949e13259..d7740f2a97 100644 --- a/api/core/app_assets/converters.py +++ b/api/core/app_assets/converters.py @@ -3,13 +3,14 @@ from __future__ import annotations from core.app.entities.app_asset_entities import AppAssetFileTree, AssetNodeType from core.app_assets.entities import FileAsset from core.app_assets.entities.assets import AssetItem -from core.app_assets.paths import AssetPaths +from core.app_assets.storage import AppAssetStorage, AssetPath def tree_to_asset_items( tree: AppAssetFileTree, tenant_id: str, app_id: str, + storage: AppAssetStorage, ) -> list[AssetItem]: """ Convert AppAssetFileTree to list of FileAsset for packaging. 
@@ -18,6 +19,7 @@ def tree_to_asset_items( tree: The asset file tree to convert tenant_id: Tenant ID for storage key generation app_id: App ID for storage key generation + storage: App asset storage for key mapping Returns: List of FileAsset items ready for packaging @@ -26,7 +28,8 @@ def tree_to_asset_items( for node in tree.nodes: if node.node_type == AssetNodeType.FILE: path = tree.get_path(node.id) - storage_key = AssetPaths.draft_file(tenant_id, app_id, node.id) + asset_path = AssetPath.draft(tenant_id, app_id, node.id) + storage_key = storage.get_storage_key(asset_path) items.append( FileAsset( asset_id=node.id, diff --git a/api/core/app_assets/parser/asset_parser.py b/api/core/app_assets/parser/asset_parser.py index 3a934520cc..a42fb2f879 100644 --- a/api/core/app_assets/parser/asset_parser.py +++ b/api/core/app_assets/parser/asset_parser.py @@ -1,6 +1,6 @@ from core.app.entities.app_asset_entities import AppAssetFileTree from core.app_assets.entities import AssetItem -from core.app_assets.paths import AssetPaths +from core.app_assets.storage import AssetPath, app_asset_storage from .base import AssetItemParser, FileAssetParser @@ -15,7 +15,7 @@ class AssetParser: self._tree = tree self._tenant_id = tenant_id self._app_id = app_id - self._parsers = {} + self._parsers: dict[str, AssetItemParser] = {} self._default_parser = FileAssetParser() def register(self, extension: str, parser: AssetItemParser) -> None: @@ -26,10 +26,10 @@ class AssetParser: for node in self._tree.walk_files(): path = self._tree.get_path(node.id).lstrip("/") - storage_key = AssetPaths.draft_file(self._tenant_id, self._app_id, node.id) + storage_key = app_asset_storage.get_storage_key(AssetPath.draft(self._tenant_id, self._app_id, node.id)) extension = node.extension or "" - parser = self._parsers.get(extension, self._default_parser) + parser: AssetItemParser = self._parsers.get(extension, self._default_parser) asset = parser.parse(node.id, path, node.name, extension, storage_key) 
assets.append(asset) diff --git a/api/core/app_assets/paths.py b/api/core/app_assets/paths.py deleted file mode 100644 index cdfbdcc923..0000000000 --- a/api/core/app_assets/paths.py +++ /dev/null @@ -1,23 +0,0 @@ -class AssetPaths: - _BASE = "app_assets" - - @staticmethod - def draft_file(tenant_id: str, app_id: str, node_id: str) -> str: - return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/draft/{node_id}" - - @staticmethod - def build_zip(tenant_id: str, app_id: str, assets_id: str) -> str: - return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}.zip" - - @staticmethod - def build_resolved_file(tenant_id: str, app_id: str, assets_id: str, node_id: str) -> str: - return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}/resolved/{node_id}" - - @staticmethod - def build_skill_bundle(tenant_id: str, app_id: str, assets_id: str) -> str: - return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}/skill_artifact_set.json" - - @staticmethod - def build_source_zip(tenant_id: str, app_id: str, workflow_id: str) -> str: - """Storage key for source assets zip (editable files snapshot at publish time).""" - return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/sources/{workflow_id}.zip" diff --git a/api/core/app_assets/storage.py b/api/core/app_assets/storage.py new file mode 100644 index 0000000000..9a1f5fbaf2 --- /dev/null +++ b/api/core/app_assets/storage.py @@ -0,0 +1,339 @@ +from __future__ import annotations + +import base64 +import hashlib +import hmac +import os +import time +import urllib.parse +from collections.abc import Callable, Iterable +from dataclasses import dataclass +from typing import Any, ClassVar +from uuid import UUID + +from configs import dify_config +from extensions.ext_redis import redis_client +from extensions.ext_storage import storage +from extensions.storage.base_storage import BaseStorage +from extensions.storage.cached_presign_storage import CachedPresignStorage +from extensions.storage.silent_storage 
import SilentStorage +from libs import rsa + +_ASSET_BASE = "app_assets" +_SILENT_STORAGE_NOT_FOUND = b"File Not Found" +_PATH_TEMPLATES: dict[str, str] = { + "draft": f"{_ASSET_BASE}/{{t}}/{{a}}/draft/{{r}}", + "build-zip": f"{_ASSET_BASE}/{{t}}/{{a}}/artifacts/{{r}}.zip", + "resolved": f"{_ASSET_BASE}/{{t}}/{{a}}/artifacts/{{r}}/resolved/{{s}}", + "skill-bundle": f"{_ASSET_BASE}/{{t}}/{{a}}/artifacts/{{r}}/skill_artifact_set.json", + "source-zip": f"{_ASSET_BASE}/{{t}}/{{a}}/sources/{{r}}.zip", +} +_ASSET_PATH_REGISTRY: dict[str, tuple[bool, Callable[..., AssetPathBase]]] = {} + + +def _require_uuid(value: str, field_name: str) -> None: + try: + UUID(value) + except (ValueError, TypeError) as exc: + raise ValueError(f"{field_name} must be a UUID") from exc + + +def register_asset_path(asset_type: str, *, requires_node: bool, factory: Callable[..., AssetPathBase]) -> None: + _ASSET_PATH_REGISTRY[asset_type] = (requires_node, factory) + + +@dataclass(frozen=True) +class AssetPathBase: + asset_type: ClassVar[str] + tenant_id: str + app_id: str + resource_id: str + + def __post_init__(self) -> None: + _require_uuid(self.tenant_id, "tenant_id") + _require_uuid(self.app_id, "app_id") + _require_uuid(self.resource_id, "resource_id") + + def get_storage_key(self) -> str: + return _PATH_TEMPLATES[self.asset_type].format( + t=self.tenant_id, + a=self.app_id, + r=self.resource_id, + s=self.signature_sub_resource_id() or "", + ) + + def signature_resource_id(self) -> str: + return self.resource_id + + def signature_sub_resource_id(self) -> str: + return "" + + def proxy_path_parts(self) -> list[str]: + parts = [self.asset_type, self.tenant_id, self.app_id, self.signature_resource_id()] + sub_resource_id = self.signature_sub_resource_id() + if sub_resource_id: + parts.append(sub_resource_id) + return parts + + +@dataclass(frozen=True) +class _DraftAssetPath(AssetPathBase): + asset_type: ClassVar[str] = "draft" + + +@dataclass(frozen=True) +class 
_BuildZipAssetPath(AssetPathBase): + asset_type: ClassVar[str] = "build-zip" + + +@dataclass(frozen=True) +class _ResolvedAssetPath(AssetPathBase): + asset_type: ClassVar[str] = "resolved" + node_id: str + + def __post_init__(self) -> None: + super().__post_init__() + _require_uuid(self.node_id, "node_id") + + def signature_sub_resource_id(self) -> str: + return self.node_id + + +@dataclass(frozen=True) +class _SkillBundleAssetPath(AssetPathBase): + asset_type: ClassVar[str] = "skill-bundle" + + +@dataclass(frozen=True) +class _SourceZipAssetPath(AssetPathBase): + asset_type: ClassVar[str] = "source-zip" + + +class AssetPath: + @staticmethod + def draft(tenant_id: str, app_id: str, node_id: str) -> AssetPathBase: + return _DraftAssetPath(tenant_id=tenant_id, app_id=app_id, resource_id=node_id) + + @staticmethod + def build_zip(tenant_id: str, app_id: str, assets_id: str) -> AssetPathBase: + return _BuildZipAssetPath(tenant_id=tenant_id, app_id=app_id, resource_id=assets_id) + + @staticmethod + def resolved(tenant_id: str, app_id: str, assets_id: str, node_id: str) -> AssetPathBase: + return _ResolvedAssetPath(tenant_id=tenant_id, app_id=app_id, resource_id=assets_id, node_id=node_id) + + @staticmethod + def skill_bundle(tenant_id: str, app_id: str, assets_id: str) -> AssetPathBase: + return _SkillBundleAssetPath(tenant_id=tenant_id, app_id=app_id, resource_id=assets_id) + + @staticmethod + def source_zip(tenant_id: str, app_id: str, workflow_id: str) -> AssetPathBase: + return _SourceZipAssetPath(tenant_id=tenant_id, app_id=app_id, resource_id=workflow_id) + + @staticmethod + def from_components( + asset_type: str, + tenant_id: str, + app_id: str, + resource_id: str, + sub_resource_id: str | None = None, + ) -> AssetPathBase: + entry = _ASSET_PATH_REGISTRY.get(asset_type) + if not entry: + raise ValueError(f"Unsupported asset type: {asset_type}") + requires_node, factory = entry + if requires_node and not sub_resource_id: + raise ValueError("resolved assets require 
node_id") + if not requires_node and sub_resource_id: + raise ValueError(f"{asset_type} assets do not accept node_id") + if requires_node: + return factory(tenant_id, app_id, resource_id, sub_resource_id) + return factory(tenant_id, app_id, resource_id) + + +register_asset_path("draft", requires_node=False, factory=AssetPath.draft) +register_asset_path("build-zip", requires_node=False, factory=AssetPath.build_zip) +register_asset_path("resolved", requires_node=True, factory=AssetPath.resolved) +register_asset_path("skill-bundle", requires_node=False, factory=AssetPath.skill_bundle) +register_asset_path("source-zip", requires_node=False, factory=AssetPath.source_zip) + + +class AppAssetSigner: + SIGNATURE_PREFIX = "app-asset" + SIGNATURE_VERSION = "v1" + OPERATION_DOWNLOAD = "download" + + @classmethod + def create_download_signature(cls, asset_path: AssetPathBase, expires_at: int, nonce: str) -> str: + return cls._create_signature( + asset_path=asset_path, + operation=cls.OPERATION_DOWNLOAD, + expires_at=expires_at, + nonce=nonce, + ) + + @classmethod + def verify_download_signature(cls, asset_path: AssetPathBase, expires_at: int, nonce: str, sign: str) -> bool: + if expires_at <= 0: + return False + + expected_sign = cls.create_download_signature( + asset_path=asset_path, + expires_at=expires_at, + nonce=nonce, + ) + if not hmac.compare_digest(sign, expected_sign): + return False + + current_time = int(time.time()) + if expires_at < current_time: + return False + + if expires_at - current_time > dify_config.FILES_ACCESS_TIMEOUT: + return False + + return True + + @classmethod + def _create_signature(cls, *, asset_path: AssetPathBase, operation: str, expires_at: int, nonce: str) -> str: + key = cls._tenant_key(asset_path.tenant_id) + message = cls._signature_message( + asset_path=asset_path, + operation=operation, + expires_at=expires_at, + nonce=nonce, + ) + sign = hmac.new(key, message.encode(), hashlib.sha256).digest() + return 
base64.urlsafe_b64encode(sign).decode() + + @classmethod + def _signature_message(cls, *, asset_path: AssetPathBase, operation: str, expires_at: int, nonce: str) -> str: + sub_resource_id = asset_path.signature_sub_resource_id() + return ( + f"{cls.SIGNATURE_PREFIX}|{cls.SIGNATURE_VERSION}|{operation}|" + f"{asset_path.asset_type}|{asset_path.tenant_id}|{asset_path.app_id}|" + f"{asset_path.signature_resource_id()}|{sub_resource_id}|{expires_at}|{nonce}" + ) + + @classmethod + def _tenant_key(cls, tenant_id: str) -> bytes: + try: + rsa_key, _ = rsa.get_decrypt_decoding(tenant_id) + except rsa.PrivkeyNotFoundError as exc: + raise ValueError(f"Tenant private key missing for tenant_id={tenant_id}") from exc + private_key = rsa_key.export_key() + return hashlib.sha256(private_key).digest() + + +class AppAssetStorage: + _base_storage: BaseStorage + _storage: CachedPresignStorage + + def __init__(self, storage: BaseStorage, *, redis_client: Any, cache_key_prefix: str = "app_assets") -> None: + self._base_storage = storage + self._storage = CachedPresignStorage( + storage=storage, + redis_client=redis_client, + cache_key_prefix=cache_key_prefix, + ) + + @property + def storage(self) -> BaseStorage: + return self._storage + + def save(self, asset_path: AssetPathBase, content: bytes) -> None: + self._storage.save(self.get_storage_key(asset_path), content) + + def load(self, asset_path: AssetPathBase) -> bytes: + return self._storage.load_once(self.get_storage_key(asset_path)) + + def load_or_none(self, asset_path: AssetPathBase) -> bytes | None: + try: + data = self._storage.load_once(self.get_storage_key(asset_path)) + except FileNotFoundError: + return None + if data == _SILENT_STORAGE_NOT_FOUND: + return None + return data + + def delete(self, asset_path: AssetPathBase) -> None: + self._storage.delete(self.get_storage_key(asset_path)) + + def get_storage_key(self, asset_path: AssetPathBase) -> str: + return asset_path.get_storage_key() + + def get_download_url(self, 
asset_path: AssetPathBase, expires_in: int = 3600, *, for_external: bool = True) -> str: + storage_key = self.get_storage_key(asset_path) + try: + return self._storage.get_download_url(storage_key, expires_in) + except NotImplementedError: + pass + + return self._generate_signed_proxy_url(asset_path, expires_in, for_external=for_external) + + def get_download_urls( + self, + asset_paths: Iterable[AssetPathBase], + expires_in: int = 3600, + *, + for_external: bool = True, + ) -> list[str]: + asset_paths_list = list(asset_paths) + storage_keys = [self.get_storage_key(asset_path) for asset_path in asset_paths_list] + + try: + return self._storage.get_download_urls(storage_keys, expires_in) + except NotImplementedError: + pass + + return [ + self._generate_signed_proxy_url(asset_path, expires_in, for_external=for_external) + for asset_path in asset_paths_list + ] + + # FIXME(Mairuis): support fallback to signed proxy url + def get_upload_url(self, asset_path: AssetPathBase, expires_in: int = 3600) -> str: + return self._storage.get_upload_url(self.get_storage_key(asset_path), expires_in) + + def _generate_signed_proxy_url(self, asset_path: AssetPathBase, expires_in: int, *, for_external: bool) -> str: + expires_in = min(expires_in, dify_config.FILES_ACCESS_TIMEOUT) + expires_at = int(time.time()) + max(expires_in, 1) + nonce = os.urandom(16).hex() + sign = AppAssetSigner.create_download_signature(asset_path=asset_path, expires_at=expires_at, nonce=nonce) + + base_url = dify_config.FILES_URL if for_external else (dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL) + url = self._build_proxy_url(base_url=base_url, asset_path=asset_path) + query = urllib.parse.urlencode({"expires_at": expires_at, "nonce": nonce, "sign": sign}) + return f"{url}?{query}" + + @staticmethod + def _build_proxy_url(*, base_url: str, asset_path: AssetPathBase) -> str: + encoded_parts = [urllib.parse.quote(part, safe="") for part in asset_path.proxy_path_parts()] + path = 
"/".join(encoded_parts) + return f"{base_url}/files/app-assets/{path}/download" + + +class _LazyAppAssetStorage: + _instance: AppAssetStorage | None + _cache_key_prefix: str + + def __init__(self, *, cache_key_prefix: str) -> None: + self._instance = None + self._cache_key_prefix = cache_key_prefix + + def _get_instance(self) -> AppAssetStorage: + if self._instance is None: + if not hasattr(storage, "storage_runner"): + raise RuntimeError("Storage is not initialized; call storage.init_app before using app_asset_storage") + self._instance = AppAssetStorage( + storage=SilentStorage(storage.storage_runner), + redis_client=redis_client, + cache_key_prefix=self._cache_key_prefix, + ) + return self._instance + + def __getattr__(self, name: str): + return getattr(self._get_instance(), name) + + +app_asset_storage = _LazyAppAssetStorage(cache_key_prefix="app_assets") diff --git a/api/core/app_bundle/source_zip_extractor.py b/api/core/app_bundle/source_zip_extractor.py index 16c44864ee..7d489015b5 100644 --- a/api/core/app_bundle/source_zip_extractor.py +++ b/api/core/app_bundle/source_zip_extractor.py @@ -2,19 +2,19 @@ from __future__ import annotations import io import zipfile -from collections.abc import Callable from typing import TYPE_CHECKING from uuid import uuid4 from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode from core.app.entities.app_bundle_entities import ExtractedFile, ExtractedFolder, ZipSecurityError +from core.app_assets.storage import AssetPath if TYPE_CHECKING: - from extensions.ext_storage import Storage + from core.app_assets.storage import AppAssetStorage class SourceZipExtractor: - def __init__(self, storage: Storage) -> None: + def __init__(self, storage: AppAssetStorage) -> None: self._storage = storage def extract_entries( @@ -49,7 +49,6 @@ class SourceZipExtractor: files: list[ExtractedFile], tenant_id: str, app_id: str, - storage_key_fn: Callable[[str, str, str], str], ) -> AppAssetFileTree: tree = 
AppAssetFileTree() path_to_node_id: dict[str, str] = {} @@ -79,8 +78,8 @@ class SourceZipExtractor: node = AppAssetNode.create_file(node_id, name, parent_id, len(file.content)) tree.add(node) - storage_key = storage_key_fn(tenant_id, app_id, node_id) - self._storage.save(storage_key, file.content) + asset_path = AssetPath.draft(tenant_id, app_id, node_id) + self._storage.save(asset_path, file.content) return tree diff --git a/api/core/sandbox/initializer/app_assets_initializer.py b/api/core/sandbox/initializer/app_assets_initializer.py index 8050f9cd12..f9a88b42e9 100644 --- a/api/core/sandbox/initializer/app_assets_initializer.py +++ b/api/core/sandbox/initializer/app_assets_initializer.py @@ -1,11 +1,9 @@ import logging from core.app_assets.constants import AppAssetsAttrs -from core.app_assets.paths import AssetPaths +from core.app_assets.storage import AssetPath, app_asset_storage from core.sandbox.sandbox import Sandbox from core.virtual_environment.__base.helpers import pipeline -from extensions.ext_storage import storage -from extensions.storage.file_presign_storage import FilePresignStorage from services.app_asset_service import AppAssetService from ..entities import AppAssets @@ -28,8 +26,9 @@ class AppAssetsInitializer(AsyncSandboxInitializer): sandbox.attrs.set(AppAssetsAttrs.FILE_TREE, app_assets.asset_tree) sandbox.attrs.set(AppAssetsAttrs.APP_ASSETS_ID, self._assets_id) vm = sandbox.vm - zip_key = AssetPaths.build_zip(self._tenant_id, self._app_id, self._assets_id) - download_url = FilePresignStorage(storage.storage_runner).get_download_url(zip_key) + asset_storage = app_asset_storage + zip_ref = AssetPath.build_zip(self._tenant_id, self._app_id, self._assets_id) + download_url = asset_storage.get_download_url(zip_ref, for_external=False) ( pipeline(vm) diff --git a/api/core/sandbox/initializer/draft_app_assets_initializer.py b/api/core/sandbox/initializer/draft_app_assets_initializer.py index c942394186..30b4cd0462 100644 --- 
a/api/core/sandbox/initializer/draft_app_assets_initializer.py +++ b/api/core/sandbox/initializer/draft_app_assets_initializer.py @@ -1,7 +1,7 @@ import logging from core.app_assets.constants import AppAssetsAttrs -from core.app_assets.paths import AssetPaths +from core.app_assets.storage import AssetPath, app_asset_storage from core.sandbox.entities import AppAssets from core.sandbox.sandbox import Sandbox from core.sandbox.services import AssetDownloadService @@ -32,18 +32,18 @@ class DraftAppAssetsInitializer(AsyncSandboxInitializer): vm = sandbox.vm build_id = self._assets_id tree = app_assets.asset_tree - storage = AppAssetService.assets_storage() + storage = app_asset_storage nodes = list(tree.walk_files()) if not nodes: return # FIXME(Mairuis): should be more graceful - storage_keys = [ - AssetPaths.build_resolved_file(self._tenant_id, self._app_id, build_id, node.id) + refs = [ + AssetPath.resolved(self._tenant_id, self._app_id, build_id, node.id) if node.extension == "md" - else AssetPaths.draft_file(self._tenant_id, self._app_id, node.id) + else AssetPath.draft(self._tenant_id, self._app_id, node.id) for node in nodes ] - urls = storage.get_download_urls(storage_keys, DRAFT_ASSETS_EXPIRES_IN) + urls = storage.get_download_urls(refs, DRAFT_ASSETS_EXPIRES_IN, for_external=False) items = [AssetDownloadItem(path=tree.get_path(node.id).lstrip("/"), url=url) for node, url in zip(nodes, urls)] script = AssetDownloadService.build_download_script(items, AppAssets.PATH) pipeline(vm).add( diff --git a/api/core/skill/skill_manager.py b/api/core/skill/skill_manager.py index 4767d93c00..dd8c2c4fd4 100644 --- a/api/core/skill/skill_manager.py +++ b/api/core/skill/skill_manager.py @@ -1,7 +1,8 @@ import logging -from core.app_assets.paths import AssetPaths +from core.app_assets.storage import AppAssetStorage, AssetPath from core.skill.entities.skill_bundle import SkillBundle +from extensions.ext_redis import redis_client from extensions.ext_storage import storage logger 
= logging.getLogger(__name__) @@ -14,8 +15,8 @@ class SkillManager: app_id: str, assets_id: str, ) -> SkillBundle: - key = AssetPaths.build_skill_bundle(tenant_id, app_id, assets_id) - data = storage.load_once(key) + asset_path = AssetPath.skill_bundle(tenant_id, app_id, assets_id) + data = AppAssetStorage(storage.storage_runner, redis_client=redis_client).load(asset_path) return SkillBundle.model_validate_json(data) @staticmethod @@ -25,5 +26,8 @@ class SkillManager: assets_id: str, bundle: SkillBundle, ) -> None: - key = AssetPaths.build_skill_bundle(tenant_id, app_id, assets_id) - storage.save(key, bundle.model_dump_json(indent=2).encode("utf-8")) + asset_path = AssetPath.skill_bundle(tenant_id, app_id, assets_id) + AppAssetStorage(storage.storage_runner, redis_client=redis_client).save( + asset_path, + bundle.model_dump_json(indent=2).encode("utf-8"), + ) diff --git a/api/extensions/storage/base_storage.py b/api/extensions/storage/base_storage.py index a8f81c606b..f72b984454 100644 --- a/api/extensions/storage/base_storage.py +++ b/api/extensions/storage/base_storage.py @@ -8,7 +8,7 @@ class BaseStorage(ABC): """Interface for file storage.""" @abstractmethod - def save(self, filename: str, data: bytes): + def save(self, filename: str, data: bytes) -> None: raise NotImplementedError @abstractmethod @@ -16,22 +16,22 @@ class BaseStorage(ABC): raise NotImplementedError @abstractmethod - def load_stream(self, filename: str) -> Generator: + def load_stream(self, filename: str) -> Generator[bytes, None, None]: raise NotImplementedError @abstractmethod - def download(self, filename, target_filepath): + def download(self, filename: str, target_filepath: str) -> None: raise NotImplementedError @abstractmethod - def exists(self, filename): + def exists(self, filename: str) -> bool: raise NotImplementedError @abstractmethod - def delete(self, filename): + def delete(self, filename: str) -> None: raise NotImplementedError - def scan(self, path, files=True, 
directories=False) -> list[str]: + def scan(self, path: str, files: bool = True, directories: bool = False) -> list[str]: """ Scan files and directories in the given path. This method is implemented only in some storage backends. diff --git a/api/services/app_asset_service.py b/api/services/app_asset_service.py index 817756bd6a..932df8e076 100644 --- a/api/services/app_asset_service.py +++ b/api/services/app_asset_service.py @@ -19,14 +19,9 @@ from core.app_assets.builder.skill_builder import SkillBuilder from core.app_assets.converters import tree_to_asset_items from core.app_assets.entities.assets import AssetItem from core.app_assets.packager import AssetZipPackager -from core.app_assets.paths import AssetPaths +from core.app_assets.storage import AssetPath, app_asset_storage from extensions.ext_database import db from extensions.ext_redis import redis_client -from extensions.ext_storage import storage -from extensions.storage.base_storage import BaseStorage -from extensions.storage.cached_presign_storage import CachedPresignStorage -from extensions.storage.file_presign_storage import FilePresignStorage -from extensions.storage.silent_storage import SilentStorage from models.app_asset import AppAssets from models.model import App @@ -49,16 +44,6 @@ class AppAssetService: def _lock(app_id: str): return redis_client.lock(f"app_asset:lock:{app_id}", timeout=AppAssetService._LOCK_TIMEOUT_SECONDS) - @staticmethod - def assets_storage() -> BaseStorage: - return SilentStorage( - CachedPresignStorage( - storage=FilePresignStorage(storage.storage_runner), - redis_client=redis_client, - cache_key_prefix=AppAssetService._DRAFT_CACHE_KEY_PREFIX, - ) - ) - @staticmethod def get_or_create_assets(session: Session, app_model: App, account_id: str) -> AppAssets: assets = ( @@ -176,8 +161,9 @@ class AppAssetService: max_size_mb = AppAssetService.MAX_PREVIEW_CONTENT_SIZE / 1024 / 1024 raise AppAssetNodeTooLargeError(f"File node {node_id} size exceeded the limit: {max_size_mb} MB") 
- storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, node_id) - return AppAssetService.assets_storage().load_once(storage_key) + asset_storage = app_asset_storage + asset_path = AssetPath.draft(app_model.tenant_id, app_model.id, node_id) + return asset_storage.load(asset_path) @staticmethod def update_file_content( @@ -196,8 +182,9 @@ class AppAssetService: except TreeNodeNotFoundError as e: raise AppAssetNodeNotFoundError(str(e)) from e - storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, node_id) - AppAssetService.assets_storage().save(storage_key, content) + asset_storage = app_asset_storage + asset_path = AssetPath.draft(app_model.tenant_id, app_model.id, node_id) + asset_storage.save(asset_path, content) assets.asset_tree = tree assets.updated_by = account_id @@ -293,14 +280,19 @@ class AppAssetService: assets.updated_by = account_id session.commit() - # FIXME(Mairuis): sync deletion queue + # FIXME(Mairuis): sync deletion queue, failed is fine def _delete_file_from_storage(tenant_id: str, app_id: str, node_ids: list[str]) -> None: - for nid in removed_ids: - storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, nid) + asset_storage = app_asset_storage + for nid in node_ids: + asset_path = AssetPath.draft(tenant_id, app_id, nid) try: - AppAssetService.assets_storage().delete(storage_key) + asset_storage.delete(asset_path) except Exception: - logger.warning("Failed to delete storage file %s", storage_key, exc_info=True) + logger.warning( + "Failed to delete storage file %s", + asset_storage.get_storage_key(asset_path), + exc_info=True, + ) threading.Thread( target=lambda: _delete_file_from_storage(app_model.tenant_id, app_model.id, removed_ids) @@ -327,21 +319,22 @@ class AppAssetService: session.add(published) session.flush() + asset_storage = app_asset_storage ctx = BuildContext(tenant_id=tenant_id, app_id=app_id, build_id=publish_id) built_assets = AssetBuildPipeline( - 
[SkillBuilder(storage=AppAssetService.assets_storage()), FileBuilder()] + [SkillBuilder(storage=asset_storage), FileBuilder(storage=asset_storage)] ).build_all(tree, ctx) - packager = AssetZipPackager(AppAssetService.assets_storage()) + packager = AssetZipPackager(asset_storage.storage) runtime_zip_bytes = packager.package(built_assets) - runtime_zip_key = AssetPaths.build_zip(tenant_id, app_id, publish_id) - AppAssetService.assets_storage().save(runtime_zip_key, runtime_zip_bytes) + runtime_zip_path = AssetPath.build_zip(tenant_id, app_id, publish_id) + asset_storage.save(runtime_zip_path, runtime_zip_bytes) - source_items = tree_to_asset_items(tree, tenant_id, app_id) + source_items = tree_to_asset_items(tree, tenant_id, app_id, asset_storage) source_zip_bytes = packager.package(source_items) - source_zip_key = AssetPaths.build_source_zip(tenant_id, app_id, workflow_id) - AppAssetService.assets_storage().save(source_zip_key, source_zip_bytes) + source_zip_path = AssetPath.source_zip(tenant_id, app_id, workflow_id) + asset_storage.save(source_zip_path, source_zip_bytes) return published @@ -350,15 +343,16 @@ class AppAssetService: # Build resolved draft assets without packaging into a zip. 
tree = assets.asset_tree + asset_storage = app_asset_storage ctx = BuildContext(tenant_id=tenant_id, app_id=app_id, build_id=assets.id) built_assets: list[AssetItem] = AssetBuildPipeline( - [SkillBuilder(storage=AppAssetService.assets_storage()), FileBuilder()] + [SkillBuilder(storage=asset_storage), FileBuilder(storage=asset_storage)] ).build_all(tree, ctx) - packager = AssetZipPackager(storage=AppAssetService.assets_storage()) + packager = AssetZipPackager(storage=asset_storage.storage) zip_bytes = packager.package(built_assets) - zip_key = AssetPaths.build_zip(tenant_id, app_id, assets.id) - AppAssetService.assets_storage().save(zip_key, zip_bytes) + zip_path = AssetPath.build_zip(tenant_id, app_id, assets.id) + asset_storage.save(zip_path, zip_bytes) @staticmethod def get_file_download_url( @@ -375,17 +369,18 @@ class AppAssetService: if not node or node.node_type != AssetNodeType.FILE: raise AppAssetNodeNotFoundError(f"File node {node_id} not found") - storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, node_id) - return AppAssetService.assets_storage().get_download_url(storage_key, expires_in) + asset_storage = app_asset_storage + asset_path = AssetPath.draft(app_model.tenant_id, app_model.id, node_id) + return asset_storage.get_download_url(asset_path, expires_in) @staticmethod def get_source_zip_bytes(tenant_id: str, app_id: str, workflow_id: str) -> bytes | None: - source_zip_key = AssetPaths.build_source_zip(tenant_id, app_id, workflow_id) - try: - return AppAssetService.assets_storage().load_once(source_zip_key) - except Exception: - logger.warning("Source zip not found: %s", source_zip_key) - return None + asset_storage = app_asset_storage + asset_path = AssetPath.source_zip(tenant_id, app_id, workflow_id) + source_zip = asset_storage.load_or_none(asset_path) + if source_zip is None: + logger.warning("Source zip not found: %s", asset_storage.get_storage_key(asset_path)) + return source_zip @staticmethod def set_draft_assets( @@ 
-439,15 +434,15 @@ class AppAssetService: assets.updated_by = account_id session.commit() - storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, node_id) - presign_storage = AppAssetService.assets_storage() + asset_path = AssetPath.draft(app_model.tenant_id, app_model.id, node_id) + asset_storage = app_asset_storage # put empty content to create the file record # which avoids file not found error when uploading via presigned URL is never touched # resulting in inconsistent state - AppAssetService.assets_storage().save(storage_key, b"") + asset_storage.save(asset_path, b"") - upload_url = presign_storage.get_upload_url(storage_key, expires_in) + upload_url = asset_storage.get_upload_url(asset_path, expires_in) return node, upload_url @@ -482,12 +477,12 @@ class AppAssetService: assets.updated_by = account_id session.commit() - storage = AppAssetService.assets_storage() + asset_storage = app_asset_storage def fill_urls(node: BatchUploadNode) -> None: if node.node_type == AssetNodeType.FILE and node.id: - storage_key = AssetPaths.draft_file(app_model.tenant_id, app_model.id, node.id) - node.upload_url = storage.get_upload_url(storage_key, expires_in) + asset_path = AssetPath.draft(app_model.tenant_id, app_model.id, node.id) + node.upload_url = asset_storage.get_upload_url(asset_path, expires_in) for child in node.children: fill_urls(child) diff --git a/api/services/app_bundle_service.py b/api/services/app_bundle_service.py index f8740f0c8f..ab48762135 100644 --- a/api/services/app_bundle_service.py +++ b/api/services/app_bundle_service.py @@ -17,10 +17,9 @@ from core.app.entities.app_bundle_entities import ( ) from core.app_assets.converters import tree_to_asset_items from core.app_assets.packager import AssetZipPackager -from core.app_assets.paths import AssetPaths +from core.app_assets.storage import app_asset_storage from core.app_bundle import SourceZipExtractor from extensions.ext_database import db -from extensions.ext_storage import storage 
from models import Account, App from .app_asset_service import AppAssetService @@ -157,8 +156,9 @@ class AppBundleService: if not tree.nodes: return None - items = tree_to_asset_items(tree, app_model.tenant_id, app_model.id) - packager = AssetZipPackager(AppAssetService.assets_storage()) + asset_storage = app_asset_storage + items = tree_to_asset_items(tree, app_model.tenant_id, app_model.id, asset_storage) + packager = AssetZipPackager(asset_storage.storage) return packager.package(items) @staticmethod @@ -221,7 +221,8 @@ class AppBundleService: logger.warning("App not found for asset import: %s", app_id) return - extractor = SourceZipExtractor(storage) + asset_storage = app_asset_storage + extractor = SourceZipExtractor(asset_storage) try: folders, files = extractor.extract_entries( zip_bytes, @@ -239,7 +240,6 @@ class AppBundleService: files=files, tenant_id=app_model.tenant_id, app_id=app_model.id, - storage_key_fn=AssetPaths.draft_file, ) AppAssetService.set_draft_assets( diff --git a/api/tests/unit_tests/core/app_assets/test_storage.py b/api/tests/unit_tests/core/app_assets/test_storage.py new file mode 100644 index 0000000000..05a26c7415 --- /dev/null +++ b/api/tests/unit_tests/core/app_assets/test_storage.py @@ -0,0 +1,152 @@ +import time +from uuid import uuid4 + +import pytest + +from configs import dify_config +from core.app_assets.storage import AppAssetSigner, AppAssetStorage, AssetPath +from extensions.storage.base_storage import BaseStorage +from libs import rsa + + +class DummyStorage(BaseStorage): + def save(self, filename: str, data: bytes): + return None + + def load_once(self, filename: str) -> bytes: + raise FileNotFoundError + + def load_stream(self, filename: str): + raise FileNotFoundError + + def download(self, filename: str, target_filepath: str): + return None + + def exists(self, filename: str): + return False + + def delete(self, filename: str): + return None + + def get_download_url(self, filename: str, expires_in: int = 3600) -> str: 
+ raise NotImplementedError + + def get_download_urls(self, filenames: list[str], expires_in: int = 3600) -> list[str]: + raise NotImplementedError + + def get_upload_url(self, filename: str, expires_in: int = 3600) -> str: + raise NotImplementedError + + +class DummyRedis: + def mget(self, keys: list[str]) -> list[None]: + return [None for _ in keys] + + def setex(self, key: str, ttl: int, value: str) -> None: + return None + + def delete(self, *keys: str) -> None: + return None + + def pipeline(self): + return self + + def execute(self) -> None: + return None + + +def test_asset_path_validation(): + tenant_id = str(uuid4()) + app_id = str(uuid4()) + resource_id = str(uuid4()) + + ref = AssetPath.draft(tenant_id=tenant_id, app_id=app_id, node_id=resource_id) + assert "/draft/" in ref.get_storage_key() + + with pytest.raises(ValueError): + AssetPath.draft(tenant_id="not-a-uuid", app_id=app_id, node_id=resource_id) + + with pytest.raises(ValueError): + AssetPath.draft(tenant_id=tenant_id, app_id=app_id, node_id="not-a-uuid") + + +def test_storage_key_mapping(): + tenant_id = str(uuid4()) + app_id = str(uuid4()) + node_id = str(uuid4()) + + storage = AppAssetStorage(DummyStorage(), redis_client=DummyRedis()) + ref = AssetPath.draft(tenant_id, app_id, node_id) + assert storage.get_storage_key(ref) == ref.get_storage_key() + + +def test_signature_verification(monkeypatch: pytest.MonkeyPatch): + tenant_id = str(uuid4()) + app_id = str(uuid4()) + resource_id = str(uuid4()) + asset_path = AssetPath.draft(tenant_id, app_id, resource_id) + + class _FakeKey: + def export_key(self) -> bytes: + return b"tenant-private-key" + + def _fake_get_decrypt_decoding(_tenant_id: str) -> tuple[_FakeKey, None]: + return _FakeKey(), None + + monkeypatch.setattr(dify_config, "FILES_ACCESS_TIMEOUT", 300, raising=False) + monkeypatch.setattr(rsa, "get_decrypt_decoding", _fake_get_decrypt_decoding) + + expires_at = int(time.time()) + 120 + nonce = "nonce" + sign = 
AppAssetSigner.create_download_signature(asset_path=asset_path, expires_at=expires_at, nonce=nonce) + + assert AppAssetSigner.verify_download_signature( + asset_path=asset_path, + expires_at=expires_at, + nonce=nonce, + sign=sign, + ) + + expired_at = int(time.time()) - 1 + expired_sign = AppAssetSigner.create_download_signature(asset_path=asset_path, expires_at=expired_at, nonce=nonce) + assert not AppAssetSigner.verify_download_signature( + asset_path=asset_path, + expires_at=expired_at, + nonce=nonce, + sign=expired_sign, + ) + + too_far = int(time.time()) + 3600 + far_sign = AppAssetSigner.create_download_signature(asset_path=asset_path, expires_at=too_far, nonce=nonce) + assert not AppAssetSigner.verify_download_signature( + asset_path=asset_path, + expires_at=too_far, + nonce=nonce, + sign=far_sign, + ) + + +def test_signed_proxy_url_generation(monkeypatch: pytest.MonkeyPatch): + tenant_id = str(uuid4()) + app_id = str(uuid4()) + resource_id = str(uuid4()) + asset_path = AssetPath.draft(tenant_id, app_id, resource_id) + + class _FakeKey: + def export_key(self) -> bytes: + return b"tenant-private-key" + + def _fake_get_decrypt_decoding(_tenant_id: str) -> tuple[_FakeKey, None]: + return _FakeKey(), None + + monkeypatch.setattr(dify_config, "FILES_ACCESS_TIMEOUT", 300, raising=False) + monkeypatch.setattr(rsa, "get_decrypt_decoding", _fake_get_decrypt_decoding) + monkeypatch.setattr(dify_config, "FILES_URL", "http://files.local", raising=False) + + storage = AppAssetStorage(DummyStorage(), redis_client=DummyRedis()) + url = storage.get_download_url(asset_path, expires_in=120) + + assert url.startswith(f"http://files.local/files/app-assets/draft/{tenant_id}/{app_id}/{resource_id}/download?") + assert "expires_at=" in url + assert "nonce=" in url + assert "sign=" in url