From 6bb09dc58ceba2c35ad008be9363dc039de73835 Mon Sep 17 00:00:00 2001 From: Harry Date: Thu, 15 Jan 2026 17:19:46 +0800 Subject: [PATCH] feat(app-assets): add file download functionality with pre-signed URLs and enhance asset management --- api/controllers/console/app/app_asset.py | 48 ++++- api/core/app/entities/app_asset_entities.py | 12 +- api/extensions/ext_storage.py | 3 + api/extensions/storage/aws_s3_storage.py | 8 + api/extensions/storage/base_storage.py | 19 ++ ..._15_1649-d88f3edbd99d_rename_app_assets.py | 68 +++++++ api/services/app_asset_service.py | 181 ++++++++++++++---- api/services/errors/app_asset.py | 4 + 8 files changed, 296 insertions(+), 47 deletions(-) create mode 100644 api/migrations/versions/2026_01_15_1649-d88f3edbd99d_rename_app_assets.py diff --git a/api/controllers/console/app/app_asset.py b/api/controllers/console/app/app_asset.py index 046660d680..34c8f99db5 100644 --- a/api/controllers/console/app/app_asset.py +++ b/api/controllers/console/app/app_asset.py @@ -1,6 +1,7 @@ -from flask import request +from flask import Response, request from flask_restx import Resource from pydantic import BaseModel, Field, field_validator +from werkzeug.exceptions import Forbidden from controllers.console import console_ns from controllers.console.app.error import ( @@ -257,3 +258,48 @@ class AppAssetPublishResource(Resource): "version": published.version, "asset_tree": published.asset_tree.model_dump(), }, 201 + + +@console_ns.route("/apps//assets/files//download-url") +class AppAssetFileDownloadUrlResource(Resource): + @setup_required + @login_required + @account_initialization_required + @get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW]) + def get(self, app_model: App, node_id: str): + current_user, _ = current_account_with_tenant() + try: + download_url = AppAssetService.get_file_download_url(app_model, current_user.id, node_id) + return {"download_url": download_url} + except ServiceNodeNotFoundError: + raise AppAssetNodeNotFoundError() + + +@console_ns.route("/apps//assets/files//download") +class AppAssetFileDownloadResource(Resource): + @setup_required + @get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW]) + def get(self, app_model: App, node_id: str): + timestamp = request.args.get("timestamp", "") + nonce = request.args.get("nonce", "") + sign = request.args.get("sign", "") + + if not AppAssetService.verify_download_signature( + app_id=app_model.id, + node_id=node_id, + timestamp=timestamp, + nonce=nonce, + sign=sign, + ): + raise Forbidden("Invalid or expired download link") + + try: + content, filename = AppAssetService.get_file_for_download(app_model, node_id) + except ServiceNodeNotFoundError: + raise AppAssetNodeNotFoundError() + + return Response( + content, + mimetype="application/octet-stream", + headers={"Content-Disposition": f'attachment; filename="{filename}"'}, + ) diff --git a/api/core/app/entities/app_asset_entities.py b/api/core/app/entities/app_asset_entities.py index 7122c916ac..e2125a1ebb 100644 --- a/api/core/app/entities/app_asset_entities.py +++ b/api/core/app/entities/app_asset_entities.py @@ -41,7 +41,7 @@ class AppAssetNode(BaseModel): ) -class AppAssetTreeView(BaseModel): +class AppAssetNodeView(BaseModel): id: str = Field(description="Unique identifier for the node") node_type: str = Field(description="Type of node: 'file' or 'folder'") name: str = Field(description="Name of the file or folder") @@ -49,7 +49,7 @@ class AppAssetTreeView(BaseModel): extension: str = Field(default="", description="File extension without dot") size: int = Field(default=0, description="File size in bytes") checksum: str = Field(default="", description="SHA-256 checksum of file content") - children: list[AppAssetTreeView] = Field(default_factory=list, description="Child nodes for folders") + children: list[AppAssetNodeView] = Field(default_factory=list, description="Child nodes for folders") class TreeNodeNotFoundError(Exception): @@ -201,7 +201,7 @@ class AppAssetFileTree(BaseModel): def walk_files(self) -> Generator[AppAssetNode, None, None]: return (n for n in self.nodes if n.node_type == AssetNodeType.FILE) - def transform(self) -> list[AppAssetTreeView]: + def transform(self) -> list[AppAssetNodeView]: by_parent: dict[str | None, list[AppAssetNode]] = defaultdict(list) for n in self.nodes: by_parent[n.parent_id].append(n) @@ -210,16 +210,16 @@ class AppAssetFileTree(BaseModel): children.sort(key=lambda x: x.order) paths: dict[str, str] = {} - tree_views: dict[str, AppAssetTreeView] = {} + tree_views: dict[str, AppAssetNodeView] = {} def build_view(node: AppAssetNode, parent_path: str) -> None: path = f"{parent_path}/{node.name}" paths[node.id] = path - child_views: list[AppAssetTreeView] = [] + child_views: list[AppAssetNodeView] = [] for child in by_parent.get(node.id, []): build_view(child, path) child_views.append(tree_views[child.id]) - tree_views[node.id] = AppAssetTreeView( + tree_views[node.id] = AppAssetNodeView( id=node.id, node_type=node.node_type.value, name=node.name, diff --git a/api/extensions/ext_storage.py b/api/extensions/ext_storage.py index 6df0879694..b801b2b578 100644 --- a/api/extensions/ext_storage.py +++ b/api/extensions/ext_storage.py @@ -118,6 +118,9 @@ class Storage: def scan(self, path: str, files: bool = True, directories: bool = False) -> list[str]: return self.storage_runner.scan(path, files=files, directories=directories) + def get_download_url(self, filename: str, expires_in: int = 3600) -> str: + return self.storage_runner.get_download_url(filename, expires_in) + storage = Storage() diff --git a/api/extensions/storage/aws_s3_storage.py b/api/extensions/storage/aws_s3_storage.py index 6ab2a95e3c..1d47926f3d 100644 --- a/api/extensions/storage/aws_s3_storage.py +++ b/api/extensions/storage/aws_s3_storage.py @@ -85,3 +85,11 @@ class AwsS3Storage(BaseStorage): def delete(self, filename): self.client.delete_object(Bucket=self.bucket_name, Key=filename) + + def get_download_url(self, filename: str, expires_in: int = 3600) -> str: + url: str = self.client.generate_presigned_url( + ClientMethod="get_object", + Params={"Bucket": self.bucket_name, "Key": filename}, + ExpiresIn=expires_in, + ) + return url diff --git a/api/extensions/storage/base_storage.py b/api/extensions/storage/base_storage.py index 8ddedb24ae..5243b66299 100644 --- a/api/extensions/storage/base_storage.py +++ b/api/extensions/storage/base_storage.py @@ -38,3 +38,22 @@ class BaseStorage(ABC): If a storage backend doesn't support scanning, it will raise NotImplementedError. """ raise NotImplementedError("This storage backend doesn't support scanning") + + def get_download_url(self, filename: str, expires_in: int = 3600) -> str: + """ + Generate a pre-signed URL for downloading a file. + + Storage backends that support pre-signed URLs (e.g., S3, Azure Blob, GCS) + should override this method to return a direct download URL. + + Args: + filename: The file path/key in storage + expires_in: URL validity duration in seconds (default: 1 hour) + + Returns: + Pre-signed URL string + + Raises: + NotImplementedError: If this storage backend doesn't support pre-signed URLs + """ + raise NotImplementedError("This storage backend doesn't support pre-signed URLs") diff --git a/api/migrations/versions/2026_01_15_1649-d88f3edbd99d_rename_app_assets.py b/api/migrations/versions/2026_01_15_1649-d88f3edbd99d_rename_app_assets.py new file mode 100644 index 0000000000..7acf4b17da --- /dev/null +++ b/api/migrations/versions/2026_01_15_1649-d88f3edbd99d_rename_app_assets.py @@ -0,0 +1,68 @@ +"""rename_app_assets + +Revision ID: d88f3edbd99d +Revises: a1b2c3d4e5f6 +Create Date: 2026-01-15 16:49:11.833689 + +""" +from alembic import op +import models as models +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = 'd88f3edbd99d' +down_revision = 'a1b2c3d4e5f6' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('app_assets', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('version', sa.String(length=255), nullable=False), + sa.Column('asset_tree', models.types.LongText(), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='app_assets_pkey') + ) + with op.batch_alter_table('app_assets', schema=None) as batch_op: + batch_op.create_index('app_assets_version_idx', ['tenant_id', 'app_id', 'version'], unique=False) + + with op.batch_alter_table('app_asset_drafts', schema=None) as batch_op: + batch_op.drop_index(batch_op.f('app_asset_draft_version_idx')) + + op.drop_table('app_asset_drafts') + with op.batch_alter_table('trigger_oauth_tenant_clients', schema=None) as batch_op: + batch_op.alter_column('plugin_id', + existing_type=sa.VARCHAR(length=512), + type_=sa.String(length=255), + existing_nullable=False) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('app_asset_drafts', + sa.Column('id', sa.UUID(), autoincrement=False, nullable=False), + sa.Column('tenant_id', sa.UUID(), autoincrement=False, nullable=False), + sa.Column('app_id', sa.UUID(), autoincrement=False, nullable=False), + sa.Column('version', sa.VARCHAR(length=255), autoincrement=False, nullable=False), + sa.Column('asset_tree', sa.TEXT(), autoincrement=False, nullable=False), + sa.Column('created_by', sa.UUID(), autoincrement=False, nullable=False), + sa.Column('created_at', postgresql.TIMESTAMP(), server_default=sa.text('CURRENT_TIMESTAMP'), autoincrement=False, nullable=False), + sa.Column('updated_by', sa.UUID(), autoincrement=False, nullable=True), + sa.Column('updated_at', postgresql.TIMESTAMP(), server_default=sa.text('CURRENT_TIMESTAMP'), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('id', name=op.f('app_asset_draft_pkey')) + ) + with op.batch_alter_table('app_asset_drafts', schema=None) as batch_op: + batch_op.create_index(batch_op.f('app_asset_draft_version_idx'), ['tenant_id', 'app_id', 'version'], unique=False) + + op.drop_table('app_assets') + # ### end Alembic commands ### diff --git a/api/services/app_asset_service.py b/api/services/app_asset_service.py index 156df7516f..511cc8360d 100644 --- a/api/services/app_asset_service.py +++ b/api/services/app_asset_service.py @@ -1,11 +1,17 @@ +import base64 import hashlib +import hmac import io import logging +import os +import time +import urllib.parse import zipfile from uuid import uuid4 from sqlalchemy.orm import Session +from configs import dify_config from core.app.entities.app_asset_entities import ( AppAssetFileTree, AppAssetNode, @@ -22,6 +28,7 @@ from models.model import App from .errors.app_asset import ( AppAssetNodeNotFoundError, + AppAssetNodeTooLargeError, AppAssetParentNotFoundError, AppAssetPathConflictError, ) @@ -30,9 +37,11 @@ logger = logging.getLogger(__name__) class AppAssetService: + MAX_PREVIEW_CONTENT_SIZE = 5 * 1024 * 1024 # 5MB + @staticmethod - def get_or_create_draft(session: Session, app_model: App, account_id: str) -> AppAssets: - draft = ( + def get_or_create_assets(session: Session, app_model: App, account_id: str) -> AppAssets: + assets = ( session.query(AppAssets) .filter( AppAssets.tenant_id == app_model.tenant_id, @@ -41,23 +50,23 @@ class AppAssetService: ) .first() ) - if not draft: - draft = AppAssets( + if not assets: + assets = AppAssets( id=str(uuid4()), tenant_id=app_model.tenant_id, app_id=app_model.id, version=AppAssets.VERSION_DRAFT, created_by=account_id, ) - session.add(draft) + session.add(assets) session.commit() - return draft + return assets @staticmethod def get_asset_tree(app_model: App, account_id: str) -> AppAssetFileTree: with Session(db.engine) as session: - draft = AppAssetService.get_or_create_draft(session, app_model, account_id) - return draft.asset_tree + assets = AppAssetService.get_or_create_assets(session, app_model, account_id) + return assets.asset_tree @staticmethod def create_folder( @@ -67,8 +76,8 @@ class AppAssetService: parent_id: str | None = None, ) -> AppAssetNode: with Session(db.engine, expire_on_commit=False) as session: - draft = AppAssetService.get_or_create_draft(session, app_model, account_id) - tree = draft.asset_tree + assets = AppAssetService.get_or_create_assets(session, app_model, account_id) + tree = assets.asset_tree node = AppAssetNode.create_folder(str(uuid4()), name, parent_id) @@ -79,8 +88,8 @@ class AppAssetService: except TreePathConflictError as e: raise AppAssetPathConflictError(str(e)) from e - draft.asset_tree = tree - draft.updated_by = account_id + assets.asset_tree = tree + assets.updated_by = account_id session.commit() return node @@ -94,8 +103,8 @@ class AppAssetService: parent_id: str | None = None, ) -> AppAssetNode: with Session(db.engine, expire_on_commit=False) as session: - draft = AppAssetService.get_or_create_draft(session, app_model, account_id) - tree = draft.asset_tree + assets = AppAssetService.get_or_create_assets(session, app_model, account_id) + tree = assets.asset_tree node_id = str(uuid4()) checksum = hashlib.sha256(content).hexdigest() @@ -111,8 +120,8 @@ class AppAssetService: storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, node_id) storage.save(storage_key, content) - draft.asset_tree = tree - draft.updated_by = account_id + assets.asset_tree = tree + assets.updated_by = account_id session.commit() return node @@ -120,13 +129,17 @@ class AppAssetService: @staticmethod def get_file_content(app_model: App, account_id: str, node_id: str) -> bytes: with Session(db.engine) as session: - draft = AppAssetService.get_or_create_draft(session, app_model, account_id) - tree = draft.asset_tree + assets = AppAssetService.get_or_create_assets(session, app_model, account_id) + tree = assets.asset_tree node = tree.get(node_id) if not node or node.node_type != AssetNodeType.FILE: raise AppAssetNodeNotFoundError(f"File node {node_id} not found") + if node.size > AppAssetService.MAX_PREVIEW_CONTENT_SIZE: + max_size_mb = AppAssetService.MAX_PREVIEW_CONTENT_SIZE / 1024 / 1024 + raise AppAssetNodeTooLargeError(f"File node {node_id} size exceeded the limit: {max_size_mb} MB") + storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, node_id) return storage.load_once(storage_key) @@ -138,8 +151,8 @@ class AppAssetService: content: bytes, ) -> AppAssetNode: with Session(db.engine, expire_on_commit=False) as session: - draft = AppAssetService.get_or_create_draft(session, app_model, account_id) - tree = draft.asset_tree + assets = AppAssetService.get_or_create_assets(session, app_model, account_id) + tree = assets.asset_tree checksum = hashlib.sha256(content).hexdigest() @@ -151,8 +164,8 @@ class AppAssetService: storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, node_id) storage.save(storage_key, content) - draft.asset_tree = tree - draft.updated_by = account_id + assets.asset_tree = tree + assets.updated_by = account_id session.commit() return node @@ -165,8 +178,8 @@ class AppAssetService: new_name: str, ) -> AppAssetNode: with Session(db.engine, expire_on_commit=False) as session: - draft = AppAssetService.get_or_create_draft(session, app_model, account_id) - tree = draft.asset_tree + assets = AppAssetService.get_or_create_assets(session, app_model, account_id) + tree = assets.asset_tree try: node = tree.rename(node_id, new_name) @@ -175,8 +188,8 @@ class AppAssetService: except TreePathConflictError as e: raise AppAssetPathConflictError(str(e)) from e - draft.asset_tree = tree - draft.updated_by = account_id + assets.asset_tree = tree + assets.updated_by = account_id session.commit() return node @@ -189,8 +202,8 @@ class AppAssetService: new_parent_id: str | None, ) -> AppAssetNode: with Session(db.engine, expire_on_commit=False) as session: - draft = AppAssetService.get_or_create_draft(session, app_model, account_id) - tree = draft.asset_tree + assets = AppAssetService.get_or_create_assets(session, app_model, account_id) + tree = assets.asset_tree try: node = tree.move(node_id, new_parent_id) @@ -201,8 +214,8 @@ class AppAssetService: except TreePathConflictError as e: raise AppAssetPathConflictError(str(e)) from e - draft.asset_tree = tree - draft.updated_by = account_id + assets.asset_tree = tree + assets.updated_by = account_id session.commit() return node @@ -215,16 +228,16 @@ class AppAssetService: after_node_id: str | None, ) -> AppAssetNode: with Session(db.engine, expire_on_commit=False) as session: - draft = AppAssetService.get_or_create_draft(session, app_model, account_id) - tree = draft.asset_tree + assets = AppAssetService.get_or_create_assets(session, app_model, account_id) + tree = assets.asset_tree try: node = tree.reorder(node_id, after_node_id) except TreeNodeNotFoundError as e: raise AppAssetNodeNotFoundError(str(e)) from e - draft.asset_tree = tree - draft.updated_by = account_id + assets.asset_tree = tree + assets.updated_by = account_id session.commit() return node @@ -232,8 +245,8 @@ class AppAssetService: @staticmethod def delete_node(app_model: App, account_id: str, node_id: str) -> None: with Session(db.engine) as session: - draft = AppAssetService.get_or_create_draft(session, app_model, account_id) - tree = draft.asset_tree + assets = AppAssetService.get_or_create_assets(session, app_model, account_id) + tree = assets.asset_tree try: removed_ids = tree.remove(node_id) @@ -247,15 +260,15 @@ class AppAssetService: except Exception: logger.warning("Failed to delete storage file %s", storage_key, exc_info=True) - draft.asset_tree = tree - draft.updated_by = account_id + assets.asset_tree = tree + assets.updated_by = account_id session.commit() @staticmethod def publish(app_model: App, account_id: str) -> AppAssets: with Session(db.engine, expire_on_commit=False) as session: - draft = AppAssetService.get_or_create_draft(session, app_model, account_id) - tree = draft.asset_tree + assets = AppAssetService.get_or_create_assets(session, app_model, account_id) + tree = assets.asset_tree # TODO: use sandbox virtual environment to create zip file zip_buffer = io.BytesIO() @@ -311,3 +324,91 @@ class AppAssetService: if archive_path not in zf.namelist(): raise AppAssetNodeNotFoundError(f"File {file_path} not found in published version") return zf.read(archive_path) + + @staticmethod + def get_file_download_url( + app_model: App, + account_id: str, + node_id: str, + expires_in: int = 3600, + ) -> str: + with Session(db.engine) as session: + assets = AppAssetService.get_or_create_assets(session, app_model, account_id) + tree = assets.asset_tree + + node = tree.get(node_id) + if not node or node.node_type != AssetNodeType.FILE: + raise AppAssetNodeNotFoundError(f"File node {node_id} not found") + + storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, node_id) + + try: + return storage.get_download_url(storage_key, expires_in) + except NotImplementedError: + return AppAssetService._generate_signed_proxy_url( + app_id=app_model.id, + node_id=node_id, + expires_in=expires_in, + ) + + @staticmethod + def _generate_signed_proxy_url(app_id: str, node_id: str, expires_in: int) -> str: + base_url = dify_config.FILES_URL + url = f"{base_url}/console/api/apps/{app_id}/assets/files/{node_id}/download" + + timestamp = str(int(time.time())) + nonce = os.urandom(16).hex() + key = dify_config.SECRET_KEY.encode() + msg = f"app-asset-download|{app_id}|{node_id}|{timestamp}|{nonce}" + sign = hmac.new(key, msg.encode(), hashlib.sha256).digest() + encoded_sign = base64.urlsafe_b64encode(sign).decode() + + query = {"timestamp": timestamp, "nonce": nonce, "sign": encoded_sign} + query_string = urllib.parse.urlencode(query) + + return f"{url}?{query_string}" + + @staticmethod + def verify_download_signature( + *, + app_id: str, + node_id: str, + timestamp: str, + nonce: str, + sign: str, + ) -> bool: + data_to_sign = f"app-asset-download|{app_id}|{node_id}|{timestamp}|{nonce}" + secret_key = dify_config.SECRET_KEY.encode() + recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode() + + if sign != recalculated_encoded_sign: + return False + + current_time = int(time.time()) + return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT + + @staticmethod + def get_file_for_download(app_model: App, node_id: str) -> tuple[bytes, str]: + with Session(db.engine) as session: + assets = ( + session.query(AppAssets) + .filter( + AppAssets.tenant_id == app_model.tenant_id, + AppAssets.app_id == app_model.id, + AppAssets.version == AppAssets.VERSION_DRAFT, + ) + .first() + ) + if not assets: + raise AppAssetNodeNotFoundError(f"Assets not found for app {app_model.id}") + + tree = assets.asset_tree + node = tree.get(node_id) + if not node or node.node_type != AssetNodeType.FILE: + raise AppAssetNodeNotFoundError(f"File node {node_id} not found") + + storage_key = AppAssets.get_storage_key(app_model.tenant_id, app_model.id, node_id) + content = storage.load_once(storage_key) + + return content, node.name diff --git a/api/services/errors/app_asset.py b/api/services/errors/app_asset.py index de114c5a84..7fe1c1a30f 100644 --- a/api/services/errors/app_asset.py +++ b/api/services/errors/app_asset.py @@ -11,3 +11,7 @@ class AppAssetParentNotFoundError(BaseServiceError): class AppAssetPathConflictError(BaseServiceError): pass + + +class AppAssetNodeTooLargeError(BaseServiceError): + pass