From acd6942d210a3650e039f120e676f056d21e9131 Mon Sep 17 00:00:00 2001 From: Luyu Zhang Date: Tue, 28 Apr 2026 17:12:00 -0700 Subject: [PATCH] feat(storage): redirect signed file previews to S3 public base URL Add an optional S3_PUBLIC_BASE_URL setting that, when configured, lets file controllers 302-redirect signed previews to the object store / CDN instead of streaming bytes through the Dify API. Works with any S3-compatible backend exposing a public domain (Cloudflare R2 custom domain, MinIO public endpoint, Aliyun OSS public domain, etc.) so that egress and request handling for images, attachments, tool outputs, and webapp logos no longer go through the API container. Signature verification is preserved: the API still validates the HMAC before issuing the redirect. When S3_PUBLIC_BASE_URL is unset the behavior is unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- api/.env.example | 8 ++ .../storage/amazon_s3_storage_config.py | 13 +++ api/controllers/files/image_preview.py | 17 +++- api/controllers/files/tool_files.py | 6 +- api/core/tools/tool_file_manager.py | 17 ++++ api/extensions/ext_storage.py | 3 + api/extensions/storage/aws_s3_storage.py | 8 ++ api/extensions/storage/base_storage.py | 11 +++ api/services/file_service.py | 21 +++-- .../controllers/files/test_image_preview.py | 72 ++++++++++++++++ .../controllers/files/test_tool_files.py | 47 +++++++++++ .../extensions/storage/test_aws_s3_storage.py | 84 +++++++++++++++++++ .../unit_tests/services/test_file_service.py | 69 ++++++++++++++- docker/.env.example | 8 ++ docker/docker-compose.yaml | 1 + 15 files changed, 371 insertions(+), 14 deletions(-) create mode 100644 api/tests/unit_tests/extensions/storage/test_aws_s3_storage.py diff --git a/api/.env.example b/api/.env.example index f6f65011ea..0399f18306 100644 --- a/api/.env.example +++ b/api/.env.example @@ -116,6 +116,14 @@ S3_ACCESS_KEY=your-access-key S3_SECRET_KEY=your-secret-key S3_REGION=your-region S3_ADDRESS_STYLE=auto +# Optional 
public base URL for objects in the bucket. When set, signed file +# previews are served by 302-redirecting to "{S3_PUBLIC_BASE_URL}/{object_key}" so that bytes +# are delivered directly by the object store / CDN. Examples: +# Cloudflare R2 custom domain: https://cdn.example.com +# MinIO public endpoint: https://minio.example.com/your-bucket +# Aliyun OSS public domain: https://your-bucket.oss-cn-hangzhou.aliyuncs.com +# Leave empty to keep the default API-streamed behavior. +S3_PUBLIC_BASE_URL= # Workflow run and Conversation archive storage (S3-compatible) ARCHIVE_STORAGE_ENABLED=false diff --git a/api/configs/middleware/storage/amazon_s3_storage_config.py b/api/configs/middleware/storage/amazon_s3_storage_config.py index 9277a335f7..e8243e92f6 100644 --- a/api/configs/middleware/storage/amazon_s3_storage_config.py +++ b/api/configs/middleware/storage/amazon_s3_storage_config.py @@ -43,3 +43,16 @@ class S3StorageConfig(BaseSettings): description="Use AWS managed IAM roles for authentication instead of access/secret keys", default=False, ) + + S3_PUBLIC_BASE_URL: str | None = Field( + description=( + "Optional public base URL for objects in the bucket " + "(e.g., a Cloudflare R2 custom domain, MinIO public endpoint, or " + "OSS public domain). When set, signed file previews are served via " + "302 redirect to '/' so that bytes are delivered " + "directly by the object store / CDN instead of proxied by Dify's API. " + "Trailing slashes are ignored. Leave empty to keep the default " + "API-streamed behavior."
+ ), + default=None, + ) diff --git a/api/controllers/files/image_preview.py b/api/controllers/files/image_preview.py index a91e745f80..7cfaaba4fb 100644 --- a/api/controllers/files/image_preview.py +++ b/api/controllers/files/image_preview.py @@ -1,6 +1,6 @@ from urllib.parse import quote -from flask import Response, request +from flask import Response, redirect, request from flask_restx import Resource from pydantic import BaseModel, Field from werkzeug.exceptions import NotFound @@ -64,7 +64,7 @@ class ImagePreviewApi(Resource): sign = args.sign try: - generator, mimetype = FileService(db.engine).get_image_preview( + public_url, generator, mimetype = FileService(db.engine).get_image_preview( file_id=file_id, timestamp=timestamp, nonce=nonce, @@ -73,6 +73,9 @@ class ImagePreviewApi(Resource): except services.errors.file.UnsupportedFileTypeError: raise UnsupportedFileTypeError() + if public_url: + return redirect(public_url, code=302) + return Response(generator, mimetype=mimetype) @@ -103,7 +106,7 @@ class FilePreviewApi(Resource): args = FilePreviewQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore try: - generator, upload_file = FileService(db.engine).get_file_generator_by_file_id( + public_url, generator, upload_file = FileService(db.engine).get_file_generator_by_file_id( file_id=file_id, timestamp=args.timestamp, nonce=args.nonce, @@ -112,6 +115,9 @@ class FilePreviewApi(Resource): except services.errors.file.UnsupportedFileTypeError: raise UnsupportedFileTypeError() + if public_url: + return redirect(public_url, code=302) + response = Response( generator, mimetype=upload_file.mime_type, @@ -175,10 +181,13 @@ class WorkspaceWebappLogoApi(Resource): raise NotFound("webapp logo is not found") try: - generator, mimetype = FileService(db.engine).get_public_image_preview( + public_url, generator, mimetype = FileService(db.engine).get_public_image_preview( webapp_logo_file_id, ) except services.errors.file.UnsupportedFileTypeError: raise 
UnsupportedFileTypeError() + if public_url: + return redirect(public_url, code=302) + return Response(generator, mimetype=mimetype) diff --git a/api/controllers/files/tool_files.py b/api/controllers/files/tool_files.py index 2f1e2f28bd..65d01eaeed 100644 --- a/api/controllers/files/tool_files.py +++ b/api/controllers/files/tool_files.py @@ -1,6 +1,6 @@ from urllib.parse import quote -from flask import Response, request +from flask import Response, redirect, request from flask_restx import Resource from pydantic import BaseModel, Field from werkzeug.exceptions import Forbidden, NotFound @@ -57,6 +57,10 @@ class ToolFileApi(Resource): try: tool_file_manager = ToolFileManager() + public_url, tool_file = tool_file_manager.get_public_url_and_file_by_tool_file_id(file_id) + if public_url and tool_file: + return redirect(public_url, code=302) + stream, tool_file = tool_file_manager.get_file_generator_by_tool_file_id( file_id, ) diff --git a/api/core/tools/tool_file_manager.py b/api/core/tools/tool_file_manager.py index c87e8a3ae0..044ea4eda6 100644 --- a/api/core/tools/tool_file_manager.py +++ b/api/core/tools/tool_file_manager.py @@ -225,6 +225,23 @@ class ToolFileManager: return stream, self._build_graph_file_reference(tool_file) + def get_public_url_and_file_by_tool_file_id(self, tool_file_id: str) -> tuple[str | None, File | None]: + """ + Resolve a tool file to a public URL when the storage backend exposes one. + + Returns (public_url, file_reference). If the backend has no public URL + configured, returns (None, file_reference) and callers should fall back + to the streaming path. 
+ """ + with session_factory.create_session() as session: + tool_file: ToolFile | None = session.scalar(select(ToolFile).where(ToolFile.id == tool_file_id).limit(1)) + + if not tool_file: + return None, None + + public_url = storage.get_public_url(tool_file.file_key) + return public_url, self._build_graph_file_reference(tool_file) + # init tool_file_parser from graphon.file.tool_file_parser import set_tool_file_manager_factory diff --git a/api/extensions/ext_storage.py b/api/extensions/ext_storage.py index db5a6e4812..cc42fc05fe 100644 --- a/api/extensions/ext_storage.py +++ b/api/extensions/ext_storage.py @@ -119,6 +119,9 @@ class Storage: def delete(self, filename: str): return self.storage_runner.delete(filename) + def get_public_url(self, filename: str) -> str | None: + return self.storage_runner.get_public_url(filename) + def scan(self, path: str, files: bool = True, directories: bool = False) -> list[str]: return self.storage_runner.scan(path, files=files, directories=directories) diff --git a/api/extensions/storage/aws_s3_storage.py b/api/extensions/storage/aws_s3_storage.py index 978f60c9b0..956073a86a 100644 --- a/api/extensions/storage/aws_s3_storage.py +++ b/api/extensions/storage/aws_s3_storage.py @@ -1,5 +1,6 @@ import logging from collections.abc import Generator +from urllib.parse import quote import boto3 from botocore.client import Config @@ -17,6 +18,8 @@ class AwsS3Storage(BaseStorage): def __init__(self): super().__init__() self.bucket_name = dify_config.S3_BUCKET_NAME + public_base_url = dify_config.S3_PUBLIC_BASE_URL + self.public_base_url = public_base_url.rstrip("/") if public_base_url else None if dify_config.S3_USE_AWS_MANAGED_IAM: logger.info("Using AWS managed IAM role for S3") @@ -85,3 +88,8 @@ class AwsS3Storage(BaseStorage): def delete(self, filename: str): self.client.delete_object(Bucket=self.bucket_name, Key=filename) + + def get_public_url(self, filename: str) -> str | None: + if not self.public_base_url: + return None + return 
f"{self.public_base_url}/{quote(filename, safe='/')}" diff --git a/api/extensions/storage/base_storage.py b/api/extensions/storage/base_storage.py index a73d429ccd..7363d821c6 100644 --- a/api/extensions/storage/base_storage.py +++ b/api/extensions/storage/base_storage.py @@ -31,6 +31,17 @@ class BaseStorage(ABC): def delete(self, filename: str): raise NotImplementedError + def get_public_url(self, filename: str) -> str | None: + """ + Return a publicly accessible URL for the given object, or None if the + backend is not configured to serve content publicly. + + When set, file controllers will 302-redirect signed preview requests to + this URL after verifying the signature, so that the bytes themselves are + served by the object store / CDN instead of streamed through Dify's API. + """ + return None + def scan(self, path, files=True, directories=False) -> list[str]: """ Scan files and directories in the given path. diff --git a/api/services/file_service.py b/api/services/file_service.py index f60afe2f19..0d6e22c58e 100644 --- a/api/services/file_service.py +++ b/api/services/file_service.py @@ -210,9 +210,12 @@ class FileService: if extension.lower() not in IMAGE_EXTENSIONS: raise UnsupportedFileTypeError() - generator = storage.load(upload_file.key, stream=True) + public_url = storage.get_public_url(upload_file.key) + if public_url: + return public_url, None, upload_file.mime_type - return generator, upload_file.mime_type + generator = storage.load(upload_file.key, stream=True) + return None, generator, upload_file.mime_type def get_file_generator_by_file_id(self, file_id: str, timestamp: str, nonce: str, sign: str): result = file_helpers.verify_file_signature(upload_file_id=file_id, timestamp=timestamp, nonce=nonce, sign=sign) @@ -225,9 +228,12 @@ class FileService: if not upload_file: raise NotFound("File not found or signature is invalid") - generator = storage.load(upload_file.key, stream=True) + public_url = storage.get_public_url(upload_file.key) + if 
public_url: + return public_url, None, upload_file - return generator, upload_file + generator = storage.load(upload_file.key, stream=True) + return None, generator, upload_file def get_public_image_preview(self, file_id: str): with self._session_maker(expire_on_commit=False) as session: @@ -241,9 +247,12 @@ class FileService: if extension.lower() not in IMAGE_EXTENSIONS: raise UnsupportedFileTypeError() - generator = storage.load(upload_file.key) + public_url = storage.get_public_url(upload_file.key) + if public_url: + return public_url, None, upload_file.mime_type - return generator, upload_file.mime_type + generator = storage.load(upload_file.key) + return None, generator, upload_file.mime_type def get_file_content(self, file_id: str) -> str: with self._session_maker(expire_on_commit=False) as session: diff --git a/api/tests/unit_tests/controllers/files/test_image_preview.py b/api/tests/unit_tests/controllers/files/test_image_preview.py index 49846b89ee..361a35750f 100644 --- a/api/tests/unit_tests/controllers/files/test_image_preview.py +++ b/api/tests/unit_tests/controllers/files/test_image_preview.py @@ -49,6 +49,7 @@ class TestImagePreviewApi: generator = iter([b"img"]) mock_file_service.return_value.get_image_preview.return_value = ( + None, generator, "image/png", ) @@ -60,6 +61,30 @@ class TestImagePreviewApi: assert response.mimetype == "image/png" + @patch.object(module, "FileService") + def test_redirects_to_public_url(self, mock_file_service): + module.request = fake_request( + { + "timestamp": "123", + "nonce": "abc", + "sign": "sig", + } + ) + + mock_file_service.return_value.get_image_preview.return_value = ( + "https://cdn.example.com/upload_files/tenant/abc.png", + None, + "image/png", + ) + + api = module.ImagePreviewApi() + get_fn = unwrap(api.get) + + response = get_fn("file-id") + + assert response.status_code == 302 + assert response.headers["Location"] == "https://cdn.example.com/upload_files/tenant/abc.png" + @patch.object(module, 
"FileService") def test_unsupported_file_type(self, mock_file_service): module.request = fake_request( @@ -98,6 +123,7 @@ class TestFilePreviewApi: upload_file = DummyUploadFile(size=100) mock_file_service.return_value.get_file_generator_by_file_id.return_value = ( + None, generator, upload_file, ) @@ -112,6 +138,32 @@ class TestFilePreviewApi: assert "Accept-Ranges" not in response.headers mock_enforce.assert_called_once() + @patch.object(module, "FileService") + def test_redirects_to_public_url(self, mock_file_service): + module.request = fake_request( + { + "timestamp": "123", + "nonce": "abc", + "sign": "sig", + "as_attachment": False, + } + ) + + upload_file = DummyUploadFile(size=100) + mock_file_service.return_value.get_file_generator_by_file_id.return_value = ( + "https://cdn.example.com/upload_files/tenant/abc.bin", + None, + upload_file, + ) + + api = module.FilePreviewApi() + get_fn = unwrap(api.get) + + response = get_fn("file-id") + + assert response.status_code == 302 + assert response.headers["Location"] == "https://cdn.example.com/upload_files/tenant/abc.bin" + @patch.object(module, "enforce_download_for_html") @patch.object(module, "FileService") def test_as_attachment(self, mock_file_service, mock_enforce): @@ -132,6 +184,7 @@ class TestFilePreviewApi: ) mock_file_service.return_value.get_file_generator_by_file_id.return_value = ( + None, generator, upload_file, ) @@ -175,6 +228,7 @@ class TestWorkspaceWebappLogoApi: generator = iter([b"logo"]) mock_file_service.return_value.get_public_image_preview.return_value = ( + None, generator, "image/png", ) @@ -186,6 +240,24 @@ class TestWorkspaceWebappLogoApi: assert response.mimetype == "image/png" + @patch.object(module, "FileService") + @patch.object(module.TenantService, "get_custom_config") + def test_redirects_to_public_url(self, mock_config, mock_file_service): + mock_config.return_value = {"replace_webapp_logo": "logo-id"} + mock_file_service.return_value.get_public_image_preview.return_value = ( 
+ "https://cdn.example.com/upload_files/tenant/logo.png", + None, + "image/png", + ) + + api = module.WorkspaceWebappLogoApi() + get_fn = unwrap(api.get) + + response = get_fn("workspace-id") + + assert response.status_code == 302 + assert response.headers["Location"] == "https://cdn.example.com/upload_files/tenant/logo.png" + @patch.object(module.TenantService, "get_custom_config") def test_logo_not_configured(self, mock_config): mock_config.return_value = {} diff --git a/api/tests/unit_tests/controllers/files/test_tool_files.py b/api/tests/unit_tests/controllers/files/test_tool_files.py index edb91c3f26..1e493f7197 100644 --- a/api/tests/unit_tests/controllers/files/test_tool_files.py +++ b/api/tests/unit_tests/controllers/files/test_tool_files.py @@ -50,6 +50,10 @@ class TestToolFileApi: stream = iter([b"data"]) tool_file = DummyToolFile(size=100) + mock_tool_file_manager.return_value.get_public_url_and_file_by_tool_file_id.return_value = ( + None, + tool_file, + ) mock_tool_file_manager.return_value.get_file_generator_by_tool_file_id.return_value = ( stream, tool_file, @@ -69,6 +73,37 @@ class TestToolFileApi: sign="sig", ) + @patch.object(module, "verify_tool_file_signature", return_value=True) + @patch.object(module, "ToolFileManager") + def test_redirects_to_public_url( + self, + mock_tool_file_manager, + mock_verify, + ): + module.request = fake_request( + { + "timestamp": "123", + "nonce": "abc", + "sign": "sig", + "as_attachment": False, + } + ) + + tool_file = DummyToolFile(size=100) + mock_tool_file_manager.return_value.get_public_url_and_file_by_tool_file_id.return_value = ( + "https://cdn.example.com/tool_files/abc.txt", + tool_file, + ) + + api = module.ToolFileApi() + get_fn = unwrap(api.get) + + response = get_fn("file-id", "txt") + + assert response.status_code == 302 + assert response.headers["Location"] == "https://cdn.example.com/tool_files/abc.txt" + mock_tool_file_manager.return_value.get_file_generator_by_tool_file_id.assert_not_called() + 
@patch.object(module, "verify_tool_file_signature", return_value=True) @patch.object(module, "ToolFileManager") def test_as_attachment( @@ -91,6 +126,10 @@ class TestToolFileApi: filename="doc.pdf", ) + mock_tool_file_manager.return_value.get_public_url_and_file_by_tool_file_id.return_value = ( + None, + tool_file, + ) mock_tool_file_manager.return_value.get_file_generator_by_tool_file_id.return_value = ( stream, tool_file, @@ -137,6 +176,10 @@ class TestToolFileApi: } ) + mock_tool_file_manager.return_value.get_public_url_and_file_by_tool_file_id.return_value = ( + None, + None, + ) mock_tool_file_manager.return_value.get_file_generator_by_tool_file_id.return_value = ( None, None, @@ -164,6 +207,10 @@ class TestToolFileApi: } ) + mock_tool_file_manager.return_value.get_public_url_and_file_by_tool_file_id.return_value = ( + None, + DummyToolFile(), + ) mock_tool_file_manager.return_value.get_file_generator_by_tool_file_id.side_effect = Exception("boom") api = module.ToolFileApi() diff --git a/api/tests/unit_tests/extensions/storage/test_aws_s3_storage.py b/api/tests/unit_tests/extensions/storage/test_aws_s3_storage.py new file mode 100644 index 0000000000..fa89d9ef0c --- /dev/null +++ b/api/tests/unit_tests/extensions/storage/test_aws_s3_storage.py @@ -0,0 +1,84 @@ +from unittest.mock import Mock, patch + +from botocore.exceptions import ClientError + +from extensions.storage.aws_s3_storage import AwsS3Storage + + +def _build_storage(public_base_url: str | None = None) -> AwsS3Storage: + with patch("extensions.storage.aws_s3_storage.dify_config", autospec=True) as mock_config: + mock_config.S3_BUCKET_NAME = "test-bucket" + mock_config.S3_PUBLIC_BASE_URL = public_base_url + mock_config.S3_USE_AWS_MANAGED_IAM = False + mock_config.S3_ACCESS_KEY = "ak" + mock_config.S3_SECRET_KEY = "sk" + mock_config.S3_ENDPOINT = "https://example.com" + mock_config.S3_REGION = "auto" + mock_config.S3_ADDRESS_STYLE = "auto" + + with patch("extensions.storage.aws_s3_storage.boto3") as 
mock_boto3: + client = Mock() + client.head_bucket.return_value = None + mock_boto3.client.return_value = client + mock_boto3.Session.return_value.client.return_value = client + return AwsS3Storage() + + +class TestAwsS3StoragePublicUrl: + def test_returns_none_when_public_base_url_unset(self): + storage = _build_storage(public_base_url=None) + assert storage.get_public_url("upload_files/tenant/abc.png") is None + + def test_returns_none_when_public_base_url_empty_string(self): + storage = _build_storage(public_base_url="") + assert storage.get_public_url("upload_files/tenant/abc.png") is None + + def test_composes_url_when_configured(self): + storage = _build_storage(public_base_url="https://cdn.example.com") + assert ( + storage.get_public_url("upload_files/tenant/abc.png") + == "https://cdn.example.com/upload_files/tenant/abc.png" + ) + + def test_strips_trailing_slash(self): + storage = _build_storage(public_base_url="https://cdn.example.com/") + assert ( + storage.get_public_url("upload_files/tenant/abc.png") + == "https://cdn.example.com/upload_files/tenant/abc.png" + ) + + def test_preserves_path_separators_in_key(self): + # Object key path separators must not be percent-encoded. + storage = _build_storage(public_base_url="https://cdn.example.com") + url = storage.get_public_url("a/b/c.txt") + assert url == "https://cdn.example.com/a/b/c.txt" + + def test_quotes_unsafe_characters_in_key(self): + storage = _build_storage(public_base_url="https://cdn.example.com") + url = storage.get_public_url("upload_files/has space.png") + assert url == "https://cdn.example.com/upload_files/has%20space.png" + + +class TestAwsS3StorageBucketCheck: + def test_init_handles_403_on_head_bucket(self): + # Regression: R2 / hardened buckets often return 403 on head_bucket; the + # constructor must swallow the error instead of crashing. 
+ with patch("extensions.storage.aws_s3_storage.dify_config", autospec=True) as mock_config: + mock_config.S3_BUCKET_NAME = "test-bucket" + mock_config.S3_PUBLIC_BASE_URL = None + mock_config.S3_USE_AWS_MANAGED_IAM = False + mock_config.S3_ACCESS_KEY = "ak" + mock_config.S3_SECRET_KEY = "sk" + mock_config.S3_ENDPOINT = "https://example.com" + mock_config.S3_REGION = "auto" + mock_config.S3_ADDRESS_STYLE = "auto" + + with patch("extensions.storage.aws_s3_storage.boto3") as mock_boto3: + client = Mock() + client.head_bucket.side_effect = ClientError( + {"Error": {"Code": "403", "Message": "Forbidden"}}, "HeadBucket" + ) + mock_boto3.client.return_value = client + storage = AwsS3Storage() + assert storage.bucket_name == "test-bucket" + client.create_bucket.assert_not_called() diff --git a/api/tests/unit_tests/services/test_file_service.py b/api/tests/unit_tests/services/test_file_service.py index 8e1b22886b..94f5c25902 100644 --- a/api/tests/unit_tests/services/test_file_service.py +++ b/api/tests/unit_tests/services/test_file_service.py @@ -253,15 +253,39 @@ class TestFileService: patch("services.file_service.storage") as mock_storage, ): mock_verify.return_value = True + mock_storage.get_public_url.return_value = None mock_storage.load.return_value = iter([b"chunk1"]) # Execute - gen, mime = file_service.get_image_preview("file_id", "ts", "nonce", "sign") + public_url, gen, mime = file_service.get_image_preview("file_id", "ts", "nonce", "sign") # Assert + assert public_url is None assert list(gen) == [b"chunk1"] assert mime == "image/jpeg" + def test_get_image_preview_redirects_when_storage_has_public_url(self, file_service, mock_db_session): + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "file_id" + upload_file.extension = "jpg" + upload_file.mime_type = "image/jpeg" + upload_file.key = "upload_files/tenant/abc.jpg" + mock_db_session.scalar.return_value = upload_file + + with ( + patch("services.file_service.file_helpers.verify_image_signature") as 
mock_verify, + patch("services.file_service.storage") as mock_storage, + ): + mock_verify.return_value = True + mock_storage.get_public_url.return_value = "https://cdn.example.com/upload_files/tenant/abc.jpg" + + public_url, gen, mime = file_service.get_image_preview("file_id", "ts", "nonce", "sign") + + assert public_url == "https://cdn.example.com/upload_files/tenant/abc.jpg" + assert gen is None + assert mime == "image/jpeg" + mock_storage.load.assert_not_called() + def test_get_image_preview_invalid_sig(self, file_service): with patch("services.file_service.file_helpers.verify_image_signature") as mock_verify: mock_verify.return_value = False @@ -296,12 +320,33 @@ class TestFileService: patch("services.file_service.storage") as mock_storage, ): mock_verify.return_value = True + mock_storage.get_public_url.return_value = None mock_storage.load.return_value = iter([b"chunk"]) - gen, file = file_service.get_file_generator_by_file_id("file_id", "ts", "nonce", "sign") + public_url, gen, file = file_service.get_file_generator_by_file_id("file_id", "ts", "nonce", "sign") + assert public_url is None assert list(gen) == [b"chunk"] assert file == upload_file + def test_get_file_generator_by_file_id_redirects_when_storage_has_public_url(self, file_service, mock_db_session): + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "file_id" + upload_file.key = "upload_files/tenant/abc.bin" + mock_db_session.scalar.return_value = upload_file + + with ( + patch("services.file_service.file_helpers.verify_file_signature") as mock_verify, + patch("services.file_service.storage") as mock_storage, + ): + mock_verify.return_value = True + mock_storage.get_public_url.return_value = "https://cdn.example.com/upload_files/tenant/abc.bin" + + public_url, gen, file = file_service.get_file_generator_by_file_id("file_id", "ts", "nonce", "sign") + assert public_url == "https://cdn.example.com/upload_files/tenant/abc.bin" + assert gen is None + assert file == upload_file + 
mock_storage.load.assert_not_called() + def test_get_file_generator_by_file_id_invalid_sig(self, file_service): with patch("services.file_service.file_helpers.verify_file_signature") as mock_verify: mock_verify.return_value = False @@ -324,11 +369,29 @@ class TestFileService: mock_db_session.scalar.return_value = upload_file with patch("services.file_service.storage") as mock_storage: + mock_storage.get_public_url.return_value = None mock_storage.load.return_value = b"image content" - gen, mime = file_service.get_public_image_preview("file_id") + public_url, gen, mime = file_service.get_public_image_preview("file_id") + assert public_url is None assert gen == b"image content" assert mime == "image/png" + def test_get_public_image_preview_redirects_when_storage_has_public_url(self, file_service, mock_db_session): + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "file_id" + upload_file.extension = "png" + upload_file.mime_type = "image/png" + upload_file.key = "upload_files/tenant/logo.png" + mock_db_session.scalar.return_value = upload_file + + with patch("services.file_service.storage") as mock_storage: + mock_storage.get_public_url.return_value = "https://cdn.example.com/upload_files/tenant/logo.png" + public_url, gen, mime = file_service.get_public_image_preview("file_id") + assert public_url == "https://cdn.example.com/upload_files/tenant/logo.png" + assert gen is None + assert mime == "image/png" + mock_storage.load.assert_not_called() + def test_get_public_image_preview_not_found(self, file_service, mock_db_session): mock_db_session.scalar.return_value = None with pytest.raises(NotFound, match="File not found or signature is invalid"): diff --git a/docker/.env.example b/docker/.env.example index 29741474fa..89c9b59da3 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -483,6 +483,14 @@ S3_ADDRESS_STYLE=auto # Whether to use AWS managed IAM roles for authenticating with the S3 service. 
# If set to false, the access key and secret key must be provided. S3_USE_AWS_MANAGED_IAM=false +# Optional public base URL for objects in the bucket. When set, signed file +# previews are served by 302-redirecting to "{S3_PUBLIC_BASE_URL}/{object_key}" so that bytes +# are delivered directly by the object store / CDN. Examples: +# Cloudflare R2 custom domain: https://cdn.example.com +# MinIO public endpoint: https://minio.example.com/your-bucket +# Aliyun OSS public domain: https://your-bucket.oss-cn-hangzhou.aliyuncs.com +# Leave empty to keep the default API-streamed behavior. +S3_PUBLIC_BASE_URL= # Workflow run and Conversation archive storage (S3-compatible) ARCHIVE_STORAGE_ENABLED=false diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 60ba510f44..7c1ea7f475 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -136,6 +136,7 @@ x-shared-env: &shared-api-worker-env S3_SECRET_KEY: ${S3_SECRET_KEY:-} S3_ADDRESS_STYLE: ${S3_ADDRESS_STYLE:-auto} S3_USE_AWS_MANAGED_IAM: ${S3_USE_AWS_MANAGED_IAM:-false} + S3_PUBLIC_BASE_URL: ${S3_PUBLIC_BASE_URL:-} ARCHIVE_STORAGE_ENABLED: ${ARCHIVE_STORAGE_ENABLED:-false} ARCHIVE_STORAGE_ENDPOINT: ${ARCHIVE_STORAGE_ENDPOINT:-} ARCHIVE_STORAGE_ARCHIVE_BUCKET: ${ARCHIVE_STORAGE_ARCHIVE_BUCKET:-}