dify/api/extensions/storage/aws_s3_storage.py
Luyu Zhang acd6942d21 feat(storage): redirect signed file previews to S3 public base URL
Add an optional S3_PUBLIC_BASE_URL setting that, when configured, lets
file controllers 302-redirect signed previews to the object store / CDN
instead of streaming bytes through the Dify API. Works with any
S3-compatible backend exposing a public domain (Cloudflare R2 custom
domain, MinIO public endpoint, Aliyun OSS public domain, etc.) so that
egress and request handling for images, attachments, tool outputs, and
webapp logos no longer go through the API container.

Signature verification is preserved: the API still validates the HMAC
before issuing the redirect. When S3_PUBLIC_BASE_URL is unset the
behavior is unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 17:12:00 -07:00

96 lines
3.6 KiB
Python

import logging
from collections.abc import Generator
from urllib.parse import quote
import boto3
from botocore.client import Config
from botocore.exceptions import ClientError
from configs import dify_config
from extensions.storage.base_storage import BaseStorage
logger = logging.getLogger(__name__)
class AwsS3Storage(BaseStorage):
"""Implementation for Amazon Web Services S3 storage."""
def __init__(self):
super().__init__()
self.bucket_name = dify_config.S3_BUCKET_NAME
public_base_url = dify_config.S3_PUBLIC_BASE_URL
self.public_base_url = public_base_url.rstrip("/") if public_base_url else None
if dify_config.S3_USE_AWS_MANAGED_IAM:
logger.info("Using AWS managed IAM role for S3")
session = boto3.Session()
region_name = dify_config.S3_REGION
self.client = session.client(service_name="s3", region_name=region_name)
else:
logger.info("Using ak and sk for S3")
self.client = boto3.client(
"s3",
aws_secret_access_key=dify_config.S3_SECRET_KEY,
aws_access_key_id=dify_config.S3_ACCESS_KEY,
endpoint_url=dify_config.S3_ENDPOINT,
region_name=dify_config.S3_REGION,
config=Config(s3={"addressing_style": dify_config.S3_ADDRESS_STYLE}),
)
# create bucket
try:
self.client.head_bucket(Bucket=self.bucket_name)
except ClientError as e:
# if bucket not exists, create it
if e.response.get("Error", {}).get("Code") == "404":
self.client.create_bucket(Bucket=self.bucket_name)
# if bucket is not accessible, pass, maybe the bucket is existing but not accessible
elif e.response.get("Error", {}).get("Code") == "403":
pass
else:
# other error, raise exception
raise
def save(self, filename, data):
self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data)
def load_once(self, filename: str) -> bytes:
try:
data: bytes = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read()
except ClientError as ex:
if ex.response.get("Error", {}).get("Code") == "NoSuchKey":
raise FileNotFoundError("File not found")
else:
raise
return data
def load_stream(self, filename: str) -> Generator:
try:
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
yield from response["Body"].iter_chunks()
except ClientError as ex:
if ex.response.get("Error", {}).get("Code") == "NoSuchKey":
raise FileNotFoundError("file not found")
elif "reached max retries" in str(ex):
raise ValueError("please do not request the same file too frequently")
else:
raise
def download(self, filename, target_filepath):
self.client.download_file(self.bucket_name, filename, target_filepath)
def exists(self, filename):
try:
self.client.head_object(Bucket=self.bucket_name, Key=filename)
return True
except:
return False
def delete(self, filename: str):
self.client.delete_object(Bucket=self.bucket_name, Key=filename)
def get_public_url(self, filename: str) -> str | None:
if not self.public_base_url:
return None
return f"{self.public_base_url}/{quote(filename, safe='/')}"