mirror of
https://github.com/langgenius/dify.git
synced 2026-05-13 08:57:28 +08:00
Add an optional S3_PUBLIC_BASE_URL setting that, when configured, lets file controllers 302-redirect signed previews to the object store / CDN instead of streaming bytes through the Dify API. Works with any S3-compatible backend exposing a public domain (Cloudflare R2 custom domain, MinIO public endpoint, Aliyun OSS public domain, etc.) so that egress and request handling for images, attachments, tool outputs, and webapp logos no longer go through the API container. Signature verification is preserved: the API still validates the HMAC before issuing the redirect. When S3_PUBLIC_BASE_URL is unset the behavior is unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
96 lines
3.6 KiB
Python
96 lines
3.6 KiB
Python
import logging
|
|
from collections.abc import Generator
|
|
from urllib.parse import quote
|
|
|
|
import boto3
|
|
from botocore.client import Config
|
|
from botocore.exceptions import ClientError
|
|
|
|
from configs import dify_config
|
|
from extensions.storage.base_storage import BaseStorage
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class AwsS3Storage(BaseStorage):
    """Implementation for Amazon Web Services S3 storage.

    Supports both AWS managed IAM credentials and explicit access/secret
    keys, and can build public URLs (e.g. a CDN or public endpoint) when
    ``S3_PUBLIC_BASE_URL`` is configured.
    """

    def __init__(self):
        super().__init__()
        self.bucket_name = dify_config.S3_BUCKET_NAME

        # Optional public base URL used by get_public_url(); normalized to
        # carry no trailing slash so joining with keys is unambiguous.
        public_base_url = dify_config.S3_PUBLIC_BASE_URL
        self.public_base_url = public_base_url.rstrip("/") if public_base_url else None

        if dify_config.S3_USE_AWS_MANAGED_IAM:
            logger.info("Using AWS managed IAM role for S3")
            session = boto3.Session()
            region_name = dify_config.S3_REGION
            self.client = session.client(service_name="s3", region_name=region_name)
        else:
            logger.info("Using ak and sk for S3")
            self.client = boto3.client(
                "s3",
                aws_secret_access_key=dify_config.S3_SECRET_KEY,
                aws_access_key_id=dify_config.S3_ACCESS_KEY,
                endpoint_url=dify_config.S3_ENDPOINT,
                region_name=dify_config.S3_REGION,
                config=Config(s3={"addressing_style": dify_config.S3_ADDRESS_STYLE}),
            )
        # Ensure the bucket exists: create it on 404, tolerate 403 (the
        # bucket may exist but this principal lacks head-bucket permission).
        try:
            self.client.head_bucket(Bucket=self.bucket_name)
        except ClientError as e:
            error_code = e.response.get("Error", {}).get("Code")
            if error_code == "404":
                self.client.create_bucket(Bucket=self.bucket_name)
            elif error_code == "403":
                # Bucket is likely present but not inspectable by us; proceed.
                pass
            else:
                # Any other error is unexpected — surface it to the caller.
                raise

    def save(self, filename, data):
        """Store *data* (bytes or a file-like body) under key *filename*."""
        self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data)

    def load_once(self, filename: str) -> bytes:
        """Return the full object body as bytes.

        Raises:
            FileNotFoundError: if the key does not exist (S3 ``NoSuchKey``).
        """
        try:
            data: bytes = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read()
        except ClientError as ex:
            if ex.response.get("Error", {}).get("Code") == "NoSuchKey":
                # Chain the original ClientError so the S3 context is kept.
                raise FileNotFoundError("File not found") from ex
            else:
                raise
        return data

    def load_stream(self, filename: str) -> Generator:
        """Yield the object body in chunks instead of buffering it whole.

        Raises:
            FileNotFoundError: if the key does not exist (S3 ``NoSuchKey``).
            ValueError: when the client reports its retry limit was reached,
                which we map to a "too frequent requests" message.
        """
        try:
            response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
            yield from response["Body"].iter_chunks()
        except ClientError as ex:
            if ex.response.get("Error", {}).get("Code") == "NoSuchKey":
                raise FileNotFoundError("file not found") from ex
            elif "reached max retries" in str(ex):
                raise ValueError("please do not request the same file too frequently") from ex
            else:
                raise

    def download(self, filename, target_filepath):
        """Download the object identified by *filename* to a local path."""
        self.client.download_file(self.bucket_name, filename, target_filepath)

    def exists(self, filename):
        """Return True if the key exists and is accessible, else False."""
        try:
            self.client.head_object(Bucket=self.bucket_name, Key=filename)
            return True
        except ClientError:
            # A missing key and an access-denied both arrive as ClientError;
            # in either case the object is unusable, so report absence.
            # (Was a bare `except:`, which also hid SystemExit/KeyboardInterrupt.)
            return False

    def delete(self, filename: str):
        """Delete the object stored under key *filename*."""
        self.client.delete_object(Bucket=self.bucket_name, Key=filename)

    def get_public_url(self, filename: str) -> str | None:
        """Return a public URL for *filename*, or None when no public base
        URL is configured.

        Key characters are percent-encoded, but '/' is kept verbatim so
        nested keys preserve their path structure in the URL.
        """
        if not self.public_base_url:
            return None
        return f"{self.public_base_url}/{quote(filename, safe='/')}"
|