diff --git a/api/controllers/common/file_response.py b/api/controllers/common/file_response.py new file mode 100644 index 0000000000..ca8ea3d52e --- /dev/null +++ b/api/controllers/common/file_response.py @@ -0,0 +1,57 @@ +import os +from email.message import Message +from urllib.parse import quote + +from flask import Response + +HTML_MIME_TYPES = frozenset({"text/html", "application/xhtml+xml"}) +HTML_EXTENSIONS = frozenset({"html", "htm"}) + + +def _normalize_mime_type(mime_type: str | None) -> str: + if not mime_type: + return "" + message = Message() + message["Content-Type"] = mime_type + return message.get_content_type().strip().lower() + + +def _is_html_extension(extension: str | None) -> bool: + if not extension: + return False + return extension.lstrip(".").lower() in HTML_EXTENSIONS + + +def is_html_content(mime_type: str | None, filename: str | None, extension: str | None = None) -> bool: + normalized_mime_type = _normalize_mime_type(mime_type) + if normalized_mime_type in HTML_MIME_TYPES: + return True + + if _is_html_extension(extension): + return True + + if filename: + return _is_html_extension(os.path.splitext(filename)[1]) + + return False + + +def enforce_download_for_html( + response: Response, + *, + mime_type: str | None, + filename: str | None, + extension: str | None = None, +) -> bool: + if not is_html_content(mime_type, filename, extension): + return False + + if filename: + encoded_filename = quote(filename) + response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}" + else: + response.headers["Content-Disposition"] = "attachment" + + response.headers["Content-Type"] = "application/octet-stream" + response.headers["X-Content-Type-Options"] = "nosniff" + return True diff --git a/api/controllers/files/image_preview.py b/api/controllers/files/image_preview.py index 64f47f426a..04db1c67cb 100644 --- a/api/controllers/files/image_preview.py +++ b/api/controllers/files/image_preview.py @@ -7,6 +7,7 @@ from werkzeug.exceptions import NotFound import services from controllers.common.errors import UnsupportedFileTypeError +from controllers.common.file_response import enforce_download_for_html from controllers.files import files_ns from extensions.ext_database import db from services.account_service import TenantService @@ -138,6 +139,13 @@ class FilePreviewApi(Resource): response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}" response.headers["Content-Type"] = "application/octet-stream" + enforce_download_for_html( + response, + mime_type=upload_file.mime_type, + filename=upload_file.name, + extension=upload_file.extension, + ) + return response diff --git a/api/controllers/files/tool_files.py b/api/controllers/files/tool_files.py index c487a0a915..89aa472015 100644 --- a/api/controllers/files/tool_files.py +++ b/api/controllers/files/tool_files.py @@ -6,6 +6,7 @@ from pydantic import BaseModel, Field from werkzeug.exceptions import Forbidden, NotFound from controllers.common.errors import UnsupportedFileTypeError +from controllers.common.file_response import enforce_download_for_html from controllers.files import files_ns from core.tools.signature import verify_tool_file_signature from core.tools.tool_file_manager import ToolFileManager @@ -78,4 +79,11 @@ class ToolFileApi(Resource): encoded_filename = quote(tool_file.name) response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}" + enforce_download_for_html( + response, + mime_type=tool_file.mimetype, + filename=tool_file.name, + extension=extension, + ) + return response diff --git a/api/controllers/service_api/app/file_preview.py b/api/controllers/service_api/app/file_preview.py index 60f422b88e..f853a124ef 100644 --- a/api/controllers/service_api/app/file_preview.py +++ b/api/controllers/service_api/app/file_preview.py @@ -5,6 +5,7 @@ from flask import Response, request from flask_restx import Resource from pydantic import BaseModel, Field +from controllers.common.file_response import enforce_download_for_html from controllers.common.schema import register_schema_model from controllers.service_api import service_api_ns from controllers.service_api.app.error import ( @@ -183,6 +184,13 @@ class FilePreviewApi(Resource): # Override content-type for downloads to force download response.headers["Content-Type"] = "application/octet-stream" + enforce_download_for_html( + response, + mime_type=upload_file.mime_type, + filename=upload_file.name, + extension=upload_file.extension, + ) + # Add caching headers for performance response.headers["Cache-Control"] = "public, max-age=3600" # Cache for 1 hour diff --git a/api/tests/unit_tests/controllers/common/test_file_response.py b/api/tests/unit_tests/controllers/common/test_file_response.py new file mode 100644 index 0000000000..2487c362bd --- /dev/null +++ b/api/tests/unit_tests/controllers/common/test_file_response.py @@ -0,0 +1,46 @@ +from flask import Response + +from controllers.common.file_response import enforce_download_for_html, is_html_content + + +class TestFileResponseHelpers: + def test_is_html_content_detects_mime_type(self): + mime_type = "text/html; charset=UTF-8" + + result = is_html_content(mime_type, filename="file.txt", extension="txt") + + assert result is True + + def test_is_html_content_detects_extension(self): + result = is_html_content("text/plain", filename="report.html", extension=None) + + assert result is True + + def test_enforce_download_for_html_sets_headers(self): + response = Response("payload", mimetype="text/html") + + updated = enforce_download_for_html( + response, + mime_type="text/html", + filename="unsafe.html", + extension="html", + ) + + assert updated is True + assert "attachment" in response.headers["Content-Disposition"] + assert response.headers["Content-Type"] == "application/octet-stream" + assert response.headers["X-Content-Type-Options"] == "nosniff" + + def test_enforce_download_for_html_no_change_for_non_html(self): + response = Response("payload", mimetype="text/plain") + + updated = enforce_download_for_html( + response, + mime_type="text/plain", + filename="notes.txt", + extension="txt", + ) + + assert updated is False + assert "Content-Disposition" not in response.headers + assert "X-Content-Type-Options" not in response.headers diff --git a/api/tests/unit_tests/controllers/service_api/app/test_file_preview.py b/api/tests/unit_tests/controllers/service_api/app/test_file_preview.py index acff191c79..1bdcd0f1a3 100644 --- a/api/tests/unit_tests/controllers/service_api/app/test_file_preview.py +++ b/api/tests/unit_tests/controllers/service_api/app/test_file_preview.py @@ -41,6 +41,7 @@ class TestFilePreviewApi: upload_file = Mock(spec=UploadFile) upload_file.id = str(uuid.uuid4()) upload_file.name = "test_file.jpg" + upload_file.extension = "jpg" upload_file.mime_type = "image/jpeg" upload_file.size = 1024 upload_file.key = "storage/key/test_file.jpg" @@ -210,6 +211,19 @@ class TestFilePreviewApi: assert mock_upload_file.name in response.headers["Content-Disposition"] assert response.headers["Content-Type"] == "application/octet-stream" + def test_build_file_response_html_forces_attachment(self, file_preview_api, mock_upload_file): + """Test HTML files are forced to download""" + mock_generator = Mock() + mock_upload_file.mime_type = "text/html" + mock_upload_file.name = "unsafe.html" + mock_upload_file.extension = "html" + + response = file_preview_api._build_file_response(mock_generator, mock_upload_file, False) + + assert "attachment" in response.headers["Content-Disposition"] + assert response.headers["Content-Type"] == "application/octet-stream" + assert response.headers["X-Content-Type-Options"] == "nosniff" + def test_build_file_response_audio_video(self, file_preview_api, mock_upload_file): """Test file response building for audio/video files""" mock_generator = Mock()