From 2aa9c69ac4145a81e61be8cb28fa605ab98c5786 Mon Sep 17 00:00:00 2001 From: L1nSn0w Date: Thu, 7 May 2026 18:19:42 +0800 Subject: [PATCH] fix(api): accept resolved file types in custom bucket on history replay A Chatflow file uploaded into the CUSTOM type slot is coerced to its detected type by _resolve_file_type (PNG -> IMAGE), and MessageFile.type persists that resolved type. On history replay, build_from_message_file rebuilds mapping["type"] from MessageFile.type, so a file that passed round 1 (mapping["type"]=="custom") was rejected on round 2 (mapping["type"]=="image") even though the workflow config was unchanged. - Refactor is_file_valid_with_config with bucket semantics: CUSTOM acts as a fallback bucket gated by allowed_file_extensions, compared case- and dot-insensitively. This also fixes a parallel mismatch where a user whitelist of [".PNG", "png", "JPG", ...] failed to match the upload-side ".png" (always lowercase with leading dot). - Skip re-validation when rehydrating files from conversation history in TokenBufferMemory and BaseAgentRunner; history files were validated at upload time, mirroring build_file_from_stored_mapping. --- api/core/agent/base_agent_runner.py | 3 +- api/core/memory/token_buffer_memory.py | 5 +- api/factories/file_factory/validation.py | 38 ++++-- .../factories/test_file_validation.py | 118 ++++++++++++++++++ 4 files changed, 151 insertions(+), 13 deletions(-) create mode 100644 api/tests/unit_tests/factories/test_file_validation.py diff --git a/api/core/agent/base_agent_runner.py b/api/core/agent/base_agent_runner.py index c22102c2ba..e9bb82d147 100644 --- a/api/core/agent/base_agent_runner.py +++ b/api/core/agent/base_agent_runner.py @@ -529,10 +529,11 @@ class BaseAgentRunner(AppRunner): image_detail_config = file_extra_config.image_config.detail if file_extra_config.image_config else None image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW + # History files were validated on upload; replaying does not re-validate. file_objs = file_factory.build_from_message_files( message_files=files, tenant_id=self.tenant_id, - config=file_extra_config, + config=None, access_controller=_file_access_controller, ) if not file_objs: diff --git a/api/core/memory/token_buffer_memory.py b/api/core/memory/token_buffer_memory.py index d840ee213c..4786b00630 100644 --- a/api/core/memory/token_buffer_memory.py +++ b/api/core/memory/token_buffer_memory.py @@ -86,12 +86,13 @@ class TokenBufferMemory: detail = ImagePromptMessageContent.DETAIL.HIGH if file_extra_config and app_record: - # Build files directly without filtering by belongs_to + # History files were validated on upload; replaying does not re-validate. + # See models/utils/file_input_compat.py for the same pattern. file_objs = [ file_factory.build_from_message_file( message_file=message_file, tenant_id=app_record.tenant_id, - config=file_extra_config, + config=None, access_controller=_file_access_controller, ) for message_file in message_files diff --git a/api/factories/file_factory/validation.py b/api/factories/file_factory/validation.py index 4c4f6150e4..67457e96ca 100644 --- a/api/factories/file_factory/validation.py +++ b/api/factories/file_factory/validation.py @@ -2,9 +2,22 @@ from __future__ import annotations +from collections.abc import Iterable + from graphon.file import FileTransferMethod, FileType, FileUploadConfig +def _normalize_extension(extension: str) -> str: + if not extension: + return "" + s = extension.strip().lower() + return s if s.startswith(".") else "." + s + + +def _extension_matches(extension: str, whitelist: Iterable[str]) -> bool: + return _normalize_extension(extension) in {_normalize_extension(e) for e in whitelist} + + def is_file_valid_with_config( *, input_file_type: str, @@ -12,22 +25,27 @@ def is_file_valid_with_config( file_transfer_method: FileTransferMethod, config: FileUploadConfig, ) -> bool: - # FIXME(QIN2DIM): Always allow tool files (files generated by the assistant/model) - # These are internally generated and should bypass user upload restrictions + """Return whether the file is allowed by the upload config. + + ``allowed_file_types`` lists the buckets a file may fall into; ``CUSTOM`` is + a fallback bucket gated by ``allowed_file_extensions`` (case- and + dot-insensitive). Tool-generated files bypass user-facing config. + """ if file_transfer_method == FileTransferMethod.TOOL_FILE: return True - if ( - config.allowed_file_types - and input_file_type not in config.allowed_file_types - and input_file_type != FileType.CUSTOM - ): + allowed_types = config.allowed_file_types or [] + custom_allowed = FileType.CUSTOM in allowed_types + type_allowed = not allowed_types or input_file_type in allowed_types + + if not type_allowed and not custom_allowed: return False + in_custom_bucket = input_file_type == FileType.CUSTOM or not type_allowed if ( - input_file_type == FileType.CUSTOM - and config.allowed_file_extensions is not None - and file_extension not in config.allowed_file_extensions + in_custom_bucket + and config.allowed_file_extensions + and not _extension_matches(file_extension, config.allowed_file_extensions) ): return False diff --git a/api/tests/unit_tests/factories/test_file_validation.py b/api/tests/unit_tests/factories/test_file_validation.py new file mode 100644 index 0000000000..81c897f9c5 --- /dev/null +++ b/api/tests/unit_tests/factories/test_file_validation.py @@ -0,0 +1,118 @@ +"""Unit tests for is_file_valid_with_config.""" + +from __future__ import annotations + +import pytest + +from factories.file_factory.validation import is_file_valid_with_config +from graphon.file import FileTransferMethod, FileType, FileUploadConfig + + +def _validate( + *, + input_file_type: str, + file_extension: str = ".png", + file_transfer_method: FileTransferMethod = FileTransferMethod.LOCAL_FILE, + config: FileUploadConfig, +) -> bool: + return is_file_valid_with_config( + input_file_type=input_file_type, + file_extension=file_extension, + file_transfer_method=file_transfer_method, + config=config, + ) + + +@pytest.mark.parametrize( + ("input_file_type", "file_extension", "allowed_file_types", "allowed_file_extensions", "expected"), + [ + # round-1 happy path: literal "custom" mapping, ext whitelisted + ("custom", ".png", [FileType.CUSTOM], [".png"], True), + # round-2 replay: MessageFile.type is the resolved type, but config still allows CUSTOM + ("image", ".png", [FileType.CUSTOM], [".png"], True), + ("document", ".pdf", [FileType.CUSTOM], [".pdf"], True), + # mixed bucket [IMAGE, CUSTOM]: document falls into CUSTOM bucket via extension + ("document", ".pdf", [FileType.IMAGE, FileType.CUSTOM], [".pdf"], True), + ("document", ".exe", [FileType.IMAGE, FileType.CUSTOM], [".pdf"], False), + ("image", ".jpg", [FileType.IMAGE], [], True), + ("video", ".mp4", [FileType.IMAGE, FileType.DOCUMENT], [], False), + ("custom", ".exe", [FileType.CUSTOM], [".png"], False), + # empty allowed_file_types == no type restriction + ("video", ".mp4", [], [], True), + ], +) +def test_bucket_semantics( + input_file_type, file_extension, allowed_file_types, allowed_file_extensions, expected +): + config = FileUploadConfig( + allowed_file_types=allowed_file_types, + allowed_file_extensions=allowed_file_extensions, + ) + assert _validate( + input_file_type=input_file_type, file_extension=file_extension, config=config + ) is expected + + +@pytest.mark.parametrize("whitelist_entry", [".png", ".PNG", "png", "PNG", " .Png ", "PnG"]) +def test_extension_match_is_case_and_dot_insensitive(whitelist_entry): + config = FileUploadConfig( + allowed_file_types=[FileType.CUSTOM], + allowed_file_extensions=[whitelist_entry], + ) + assert _validate(input_file_type="custom", file_extension=".png", config=config) is True + + +def test_extension_mismatch_still_rejected_after_normalization(): + config = FileUploadConfig( + allowed_file_types=[FileType.CUSTOM], + allowed_file_extensions=[".png", ".jpg"], + ) + assert _validate(input_file_type="custom", file_extension=".pdf", config=config) is False + + +def test_mixed_case_whitelist_replicating_real_user_config(): + config = FileUploadConfig( + allowed_file_types=[FileType.CUSTOM], + allowed_file_extensions=[".PNG", "png", "JPG", ".WEBP", "SVG", "GIF"], + ) + for ext in (".png", ".jpg", ".webp", ".svg", ".gif"): + assert _validate(input_file_type="custom", file_extension=ext, config=config) is True + + +def test_tool_file_always_passes(): + config = FileUploadConfig(allowed_file_types=[FileType.CUSTOM], allowed_file_extensions=[".pdf"]) + assert _validate( + input_file_type="image", + file_extension=".png", + file_transfer_method=FileTransferMethod.TOOL_FILE, + config=config, + ) is True + + +def test_transfer_method_gate_for_non_image(): + config = FileUploadConfig( + allowed_file_types=[FileType.DOCUMENT], + allowed_file_upload_methods=[FileTransferMethod.LOCAL_FILE], + ) + assert _validate( + input_file_type="document", + file_extension=".pdf", + file_transfer_method=FileTransferMethod.LOCAL_FILE, + config=config, + ) is True + assert _validate( + input_file_type="document", + file_extension=".pdf", + file_transfer_method=FileTransferMethod.REMOTE_URL, + config=config, + ) is False + + +def test_history_replay_matches_round_1_outcome_under_unchanged_config(): + """A file that passes round 1 must pass history replay when config is unchanged.""" + config = FileUploadConfig( + allowed_file_types=[FileType.CUSTOM], + allowed_file_extensions=[".png"], + ) + assert _validate(input_file_type="custom", file_extension=".png", config=config) is True + assert _validate(input_file_type="image", file_extension=".png", config=config) is True