fix(api): accept resolved file types in custom bucket on history replay

A Chatflow file uploaded into the CUSTOM type slot is coerced to its
detected type by _resolve_file_type (PNG -> IMAGE), and MessageFile.type
persists that resolved type. On history replay, build_from_message_file
rebuilds mapping["type"] from MessageFile.type, so a file that passed
round 1 (mapping["type"]=="custom") was rejected on round 2
(mapping["type"]=="image") even though the workflow config was unchanged.

- Refactor is_file_valid_with_config with bucket semantics: CUSTOM acts
  as a fallback bucket gated by allowed_file_extensions, compared case-
  and dot-insensitively. This also fixes a parallel mismatch where a
  user whitelist of [".PNG", "png", "JPG", ...] failed to match the
  upload-side ".png" (always lowercase with leading dot).
- Skip re-validation when rehydrating files from conversation history in
  TokenBufferMemory and BaseAgentRunner; history files were validated at
  upload time, mirroring build_file_from_stored_mapping.
This commit is contained in:
L1nSn0w 2026-05-07 18:19:42 +08:00
parent 140ad6ba4e
commit 2aa9c69ac4
4 changed files with 151 additions and 13 deletions

View File

@ -529,10 +529,11 @@ class BaseAgentRunner(AppRunner):
image_detail_config = file_extra_config.image_config.detail if file_extra_config.image_config else None
image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW
# History files were validated on upload; replaying does not re-validate.
file_objs = file_factory.build_from_message_files(
message_files=files,
tenant_id=self.tenant_id,
config=file_extra_config,
config=None,
access_controller=_file_access_controller,
)
if not file_objs:

View File

@ -86,12 +86,13 @@ class TokenBufferMemory:
detail = ImagePromptMessageContent.DETAIL.HIGH
if file_extra_config and app_record:
# Build files directly without filtering by belongs_to
# History files were validated on upload; replaying does not re-validate.
# See models/utils/file_input_compat.py for the same pattern.
file_objs = [
file_factory.build_from_message_file(
message_file=message_file,
tenant_id=app_record.tenant_id,
config=file_extra_config,
config=None,
access_controller=_file_access_controller,
)
for message_file in message_files

View File

@ -2,9 +2,22 @@
from __future__ import annotations
from collections.abc import Iterable
from graphon.file import FileTransferMethod, FileType, FileUploadConfig
def _normalize_extension(extension: str) -> str:
if not extension:
return ""
s = extension.strip().lower()
return s if s.startswith(".") else "." + s
def _extension_matches(extension: str, whitelist: Iterable[str]) -> bool:
return _normalize_extension(extension) in {_normalize_extension(e) for e in whitelist}
def is_file_valid_with_config(
*,
input_file_type: str,
@ -12,22 +25,27 @@ def is_file_valid_with_config(
file_transfer_method: FileTransferMethod,
config: FileUploadConfig,
) -> bool:
# FIXME(QIN2DIM): Always allow tool files (files generated by the assistant/model)
# These are internally generated and should bypass user upload restrictions
"""Return whether the file is allowed by the upload config.
``allowed_file_types`` lists the buckets a file may fall into; ``CUSTOM`` is
a fallback bucket gated by ``allowed_file_extensions`` (case- and
dot-insensitive). Tool-generated files bypass user-facing config.
"""
if file_transfer_method == FileTransferMethod.TOOL_FILE:
return True
if (
config.allowed_file_types
and input_file_type not in config.allowed_file_types
and input_file_type != FileType.CUSTOM
):
allowed_types = config.allowed_file_types or []
custom_allowed = FileType.CUSTOM in allowed_types
type_allowed = not allowed_types or input_file_type in allowed_types
if not type_allowed and not custom_allowed:
return False
in_custom_bucket = input_file_type == FileType.CUSTOM or not type_allowed
if (
input_file_type == FileType.CUSTOM
and config.allowed_file_extensions is not None
and file_extension not in config.allowed_file_extensions
in_custom_bucket
and config.allowed_file_extensions
and not _extension_matches(file_extension, config.allowed_file_extensions)
):
return False

View File

@ -0,0 +1,118 @@
"""Unit tests for is_file_valid_with_config."""
from __future__ import annotations
import pytest
from factories.file_factory.validation import is_file_valid_with_config
from graphon.file import FileTransferMethod, FileType, FileUploadConfig
def _validate(
*,
input_file_type: str,
file_extension: str = ".png",
file_transfer_method: FileTransferMethod = FileTransferMethod.LOCAL_FILE,
config: FileUploadConfig,
) -> bool:
return is_file_valid_with_config(
input_file_type=input_file_type,
file_extension=file_extension,
file_transfer_method=file_transfer_method,
config=config,
)
@pytest.mark.parametrize(
("input_file_type", "file_extension", "allowed_file_types", "allowed_file_extensions", "expected"),
[
# round-1 happy path: literal "custom" mapping, ext whitelisted
("custom", ".png", [FileType.CUSTOM], [".png"], True),
# round-2 replay: MessageFile.type is the resolved type, but config still allows CUSTOM
("image", ".png", [FileType.CUSTOM], [".png"], True),
("document", ".pdf", [FileType.CUSTOM], [".pdf"], True),
# mixed bucket [IMAGE, CUSTOM]: document falls into CUSTOM bucket via extension
("document", ".pdf", [FileType.IMAGE, FileType.CUSTOM], [".pdf"], True),
("document", ".exe", [FileType.IMAGE, FileType.CUSTOM], [".pdf"], False),
("image", ".jpg", [FileType.IMAGE], [], True),
("video", ".mp4", [FileType.IMAGE, FileType.DOCUMENT], [], False),
("custom", ".exe", [FileType.CUSTOM], [".png"], False),
# empty allowed_file_types == no type restriction
("video", ".mp4", [], [], True),
],
)
def test_bucket_semantics(
input_file_type, file_extension, allowed_file_types, allowed_file_extensions, expected
):
config = FileUploadConfig(
allowed_file_types=allowed_file_types,
allowed_file_extensions=allowed_file_extensions,
)
assert _validate(
input_file_type=input_file_type, file_extension=file_extension, config=config
) is expected
@pytest.mark.parametrize("whitelist_entry", [".png", ".PNG", "png", "PNG", " .Png ", "PnG"])
def test_extension_match_is_case_and_dot_insensitive(whitelist_entry):
config = FileUploadConfig(
allowed_file_types=[FileType.CUSTOM],
allowed_file_extensions=[whitelist_entry],
)
assert _validate(input_file_type="custom", file_extension=".png", config=config) is True
def test_extension_mismatch_still_rejected_after_normalization():
config = FileUploadConfig(
allowed_file_types=[FileType.CUSTOM],
allowed_file_extensions=[".png", ".jpg"],
)
assert _validate(input_file_type="custom", file_extension=".pdf", config=config) is False
def test_mixed_case_whitelist_replicating_real_user_config():
config = FileUploadConfig(
allowed_file_types=[FileType.CUSTOM],
allowed_file_extensions=[".PNG", "png", "JPG", ".WEBP", "SVG", "GIF"],
)
for ext in (".png", ".jpg", ".webp", ".svg", ".gif"):
assert _validate(input_file_type="custom", file_extension=ext, config=config) is True
def test_tool_file_always_passes():
config = FileUploadConfig(allowed_file_types=[FileType.CUSTOM], allowed_file_extensions=[".pdf"])
assert _validate(
input_file_type="image",
file_extension=".png",
file_transfer_method=FileTransferMethod.TOOL_FILE,
config=config,
) is True
def test_transfer_method_gate_for_non_image():
config = FileUploadConfig(
allowed_file_types=[FileType.DOCUMENT],
allowed_file_upload_methods=[FileTransferMethod.LOCAL_FILE],
)
assert _validate(
input_file_type="document",
file_extension=".pdf",
file_transfer_method=FileTransferMethod.LOCAL_FILE,
config=config,
) is True
assert _validate(
input_file_type="document",
file_extension=".pdf",
file_transfer_method=FileTransferMethod.REMOTE_URL,
config=config,
) is False
def test_history_replay_matches_round_1_outcome_under_unchanged_config():
"""A file that passes round 1 must pass history replay when config is unchanged."""
config = FileUploadConfig(
allowed_file_types=[FileType.CUSTOM],
allowed_file_extensions=[".png"],
)
assert _validate(input_file_type="custom", file_extension=".png", config=config) is True
assert _validate(input_file_type="image", file_extension=".png", config=config) is True