fix: recognize attached files in agent node query parameter

Closes #28140
This commit is contained in:
yujiosaka 2025-12-25 23:30:18 +09:00
parent e6e439f54c
commit 2d525e0ac0
29 changed files with 386 additions and 9 deletions

View File

@ -9,10 +9,11 @@ from sqlalchemy.orm import Session
from core.agent.entities import AgentToolEntity
from core.agent.plugin_entities import AgentStrategyParameter
from core.file import File, FileTransferMethod
from core.file import File, FileTransferMethod, FileType, file_manager
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance, ModelManager
from core.model_runtime.entities.llm_entities import LLMUsage, LLMUsageMetadata
from core.model_runtime.entities.message_entities import TextPromptMessageContent
from core.model_runtime.entities.model_entities import AIModelEntity, ModelType
from core.model_runtime.utils.encoders import jsonable_encoder
from core.provider_manager import ProviderManager
@ -24,7 +25,7 @@ from core.tools.entities.tool_entities import (
)
from core.tools.tool_manager import ToolManager
from core.tools.utils.message_transformer import ToolFileMessageTransformer
from core.variables.segments import ArrayFileSegment, StringSegment
from core.variables.segments import ArrayFileSegment, FileSegment, StringSegment
from core.workflow.enums import (
NodeType,
SystemVariableKey,
@ -160,6 +161,22 @@ class AgentNode(Node[AgentNodeData]):
)
)
def _fetch_files_from_variable_selector(
    self,
    *,
    variable_pool: VariablePool,
    selector: Sequence[str],
) -> Sequence[File]:
    """Look up ``selector`` in ``variable_pool`` and return the files it holds.

    Args:
        variable_pool: Pool queried with ``list(selector)``.
        selector: Path of the variable to resolve.

    Returns:
        A one-element list for a ``FileSegment``, the segment's own value
        for an ``ArrayFileSegment``, and an empty list when the variable is
        missing or is any other kind of segment.
    """
    segment = variable_pool.get(list(selector))
    # ``isinstance`` is False for ``None``, so a missing variable falls
    # through to the empty result without a dedicated check.
    if isinstance(segment, FileSegment):
        return [segment.value]
    if isinstance(segment, ArrayFileSegment):
        return segment.value
    return []
def _generate_agent_parameters(
self,
*,
@ -206,11 +223,61 @@ class AgentNode(Node[AgentNodeData]):
except TypeError:
parameter_value = str(agent_input.value)
segment_group = variable_pool.convert_template(parameter_value)
parameter_value = segment_group.log if for_log else segment_group.text
if parameter_name in ("query", "instruction") and not for_log:
contents: list[dict[str, Any]] = []
has_file = False
vision_detail = (
node_data.vision.configs.detail if node_data.vision.enabled else None
)
for segment in segment_group.value:
if isinstance(segment, ArrayFileSegment):
for file in segment.value:
if file.type in {FileType.IMAGE, FileType.VIDEO, FileType.AUDIO, FileType.DOCUMENT}:
file_content = file_manager.to_prompt_message_content(
file, image_detail_config=vision_detail
)
contents.append(file_content.model_dump())
has_file = True
elif isinstance(segment, FileSegment):
file = segment.value
if file.type in {FileType.IMAGE, FileType.VIDEO, FileType.AUDIO, FileType.DOCUMENT}:
file_content = file_manager.to_prompt_message_content(
file, image_detail_config=vision_detail
)
contents.append(file_content.model_dump())
has_file = True
else:
text = segment.text
if text:
contents.append(TextPromptMessageContent(data=text).model_dump())
if parameter_name == "query":
if node_data.vision.enabled and node_data.vision.configs.variable_selector:
vision_files = self._fetch_files_from_variable_selector(
variable_pool=variable_pool,
selector=node_data.vision.configs.variable_selector,
)
for file in vision_files:
if file.type in {FileType.IMAGE, FileType.VIDEO, FileType.AUDIO, FileType.DOCUMENT}:
file_content = file_manager.to_prompt_message_content(
file, image_detail_config=vision_detail
)
contents.append(file_content.model_dump())
has_file = True
if has_file:
parameter_value = contents
else:
parameter_value = segment_group.text
else:
parameter_value = segment_group.log if for_log else segment_group.text
# variable_pool.convert_template returns a string,
# so we need to convert it back to a dictionary
try:
if not isinstance(agent_input.value, str):
if not isinstance(agent_input.value, str) and isinstance(parameter_value, str):
parameter_value = json.loads(parameter_value)
except json.JSONDecodeError:
parameter_value = parameter_value

View File

@ -1,18 +1,38 @@
from collections.abc import Sequence
from enum import IntEnum, StrEnum, auto
from typing import Any, Literal, Union
from pydantic import BaseModel
from pydantic import BaseModel, Field, field_validator
from core.model_runtime.entities import ImagePromptMessageContent
from core.prompt.entities.advanced_prompt_entities import MemoryConfig
from core.tools.entities.tool_entities import ToolSelector
from core.workflow.nodes.base.entities import BaseNodeData
class VisionConfigOptions(BaseModel):
    # Tunable options of the agent node's vision feature.

    # Selector of the variable holding the files to attach; a fresh
    # ["sys", "files"] list is created for every instance.
    variable_selector: Sequence[str] = Field(default_factory=lambda: ["sys", "files"])
    # Detail setting for image content; HIGH unless overridden.
    detail: ImagePromptMessageContent.DETAIL = ImagePromptMessageContent.DETAIL.HIGH
class VisionConfig(BaseModel):
    # Vision settings of an agent node: an on/off switch plus its options.

    enabled: bool = False
    configs: VisionConfigOptions = Field(default_factory=VisionConfigOptions)

    @field_validator("configs", mode="before")
    @classmethod
    def convert_none_configs(cls, v: Any):
        """Replace an explicit ``None`` for ``configs`` with default options.

        Runs before field validation; any non-``None`` input is passed
        through untouched.
        """
        return VisionConfigOptions() if v is None else v
class AgentNodeData(BaseNodeData):
agent_strategy_provider_name: str # redundancy
agent_strategy_name: str
agent_strategy_label: str # redundancy
memory: MemoryConfig | None = None
vision: VisionConfig = Field(default_factory=VisionConfig)
# The version of the tool parameter.
# If this value is None, it indicates this is a previous version
# and requires using the legacy parameter parsing rules.

View File

@ -0,0 +1,220 @@
"""Unit tests for AgentNode file handling."""
from unittest.mock import patch
import pytest
from core.file import File, FileTransferMethod, FileType
from core.model_runtime.entities.message_entities import (
ImagePromptMessageContent,
TextPromptMessageContent,
)
from core.variables import ArrayFileSegment, FileSegment, StringSegment
from core.variables.segment_group import SegmentGroup
class TestAgentNodeFileHandling:
    """Tests for file handling in query, instruction, and vision variable selector.

    The tests do not drive ``AgentNode`` directly. They check the
    content-building rules through ``_build_contents``, a single local
    implementation shared by every test, so the rules are stated once
    instead of being copy-pasted into each test body.
    """

    # File types that are converted into prompt message content; files of
    # any other type are skipped.
    SUPPORTED_FILE_TYPES = frozenset({FileType.IMAGE, FileType.VIDEO, FileType.AUDIO, FileType.DOCUMENT})

    @pytest.fixture
    def mock_file(self) -> File:
        """Create a mock file."""
        return File(
            id="test-file-id",
            tenant_id="test-tenant",
            type=FileType.IMAGE,
            transfer_method=FileTransferMethod.LOCAL_FILE,
            related_id="test-related-id",
            filename="test.png",
            extension=".png",
            mime_type="image/png",
            size=1024,
        )

    @pytest.fixture
    def mock_custom_file(self) -> File:
        """Create a mock custom (unsupported) file."""
        return File(
            id="test-custom-id",
            tenant_id="test-tenant",
            type=FileType.CUSTOM,
            transfer_method=FileTransferMethod.LOCAL_FILE,
            related_id="test-related-id",
            filename="test.zip",
            extension=".zip",
            mime_type="application/zip",
            size=4096,
        )

    @staticmethod
    def _patched_image_content():
        """Return a patcher that makes file conversion yield a fixed image content."""
        return patch(
            "core.file.file_manager.to_prompt_message_content",
            return_value=ImagePromptMessageContent(
                url="http://example.com/test.png", mime_type="image/png", format="png"
            ),
        )

    @classmethod
    def _build_contents(cls, segments: list, extra_files: list[File] | None = None) -> tuple[list[dict], bool]:
        """Build prompt contents from template segments plus extra files.

        Args:
            segments: Segments of the rendered template, in order.
            extra_files: Files appended after the segments (used for the
                vision variable selector cases); ``None`` means no extras.

        Returns:
            ``(contents, has_file)`` where ``contents`` is the list of dumped
            prompt message contents and ``has_file`` tells whether at least
            one supported file was converted.
        """
        # Imported here rather than at module level; the attribute is looked
        # up on the module at call time, so an active patch of
        # ``core.file.file_manager.to_prompt_message_content`` is honoured.
        from core.file import file_manager

        contents: list[dict] = []

        def append_file(file: File) -> bool:
            """Convert ``file`` if its type is supported; report whether it was added."""
            if file.type not in cls.SUPPORTED_FILE_TYPES:
                return False
            contents.append(file_manager.to_prompt_message_content(file).model_dump())
            return True

        has_file = False
        for segment in segments:
            if isinstance(segment, ArrayFileSegment):
                for file in segment.value:
                    has_file = append_file(file) or has_file
            elif isinstance(segment, FileSegment):
                has_file = append_file(segment.value) or has_file
            elif segment.text:
                contents.append(TextPromptMessageContent(data=segment.text).model_dump())
        for file in extra_files or []:
            has_file = append_file(file) or has_file
        return contents, has_file

    def test_query_with_text_only_returns_string(self):
        """When query contains only text, it should return a string."""
        segment_group = SegmentGroup(value=[StringSegment(value="Hello, world!")])

        contents, has_file = self._build_contents(segment_group.value)
        result = contents if has_file else segment_group.text

        assert result == "Hello, world!"
        assert isinstance(result, str)

    def test_query_with_file_returns_list(self, mock_file):
        """When query contains a file, it should return a list."""
        segment_group = SegmentGroup(value=[FileSegment(value=mock_file)])

        with self._patched_image_content():
            contents, has_file = self._build_contents(segment_group.value)
        result = contents if has_file else segment_group.text

        assert isinstance(result, list)
        assert len(result) == 1
        assert result[0]["type"] == "image"

    def test_query_with_text_and_file_returns_list_with_both(self, mock_file):
        """When query contains both text and file, it should return a list with both."""
        segment_group = SegmentGroup(value=[
            StringSegment(value="Describe this: "),
            FileSegment(value=mock_file),
        ])

        with self._patched_image_content():
            contents, has_file = self._build_contents(segment_group.value)
        result = contents if has_file else segment_group.text

        assert isinstance(result, list)
        assert len(result) == 2
        assert result[0]["type"] == "text"
        assert result[1]["type"] == "image"

    def test_custom_file_type_is_ignored(self, mock_custom_file):
        """Custom file types should be ignored."""
        segment_group = SegmentGroup(value=[FileSegment(value=mock_custom_file)])

        contents, has_file = self._build_contents(segment_group.value)

        assert has_file is False
        assert contents == []

    def test_instruction_with_file_returns_list(self, mock_file):
        """When instruction contains a file, it should return a list (same as query)."""
        segment_group = SegmentGroup(value=[
            StringSegment(value="You are a helpful assistant. "),
            FileSegment(value=mock_file),
        ])

        with self._patched_image_content():
            contents, has_file = self._build_contents(segment_group.value)
        result = contents if has_file else segment_group.text

        assert isinstance(result, list)
        assert len(result) == 2
        assert result[0]["type"] == "text"
        assert result[1]["type"] == "image"

    def test_vision_variable_selector_files_added_to_query(self, mock_file):
        """Vision variable selector files should be added to query only."""
        vision_files = [mock_file]

        with self._patched_image_content():
            contents, has_file = self._build_contents([], extra_files=vision_files)

        assert has_file is True
        assert len(contents) == 1
        assert contents[0]["type"] == "image"

    def test_query_with_text_and_vision_files(self, mock_file):
        """Query text combined with vision variable selector files."""
        segment_group = SegmentGroup(value=[StringSegment(value="Describe this image")])
        vision_files = [mock_file]

        with self._patched_image_content():
            contents, has_file = self._build_contents(segment_group.value, extra_files=vision_files)
        result = contents if has_file else segment_group.text

        assert isinstance(result, list)
        assert len(result) == 2
        assert result[0]["type"] == "text"
        assert result[0]["data"] == "Describe this image"
        assert result[1]["type"] == "image"

View File

@ -6,8 +6,10 @@ import type { StrategyParamItem } from '@/app/components/plugins/types'
import { memo } from 'react'
import { useTranslation } from 'react-i18next'
import { toType } from '@/app/components/tools/utils/to-form-schema'
import { Resolution } from '@/types/app'
import { useStore } from '../../store'
import { AgentStrategy } from '../_base/components/agent-strategy'
import ConfigVision from '../_base/components/config-vision'
import Field from '../_base/components/field'
import MemoryConfig from '../_base/components/memory-config'
import OutputVars, { VarItem } from '../_base/components/output-vars'
@ -40,6 +42,8 @@ const AgentPanel: FC<NodePanelProps<AgentNodeType>> = (props) => {
readOnly,
outputSchema,
handleMemoryChange,
handleVisionEnabledChange,
handleVisionConfigChange,
canChooseMCPTool,
} = useConfig(props.id, props.data)
const { t } = useTranslation()
@ -85,12 +89,11 @@ const AgentPanel: FC<NodePanelProps<AgentNodeType>> = (props) => {
canChooseMCPTool={canChooseMCPTool}
/>
</Field>
<div className="px-4 py-2">
<div className="space-y-4 px-4 py-2">
{isChatMode && currentStrategy?.features?.includes(AgentFeature.HISTORY_MESSAGES) && (
<>
<Split />
<MemoryConfig
className="mt-4"
readonly={readOnly}
config={{ data: inputs.memory }}
onChange={handleMemoryChange}
@ -98,6 +101,15 @@ const AgentPanel: FC<NodePanelProps<AgentNodeType>> = (props) => {
/>
</>
)}
<ConfigVision
nodeId={props.id}
readOnly={readOnly}
isVisionModel={true}
enabled={inputs.vision?.enabled || false}
onEnabledChange={handleVisionEnabledChange}
config={inputs.vision?.configs || { detail: Resolution.high, variable_selector: [] }}
onConfigChange={handleVisionConfigChange}
/>
</div>
<div>
<OutputVars>

View File

@ -1,6 +1,11 @@
import type { ToolVarInputs } from '../tool/types'
import type { PluginMeta } from '@/app/components/plugins/types'
import type { CommonNodeType, Memory } from '@/app/components/workflow/types'
import type { CommonNodeType, Memory, VisionSetting } from '@/app/components/workflow/types'
/** Vision settings stored on an agent node. */
export type AgentVisionConfig = {
  /** Whether vision is switched on for the node. */
  enabled: boolean
  /** Vision options; may be absent. */
  configs?: VisionSetting
}
export type AgentNodeType = CommonNodeType & {
agent_strategy_provider_name?: string
@ -11,6 +16,7 @@ export type AgentNodeType = CommonNodeType & {
output_schema: Record<string, any>
plugin_unique_identifier?: string
memory?: Memory
vision?: AgentVisionConfig
version?: string
tool_node_version?: string
}

View File

@ -1,4 +1,4 @@
import type { Memory, Var } from '../../types'
import type { Memory, Var, VisionSetting } from '../../types'
import type { ToolVarInputs } from '../tool/types'
import type { AgentNodeType } from './types'
import { produce } from 'immer'
@ -11,6 +11,7 @@ import {
} from '@/app/components/workflow/hooks'
import { useCheckInstalled, useFetchPluginsInMarketPlaceByIds } from '@/service/use-plugins'
import { useStrategyProviderDetail } from '@/service/use-strategy'
import { Resolution } from '@/types/app'
import { isSupportMCP } from '@/utils/plugin-version-feature'
import { VarType as VarKindType } from '../../types'
import useAvailableVarList from '../_base/hooks/use-available-var-list'
@ -204,7 +205,34 @@ const useConfig = (id: string, payload: AgentNodeType) => {
})
setInputs(newInputs)
}, [inputs, setInputs])
const isChatMode = useIsChatMode()
// Toggle vision on the node. Turning it on in chat mode also resets the
// vision configs to high detail with the ['sys', 'files'] selector.
const handleVisionEnabledChange = useCallback((enabled: boolean) => {
  const resetToChatDefaults = enabled && isChatMode
  setInputs(produce(inputs, (draft) => {
    // Create the vision entry on first use so the flag can be written.
    if (!draft.vision)
      draft.vision = { enabled: false }
    draft.vision.enabled = enabled
    if (!resetToChatDefaults)
      return
    draft.vision.configs = {
      detail: Resolution.high,
      variable_selector: ['sys', 'files'],
    }
  }))
}, [inputs, setInputs, isChatMode])
// Store new vision configs on the node. When the node has no vision entry
// yet, one is created with vision enabled.
const handleVisionConfigChange = useCallback((config: VisionSetting) => {
  setInputs(produce(inputs, (draft) => {
    if (draft.vision)
      draft.vision.configs = config
    else
      draft.vision = { enabled: true, configs: config }
  }))
}, [inputs, setInputs])
return {
readOnly,
inputs,
@ -221,6 +249,8 @@ const useConfig = (id: string, payload: AgentNodeType) => {
availableNodesWithParent,
outputSchema,
handleMemoryChange,
handleVisionEnabledChange,
handleVisionConfigChange,
isChatMode,
canChooseMCPTool: isSupportMCP(inputs.meta?.version),
}

View File

@ -1024,6 +1024,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'تم إهمال هذا النموذج',
},
vision: 'الرؤية',
outputVars: {
text: 'محتوى تم إنشاؤه بواسطة الوكيل',
usage: 'معلومات استخدام النموذج',

View File

@ -901,6 +901,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'Dieses Modell ist veraltet',
},
vision: 'Vision',
outputVars: {
files: {
type: 'Art der Unterstützung. Jetzt nur noch Image unterstützen',

View File

@ -1024,6 +1024,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'This model is deprecated',
},
vision: 'vision',
outputVars: {
text: 'agent generated content',
usage: 'Model Usage Information',

View File

@ -901,6 +901,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'Este modelo está en desuso',
},
vision: 'visión',
outputVars: {
files: {
url: 'URL de la imagen',

View File

@ -901,6 +901,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'این مدل منسوخ شده است',
},
vision: 'بینایی',
outputVars: {
files: {
transfer_method: 'روش انتقال. ارزش remote_url یا local_file',

View File

@ -901,6 +901,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'Ce modèle est obsolète',
},
vision: 'vision',
outputVars: {
files: {
title: 'Fichiers générés par lagent',

View File

@ -921,6 +921,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'यह मॉडल अप्रचलित है।',
},
vision: 'दृष्टि',
outputVars: {
files: {
transfer_method: 'स्थानांतरण विधि। मान या तो remote_url है या local_file।',

View File

@ -940,6 +940,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'Model ini tidak digunakan lagi',
},
vision: 'penglihatan',
outputVars: {
files: {
transfer_method: 'Metode transfer. Nilai adalah remote_url atau local_file',

View File

@ -927,6 +927,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'Questo modello è deprecato',
},
vision: 'vision',
outputVars: {
files: {
type: 'Tipo di supporto. Ora supporta solo l\'immagine',

View File

@ -960,6 +960,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'このモデルは廃止されました',
},
vision: 'ビジョン',
outputVars: {
files: {
url: '画像の URL',

View File

@ -943,6 +943,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: '이 모델은 더 이상 사용되지 않습니다.',
},
vision: '비전',
outputVars: {
files: {
url: '이미지 URL',

View File

@ -901,6 +901,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'Ten model jest przestarzały',
},
vision: 'wizja',
outputVars: {
files: {
title: 'Pliki generowane przez agenta',

View File

@ -901,6 +901,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'Este modelo está obsoleto',
},
vision: 'visão',
outputVars: {
files: {
type: 'Tipo de suporte. Agora suporta apenas imagem',

View File

@ -901,6 +901,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'Acest model este învechit',
},
vision: 'viziune',
outputVars: {
files: {
upload_file_id: 'Încărcați ID-ul fișierului',

View File

@ -901,6 +901,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'Эта модель устарела',
},
vision: 'зрение',
outputVars: {
files: {
transfer_method: 'Способ переноса. Ценность составляет remote_url или local_file',

View File

@ -940,6 +940,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'Ta model je zastarelo',
},
vision: 'vizija',
outputVars: {
files: {
type: 'Vrsta podpore. Zdaj podpiramo samo slike.',

View File

@ -901,6 +901,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'โมเดลนี้เลิกใช้แล้ว',
},
vision: 'การมองเห็น',
outputVars: {
files: {
transfer_method: 'วิธีการโอน ค่าเป็น remote_url หรือ local_file',

View File

@ -901,6 +901,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'Bu model kullanım dışıdır',
},
vision: 'görsel',
outputVars: {
files: {
upload_file_id: 'Dosya kimliğini karşıya yükle',

View File

@ -901,6 +901,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'Ця модель вважається застарілою',
},
vision: 'бачення',
outputVars: {
files: {
upload_file_id: 'Завантажити ідентифікатор файлу',

View File

@ -901,6 +901,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: 'Mô hình này không còn được dùng nữa',
},
vision: 'tầm nhìn',
outputVars: {
files: {
title: 'Tệp do tác nhân tạo',

View File

@ -980,6 +980,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: '此模型已弃用',
},
vision: '视觉',
outputVars: {
text: 'agent 生成的内容',
usage: '模型用量信息',

View File

@ -906,6 +906,7 @@ const translation = {
modelSelectorTooltips: {
deprecated: '此模型已棄用',
},
vision: '視覺',
outputVars: {
files: {
type: '支撐類型。現在僅支援鏡像',