diff --git a/api/dify_graph/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/dify_graph/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 14744d0a74..803c9ccaf9 100644 --- a/api/dify_graph/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/dify_graph/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -23,7 +23,11 @@ from dify_graph.variables import ( ) from dify_graph.variables.segments import ArrayObjectSegment -from .entities import KnowledgeRetrievalNodeData +from .entities import ( + Condition, + KnowledgeRetrievalNodeData, + MetadataFilteringCondition, +) from .exc import ( KnowledgeRetrievalNodeError, RateLimitExceededError, @@ -171,6 +175,12 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD if node_data.metadata_filtering_mode is not None: metadata_filtering_mode = node_data.metadata_filtering_mode + resolved_metadata_conditions = ( + self._resolve_metadata_filtering_conditions(node_data.metadata_filtering_conditions) + if node_data.metadata_filtering_conditions + else None + ) + if str(node_data.retrieval_mode) == DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE and query: # fetch model config if node_data.single_retrieval_config is None: @@ -189,7 +199,7 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD model_mode=model.mode, model_name=model.name, metadata_model_config=node_data.metadata_model_config, - metadata_filtering_conditions=node_data.metadata_filtering_conditions, + metadata_filtering_conditions=resolved_metadata_conditions, metadata_filtering_mode=metadata_filtering_mode, query=query, ) @@ -247,7 +257,7 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD weights=weights, reranking_enable=node_data.multiple_retrieval_config.reranking_enable, metadata_model_config=node_data.metadata_model_config, - metadata_filtering_conditions=node_data.metadata_filtering_conditions, + 
metadata_filtering_conditions=resolved_metadata_conditions, metadata_filtering_mode=metadata_filtering_mode, attachment_ids=[attachment.related_id for attachment in attachments] if attachments else None, ) @@ -256,6 +266,48 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD usage = self._rag_retrieval.llm_usage return retrieval_resource_list, usage + def _resolve_metadata_filtering_conditions( + self, conditions: MetadataFilteringCondition + ) -> MetadataFilteringCondition: + if conditions.conditions is None: + return MetadataFilteringCondition( + logical_operator=conditions.logical_operator or "and", + conditions=None, + ) + + variable_pool = self.graph_runtime_state.variable_pool + resolved_conditions: list[Condition] = [] + for cond in conditions.conditions or []: + value = cond.value + if isinstance(value, str): + segment_group = variable_pool.convert_template(value) + if len(segment_group.value) == 1: + resolved_value = segment_group.value[0].to_object() + else: + resolved_value = segment_group.text + elif isinstance(value, Sequence) and all(isinstance(v, str) for v in value): + resolved_values = [] + for v in value: # type: ignore + segment_group = variable_pool.convert_template(v) + if len(segment_group.value) == 1: + resolved_values.append(segment_group.value[0].to_object()) + else: + resolved_values.append(segment_group.text) + resolved_value = resolved_values + else: + resolved_value = value + resolved_conditions.append( + Condition( + name=cond.name, + comparison_operator=cond.comparison_operator, + value=resolved_value, + ) + ) + return MetadataFilteringCondition( + logical_operator=conditions.logical_operator or "and", + conditions=resolved_conditions, + ) + @classmethod def _extract_variable_selector_to_variable_mapping( cls, diff --git a/api/tests/unit_tests/core/workflow/nodes/knowledge_retrieval/test_knowledge_retrieval_node.py b/api/tests/unit_tests/core/workflow/nodes/knowledge_retrieval/test_knowledge_retrieval_node.py 
index 6a538d81de..9dacc5a39b 100644 --- a/api/tests/unit_tests/core/workflow/nodes/knowledge_retrieval/test_knowledge_retrieval_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/knowledge_retrieval/test_knowledge_retrieval_node.py @@ -8,7 +8,9 @@ from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from dify_graph.enums import WorkflowNodeExecutionStatus from dify_graph.model_runtime.entities.llm_entities import LLMUsage from dify_graph.nodes.knowledge_retrieval.entities import ( + Condition, KnowledgeRetrievalNodeData, + MetadataFilteringCondition, MultipleRetrievalConfig, RerankingModelConfig, SingleRetrievalConfig, @@ -593,3 +595,106 @@ class TestFetchDatasetRetriever: # Assert assert version == "1" + + def test_resolve_metadata_filtering_conditions_templates( + self, + mock_graph_init_params, + mock_graph_runtime_state, + mock_rag_retrieval, + ): + """_resolve_metadata_filtering_conditions should expand {{#...#}} and keep numbers/None unchanged.""" + # Arrange + node_id = str(uuid.uuid4()) + config = { + "id": node_id, + "data": { + "title": "Knowledge Retrieval", + "type": "knowledge-retrieval", + "dataset_ids": [str(uuid.uuid4())], + "retrieval_mode": "multiple", + }, + } + # Variable in pool used by template + mock_graph_runtime_state.variable_pool.add(["start", "query"], StringSegment(value="readme")) + + node = KnowledgeRetrievalNode( + id=node_id, + config=config, + graph_init_params=mock_graph_init_params, + graph_runtime_state=mock_graph_runtime_state, + rag_retrieval=mock_rag_retrieval, + ) + + conditions = MetadataFilteringCondition( + logical_operator="and", + conditions=[ + Condition(name="document_name", comparison_operator="is", value="{{#start.query#}}"), + Condition(name="tags", comparison_operator="in", value=["x", "{{#start.query#}}"]), + Condition(name="year", comparison_operator="=", value=2025), + ], + ) + + # Act + resolved = node._resolve_metadata_filtering_conditions(conditions) + + # Assert + assert 
resolved.logical_operator == "and" + assert resolved.conditions[0].value == "readme" + assert isinstance(resolved.conditions[1].value, list) + assert resolved.conditions[1].value[1] == "readme" + assert resolved.conditions[2].value == 2025 + + def test_fetch_passes_resolved_metadata_conditions( + self, + mock_graph_init_params, + mock_graph_runtime_state, + mock_rag_retrieval, + ): + """_fetch_dataset_retriever should pass resolved metadata conditions into request.""" + # Arrange + query = "hi" + variables = {"query": query} + mock_graph_runtime_state.variable_pool.add(["start", "q"], StringSegment(value="readme")) + + node_data = KnowledgeRetrievalNodeData( + title="Knowledge Retrieval", + type="knowledge-retrieval", + dataset_ids=[str(uuid.uuid4())], + retrieval_mode="multiple", + multiple_retrieval_config=MultipleRetrievalConfig( + top_k=4, + score_threshold=0.0, + reranking_mode="reranking_model", + reranking_enable=True, + reranking_model=RerankingModelConfig(provider="cohere", model="rerank-v2"), + ), + metadata_filtering_mode="manual", + metadata_filtering_conditions=MetadataFilteringCondition( + logical_operator="and", + conditions=[ + Condition(name="document_name", comparison_operator="is", value="{{#start.q#}}"), + ], + ), + ) + + node_id = str(uuid.uuid4()) + config = {"id": node_id, "data": node_data.model_dump()} + node = KnowledgeRetrievalNode( + id=node_id, + config=config, + graph_init_params=mock_graph_init_params, + graph_runtime_state=mock_graph_runtime_state, + rag_retrieval=mock_rag_retrieval, + ) + + mock_rag_retrieval.knowledge_retrieval.return_value = [] + mock_rag_retrieval.llm_usage = LLMUsage.empty_usage() + + # Act + node._fetch_dataset_retriever(node_data=node_data, variables=variables) + + # Assert the passed request has resolved value + call_args = mock_rag_retrieval.knowledge_retrieval.call_args + request = call_args[1]["request"] + assert request.metadata_filtering_conditions is not None + assert 
request.metadata_filtering_conditions.conditions[0].value == "readme"