feat: wire agent v2 knowledge sets backend

2026-06-26 23:01:11 +08:00 · 2026-06-23 16:43:10 +08:00 · 2026-06-23 16:43:10 +08:00 · c06d924094
commit c06d924094
parent c3cb134e73
35 changed files with 2424 additions and 530 deletions
--- a/api/clients/agent_backend/request_builder.py
+++ b/api/clients/agent_backend/request_builder.py
@ -312,7 +312,7 @@ class AgentBackendRunRequestBuilder:
                )
            )

-        if run_input.knowledge is not None and run_input.knowledge.dataset_ids:
+        if run_input.knowledge is not None and run_input.knowledge.sets:
            layers.append(
                RunLayerSpec(
                    name=DIFY_KNOWLEDGE_BASE_LAYER_ID,
@ -513,7 +513,7 @@ class AgentBackendRunRequestBuilder:
                )
            )

-        if run_input.knowledge is not None and run_input.knowledge.dataset_ids:
+        if run_input.knowledge is not None and run_input.knowledge.sets:
            layers.append(
                RunLayerSpec(
                    name=DIFY_KNOWLEDGE_BASE_LAYER_ID,
--- a/api/controllers/console/agent/composer.py
+++ b/api/controllers/console/agent/composer.py
@ -105,6 +105,7 @@ class WorkflowAgentComposerValidateApi(Resource):
    def post(self, tenant_id: str, app_model: App, node_id: str):
        payload = ComposerSavePayload.model_validate(console_ns.payload or {})
        ComposerConfigValidator.validate_save_payload(payload)
+        AgentComposerService.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
        findings = AgentComposerService.collect_validation_findings(
            tenant_id=tenant_id,
            payload=payload,
@ -239,6 +240,7 @@ class AgentComposerValidateApi(Resource):
        _resolve_agent_app_id(tenant_id=tenant_id, agent_id=agent_id)
        payload = ComposerSavePayload.model_validate(console_ns.payload or {})
        ComposerConfigValidator.validate_save_payload(payload)
+        AgentComposerService.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
        findings = AgentComposerService.collect_validation_findings(
            tenant_id=tenant_id,
            payload=payload,
--- a/api/core/workflow/nodes/agent_v2/runtime_feature_manifest.py
+++ b/api/core/workflow/nodes/agent_v2/runtime_feature_manifest.py
@ -3,6 +3,7 @@ from __future__ import annotations
 from typing import Any

 from models.agent_config_entities import AgentSoulConfig
+from services.agent.knowledge_datasets import list_agent_soul_knowledge_dataset_ids

 SUPPORTED_AGENT_BACKEND_FEATURES = frozenset(
    {
@ -48,9 +49,7 @@ def build_runtime_feature_manifest(agent_soul: AgentSoulConfig) -> dict[str, Any
            )

    reserved_status = dict.fromkeys(sorted(RESERVED_AGENT_BACKEND_FEATURES), "reserved_not_executed")
-    reserved_status["knowledge"] = (
-        "supported_by_knowledge_layer" if list_configured_knowledge_dataset_ids(agent_soul) else "not_configured"
-    )
+    reserved_status["knowledge"] = "supported_by_knowledge_layer" if agent_soul.knowledge.sets else "not_configured"
    reserved_status["tools.dify_tools"] = "supported_when_config_valid"
    reserved_status["tools.cli_tools"] = "supported_by_shell_bootstrap"
    reserved_status["env"] = "supported_by_shell_bootstrap"
@ -66,14 +65,14 @@ def build_runtime_feature_manifest(agent_soul: AgentSoulConfig) -> dict[str, Any


 def list_configured_knowledge_dataset_ids(agent_soul: AgentSoulConfig) -> list[str]:
-    """Return the normalized knowledge dataset ids that can produce a runtime layer.
+    """Return normalized dataset ids selected by Agent v2 knowledge sets.

    ``build_runtime_feature_manifest()`` and ``build_knowledge_layer_config()``
-    must stay aligned: both decide knowledge support from this effective,
-    non-blank dataset-id set rather than from raw
-    ``agent_soul.knowledge.datasets`` entries.
+    stay aligned on the set-based contract: DTO validation rejects blank dataset
+    ids before runtime, so this helper only flattens configured set datasets for
+    metadata/diagnostic surfaces that still need a dataset-id summary.
    """
-    return [dataset_id for dataset in agent_soul.knowledge.datasets if (dataset_id := (dataset.id or "").strip())]
+    return list_agent_soul_knowledge_dataset_ids(agent_soul)


 def _get_nested(value: dict[str, Any], path: str) -> Any:
--- a/api/core/workflow/nodes/agent_v2/runtime_request_builder.py
+++ b/api/core/workflow/nodes/agent_v2/runtime_request_builder.py
@ -15,7 +15,16 @@ from dify_agent.layers.execution_context import (
    DifyExecutionContextLayerConfig,
    DifyExecutionContextUserFrom,
 )
-from dify_agent.layers.knowledge import DifyKnowledgeBaseLayerConfig, DifyKnowledgeRetrievalConfig
+from dify_agent.layers.knowledge import (
+    DifyKnowledgeBaseLayerConfig,
+    DifyKnowledgeDatasetConfig,
+    DifyKnowledgeMetadataFilteringConfig,
+    DifyKnowledgeModelConfig,
+    DifyKnowledgeQueryConfig,
+    DifyKnowledgeRerankingModelConfig,
+    DifyKnowledgeRetrievalConfig,
+    DifyKnowledgeSetConfig,
+)
 from dify_agent.layers.shell import (
    DifyShellCliToolConfig,
    DifyShellEnvVarConfig,
@ -40,7 +49,9 @@ from graphon.file import FileTransferMethod
 from graphon.variables.segments import Segment
 from models.agent import Agent, AgentConfigSnapshot, WorkflowAgentNodeBinding
 from models.agent_config_entities import (
-    AgentKnowledgeQueryConfig,
+    AgentKnowledgeMetadataFilteringConfig,
+    AgentKnowledgeModelConfig,
+    AgentKnowledgeRetrievalConfig,
    AgentSoulConfig,
    DeclaredArrayItem,
    DeclaredOutputChildConfig,
@ -547,42 +558,84 @@ def build_shell_layer_config(agent_soul: AgentSoulConfig) -> DifyShellLayerConfi


 def build_knowledge_layer_config(agent_soul: AgentSoulConfig) -> DifyKnowledgeBaseLayerConfig | None:
-    """Map Agent Soul knowledge config into the fixed Dify knowledge-base layer.
+    """Map Agent Soul knowledge sets into one Dify knowledge-base layer.

-    Normalization intentionally matches the current dify-agent runtime contract:
-
-    - blank or missing dataset ids are ignored;
-    - if no valid dataset ids remain, no knowledge layer is injected;
-    - retrieval mode is always forced to ``multiple`` in this first wiring pass;
-    - ``top_k`` falls back to a stable runtime default when the soul omits it;
-    - ``score_threshold`` is only forwarded when the product config explicitly
-      enables it, otherwise the layer keeps the disabled/default ``0.0`` value;
-    - metadata filtering stays at the layer DTO default (disabled).
+    Agent Soul DTO validation owns malformed set rejection. Runtime mapping is
+    intentionally lossless: every configured set is forwarded with its query
+    policy, dataset refs, retrieval controls, and metadata-filtering controls.
+    ``score_threshold=None`` means disabled threshold filtering and maps to the
+    inner retrieval request's ``0.0`` default through the Agent backend DTO.
    """
-    dataset_ids = list_configured_knowledge_dataset_ids(agent_soul)
-    if not dataset_ids:
+    if not agent_soul.knowledge.sets:
        return None

-    query_config = agent_soul.knowledge.query_config
    return DifyKnowledgeBaseLayerConfig(
-        dataset_ids=dataset_ids,
-        retrieval=DifyKnowledgeRetrievalConfig(
-            mode="multiple",
-            top_k=_knowledge_top_k(query_config),
-            score_threshold=_knowledge_score_threshold(query_config),
-        ),
+        sets=[
+            DifyKnowledgeSetConfig(
+                id=knowledge_set.id,
+                name=knowledge_set.name,
+                description=knowledge_set.description,
+                datasets=[
+                    DifyKnowledgeDatasetConfig(
+                        id=dataset.id or "",
+                        name=dataset.name,
+                        description=dataset.description,
+                    )
+                    for dataset in knowledge_set.datasets
+                ],
+                query=DifyKnowledgeQueryConfig(
+                    mode=cast(Literal["user_query", "generated_query"], knowledge_set.query.mode.value),
+                    value=knowledge_set.query.value,
+                ),
+                retrieval=_knowledge_retrieval_config(knowledge_set.retrieval),
+                metadata_filtering=_knowledge_metadata_filtering_config(knowledge_set.metadata_filtering),
+            )
+            for knowledge_set in agent_soul.knowledge.sets
+        ],
    )


-def _knowledge_top_k(query_config: AgentKnowledgeQueryConfig) -> int:
-    top_k = query_config.top_k
-    return top_k if isinstance(top_k, int) and top_k >= 1 else 4
+def _knowledge_retrieval_config(retrieval: AgentKnowledgeRetrievalConfig) -> DifyKnowledgeRetrievalConfig:
+    """Translate one knowledge set's retrieval policy into the Agent backend DTO.
+
+    Scalar fields (``mode``, ``top_k``, ``reranking_mode``,
+    ``reranking_enable``) are copied as-is. Optional nested values
+    (``reranking_model``, ``weights``, ``model``) are converted when present
+    and passed as ``None`` otherwise.
+    """
+    return DifyKnowledgeRetrievalConfig(
+        mode=retrieval.mode,
+        top_k=retrieval.top_k,
+        # A missing threshold (``None``) is sent as 0.0; an explicit 0 also
+        # takes this branch and yields the same 0.0.
+        score_threshold=retrieval.score_threshold or 0.0,
+        reranking_mode=retrieval.reranking_mode,
+        reranking_enable=retrieval.reranking_enable,
+        reranking_model=DifyKnowledgeRerankingModelConfig(
+            provider=retrieval.reranking_model.provider,
+            model=retrieval.reranking_model.model,
+        )
+        if retrieval.reranking_model is not None
+        else None,
+        # Weights are forwarded as a plain JSON-mode dict with None-valued
+        # keys dropped.
+        weights=cast(dict[str, Any], retrieval.weights.model_dump(mode="json", exclude_none=True))
+        if retrieval.weights is not None
+        else None,
+        model=_knowledge_model_config(retrieval.model),
+    )


-def _knowledge_score_threshold(query_config: AgentKnowledgeQueryConfig) -> float:
-    if query_config.score_threshold_enabled and query_config.score_threshold is not None:
-        return query_config.score_threshold
-    return 0.0
+def _knowledge_metadata_filtering_config(
+    metadata_filtering: AgentKnowledgeMetadataFilteringConfig,
+) -> DifyKnowledgeMetadataFilteringConfig:
+    """Translate one knowledge set's metadata-filtering policy into the Agent backend DTO.
+
+    ``mode`` is copied as-is. ``conditions`` is forwarded as a JSON-mode dump
+    when present and as ``None`` otherwise.
+    """
+    return DifyKnowledgeMetadataFilteringConfig(
+        mode=metadata_filtering.mode,
+        # The soul-side attribute is ``metadata_model_config``; the backend DTO
+        # is constructed with the ``model_config`` keyword.
+        model_config=_knowledge_model_config(metadata_filtering.metadata_model_config),
+        conditions=cast(Any, metadata_filtering.conditions.model_dump(mode="json"))
+        if metadata_filtering.conditions is not None
+        else None,
+    )
+
+
+def _knowledge_model_config(model: AgentKnowledgeModelConfig | None) -> DifyKnowledgeModelConfig | None:
+    """Convert an optional soul-side model config into the Agent backend DTO.
+
+    Returns ``None`` when no model is configured; otherwise copies
+    ``provider``, ``name``, ``mode`` and ``completion_params`` unchanged.
+    """
+    if model is None:
+        return None
+    return DifyKnowledgeModelConfig(
+        provider=model.provider,
+        name=model.name,
+        mode=model.mode,
+        completion_params=model.completion_params,
+    )


 def build_ask_human_layer_config(agent_soul: AgentSoulConfig) -> DifyAskHumanLayerConfig | None:
--- a/api/core/workflow/nodes/agent_v2/validators.py
+++ b/api/core/workflow/nodes/agent_v2/validators.py
@ -18,6 +18,7 @@ from models.agent_config_entities import (
 )
 from models.model import UploadFile
 from models.workflow import Workflow
+from services.agent.knowledge_datasets import list_missing_tenant_knowledge_dataset_ids

 from .entities import DifyAgentNodeData

@ -146,6 +147,7 @@ class WorkflowAgentNodeValidator:
            )
        cls._validate_agent_soul_env(binding=binding, agent_soul=agent_soul)
        cls._validate_agent_soul_tools(binding=binding, agent_soul=agent_soul)
+        cls._validate_agent_soul_knowledge(binding=binding, agent_soul=agent_soul)
        node_job = WorkflowNodeJobConfig.model_validate(binding.node_job_config_dict)
        cls.validate_node_job(session=session, binding=binding, node_job=node_job, topology=topology)

@ -364,6 +366,24 @@ class WorkflowAgentNodeValidator:
                )
            cli_tool_names.add(normalized_name)

+    @classmethod
+    def _validate_agent_soul_knowledge(
+        cls,
+        *,
+        binding: WorkflowAgentNodeBinding,
+        agent_soul: AgentSoulConfig,
+    ) -> None:
+        """Validate knowledge set dataset rows against the publishing tenant.
+
+        The lookup itself is delegated to
+        ``list_missing_tenant_knowledge_dataset_ids`` using
+        ``binding.tenant_id``; this method only turns a non-empty result into
+        a validation failure.
+
+        Raises:
+            WorkflowAgentNodeValidationError: If any dataset id is reported
+                missing. The message names the node id and lists the ids.
+        """
+        missing_ids = list_missing_tenant_knowledge_dataset_ids(
+            tenant_id=binding.tenant_id,
+            agent_soul=agent_soul,
+        )
+        if missing_ids:
+            raise WorkflowAgentNodeValidationError(
+                f"Workflow Agent node {binding.node_id} references missing or out-of-scope knowledge datasets: "
+                f"{', '.join(missing_ids)}."
+            )
+
    @classmethod
    def _validate_agent_soul_env(
        cls,
--- a/api/fields/agent_fields.py
+++ b/api/fields/agent_fields.py
@ -400,10 +400,22 @@ class AgentComposerNodeJobCandidatesResponse(ResponseModel):
    human_contacts: list[AgentHumanContactConfig] = Field(default_factory=list)


+# Dataset entry inside a composer knowledge-set candidate. Reuses the stored
+# dataset reference fields (id, name, description) and adds a ``missing`` flag
+# that defaults to False.
+class AgentComposerKnowledgeDatasetCandidateResponse(AgentKnowledgeDatasetConfig):
+    missing: bool = False
+
+
+# One knowledge set in the composer soul-candidates response: the set identity
+# plus its dataset entries and a separate list of dataset ids flagged missing.
+# Both lists default to empty.
+class AgentComposerKnowledgeSetCandidateResponse(ResponseModel):
+    id: str
+    name: str
+    description: str | None = None
+    datasets: list[AgentComposerKnowledgeDatasetCandidateResponse] = Field(default_factory=list)
+    missing_dataset_ids: list[str] = Field(default_factory=list)
+
+
 class AgentComposerSoulCandidatesResponse(ResponseModel):
    dify_tools: list[AgentComposerDifyToolCandidateResponse] = Field(default_factory=list)
    cli_tools: list[AgentCliToolConfig] = Field(default_factory=list)
-    knowledge_datasets: list[AgentKnowledgeDatasetConfig] = Field(default_factory=list)
+    knowledge_sets: list[AgentComposerKnowledgeSetCandidateResponse] = Field(default_factory=list)
    human_contacts: list[AgentHumanContactConfig] = Field(default_factory=list)


--- a/api/models/agent_config_entities.py
+++ b/api/models/agent_config_entities.py
@ -2,10 +2,11 @@ from __future__ import annotations

 import re
 from enum import StrEnum
-from typing import Annotated, Any, Final, Literal
+from typing import Annotated, Any, Final, Literal, Self

 from pydantic import BaseModel, ConfigDict, Field, WithJsonSchema, field_validator, model_validator

+from core.rag.entities.metadata_entities import ConditionValue, SupportedComparisonOperator
 from core.workflow.file_reference import is_canonical_file_reference
 from graphon.file import FileTransferMethod

@ -236,17 +237,161 @@ class AgentCliToolConfig(AgentFlexibleConfig):
    inferred_from: str | None = Field(default=None, max_length=255)


-class AgentKnowledgeDatasetConfig(AgentFlexibleConfig):
+class AgentKnowledgeDatasetConfig(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
    id: str | None = Field(default=None, max_length=255)
    name: str | None = Field(default=None, max_length=255)
    description: str | None = None


-class AgentKnowledgeQueryConfig(AgentFlexibleConfig):
-    query: str | None = None
+class AgentKnowledgeQueryConfig(BaseModel):
+    """Per-set query policy for Agent v2 knowledge retrieval.
+
+    Agent v2 stores knowledge as explicit ``knowledge.sets`` rather than the
+    legacy flat ``datasets`` / ``query_mode`` / ``query_config`` shape. Each
+    set owns its own query policy, so ``user_query`` must carry an explicit
+    ``value`` while ``generated_query`` leaves that value empty.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    mode: AgentKnowledgeQueryMode
+    value: str | None = None
+
+    @model_validator(mode="after")
+    def validate_query(self) -> Self:
+        if self.mode == AgentKnowledgeQueryMode.USER_QUERY and not (self.value or "").strip():
+            raise ValueError("knowledge query.value is required for user_query mode")
+        return self
+
+
+# Model reference shared by knowledge retrieval (``retrieval.model``) and
+# metadata filtering (``metadata_model_config``). Unknown keys are rejected;
+# provider, name and mode must be non-empty strings.
+class AgentKnowledgeModelConfig(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    provider: str = Field(min_length=1, max_length=255)
+    name: str = Field(min_length=1, max_length=255)
+    mode: str = Field(min_length=1, max_length=64)
+    completion_params: dict[str, Any] = Field(default_factory=dict)
+
+
+# Provider/model pair identifying the reranking model of a knowledge set.
+# Unknown keys are rejected and both fields must be non-empty.
+class AgentKnowledgeRerankingModelConfig(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    provider: str = Field(min_length=1, max_length=255)
+    model: str = Field(min_length=1, max_length=255)
+
+
+# Weighted-score settings for a knowledge set's retrieval config. All fields
+# are optional. Unlike the sibling knowledge models, this one derives from
+# AgentFlexibleConfig instead of declaring ``extra="forbid"`` itself.
+class AgentKnowledgeWeightedScoreConfig(AgentFlexibleConfig):
+    weight_type: str | None = Field(default=None, max_length=64)
+    vector_setting: dict[str, Any] | None = None
+    keyword_setting: dict[str, Any] | None = None
+
+
+class AgentKnowledgeRetrievalConfig(BaseModel):
+    """Per-set retrieval policy for Agent v2 knowledge retrieval.
+
+    Retrieval settings now live on each knowledge set instead of one shared
+    flat config. A set may use either ``multiple`` retrieval with ``top_k`` or
+    ``single`` retrieval with a required model config.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    mode: Literal["single", "multiple"]
    top_k: int | None = Field(default=None, ge=1)
    score_threshold: float | None = Field(default=None, ge=0, le=1)
-    score_threshold_enabled: bool | None = None
+    reranking_mode: str = "reranking_model"
+    reranking_enable: bool = True
+    reranking_model: AgentKnowledgeRerankingModelConfig | None = None
+    weights: AgentKnowledgeWeightedScoreConfig | None = None
+    model: AgentKnowledgeModelConfig | None = None
+
+    @model_validator(mode="after")
+    def validate_mode_fields(self) -> Self:
+        if self.mode == "multiple" and self.top_k is None:
+            raise ValueError("knowledge retrieval.top_k is required for multiple mode")
+        if self.mode == "single" and self.model is None:
+            raise ValueError("knowledge retrieval.model is required for single mode")
+        return self
+
+
+# A single metadata filter condition: metadata field name, comparison operator
+# and an optional comparison value (defaults to None). Unknown keys are
+# rejected.
+class AgentKnowledgeMetadataCondition(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    name: str = Field(min_length=1, max_length=255)
+    comparison_operator: SupportedComparisonOperator
+    value: ConditionValue = None
+
+
+# Group of metadata conditions combined with one logical operator ("and" by
+# default). The condition list itself defaults to empty; manual metadata
+# filtering separately requires it to be non-empty.
+class AgentKnowledgeMetadataConditions(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    logical_operator: Literal["and", "or"] = "and"
+    conditions: list[AgentKnowledgeMetadataCondition] = Field(default_factory=list)
+
+
+class AgentKnowledgeMetadataFilteringConfig(BaseModel):
+    """Per-set metadata filtering policy.
+
+    The Python attribute uses ``metadata_model_config`` for clarity because the
+    model belongs to metadata filtering specifically, while the external API and
+    generated schema keep the historical ``model_config`` field name via alias.
+    """
+
+    # ``populate_by_name`` lets callers use either the attribute name or the
+    # ``model_config`` alias when constructing the model.
+    model_config = ConfigDict(extra="forbid", populate_by_name=True)
+
+    mode: Literal["disabled", "automatic", "manual"] = "disabled"
+    # Internal name is explicit; wire format remains ``model_config``.
+    metadata_model_config: AgentKnowledgeModelConfig | None = Field(default=None, alias="model_config")
+    conditions: AgentKnowledgeMetadataConditions | None = None
+
+    @model_validator(mode="after")
+    def validate_mode_fields(self) -> Self:
+        """Enforce the fields each mode depends on.
+
+        ``automatic`` requires a model config and ``manual`` requires at least
+        one condition; ``disabled`` imposes no extra requirement.
+        """
+        if self.mode == "automatic" and self.metadata_model_config is None:
+            raise ValueError("metadata_filtering.model_config is required for automatic mode")
+        if self.mode == "manual" and (self.conditions is None or not self.conditions.conditions):
+            raise ValueError("metadata_filtering.conditions is required for manual mode")
+        return self
+
+
+class AgentKnowledgeSetConfig(BaseModel):
+    """One explicit knowledge set in Agent v2.
+
+    ``knowledge.sets`` replaces the old flat knowledge config. Each set owns
+    its datasets plus query, retrieval, and metadata policies. An individual
+    set must contain at least one dataset id even though the overall knowledge
+    section may be empty, which is how callers express "no knowledge layer".
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    id: str = Field(min_length=1, max_length=255)
+    name: str = Field(min_length=1, max_length=255)
+    description: str | None = None
+    datasets: list[AgentKnowledgeDatasetConfig]
+    query: AgentKnowledgeQueryConfig
+    retrieval: AgentKnowledgeRetrievalConfig
+    metadata_filtering: AgentKnowledgeMetadataFilteringConfig = Field(
+        default_factory=AgentKnowledgeMetadataFilteringConfig
+    )
+
+    @field_validator("id", "name")
+    @classmethod
+    def validate_non_blank_identity(cls, value: str) -> str:
+        """Strip surrounding whitespace and reject values that are blank afterwards."""
+        normalized = value.strip()
+        if not normalized:
+            raise ValueError("knowledge set id and name must not be blank")
+        return normalized
+
+    @model_validator(mode="after")
+    def validate_datasets(self) -> Self:
+        """Require a non-empty list of unique, non-blank dataset ids and store them stripped.
+
+        Raises:
+            ValueError: If the set has no datasets, any dataset id is blank
+                after stripping, or two datasets share the same stripped id.
+        """
+        dataset_ids = [(dataset.id or "").strip() for dataset in self.datasets]
+        if not dataset_ids or any(not dataset_id for dataset_id in dataset_ids):
+            raise ValueError("knowledge set requires at least one dataset id")
+        if len(dataset_ids) != len(set(dataset_ids)):
+            raise ValueError("knowledge set dataset ids must be unique")
+        # Persist the ids in the same stripped form that was just validated.
+        # Otherwise a padded id passes the checks above but is later forwarded
+        # verbatim by consumers that read ``dataset.id`` directly, while other
+        # consumers strip it, so the two sides disagree on the id.
+        for dataset, dataset_id in zip(self.datasets, dataset_ids):
+            dataset.id = dataset_id
+        return self


 class AgentHumanContactConfig(AgentFlexibleConfig):
@ -453,9 +598,28 @@ class AgentSoulToolsConfig(BaseModel):


 class AgentSoulKnowledgeConfig(BaseModel):
-    datasets: list[AgentKnowledgeDatasetConfig] = Field(default_factory=list)
-    query_mode: AgentKnowledgeQueryMode | None = None
-    query_config: AgentKnowledgeQueryConfig = Field(default_factory=AgentKnowledgeQueryConfig)
+    """Top-level Agent v2 knowledge config.
+
+    Agent v2 models knowledge as explicit sets instead of one flat
+    ``datasets`` / ``query_mode`` / ``query_config`` block. An empty ``sets``
+    list means no knowledge layer should be emitted at runtime, while set-name
+    uniqueness stays case-insensitive because runtime selection addresses sets
+    by name.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    sets: list[AgentKnowledgeSetConfig] = Field(default_factory=list)
+
+    @model_validator(mode="after")
+    def validate_unique_sets(self) -> Self:
+        set_ids = [item.id.strip() for item in self.sets]
+        if len(set_ids) != len(set(set_ids)):
+            raise ValueError("knowledge set ids must be unique")
+        set_names = [item.name.strip().lower() for item in self.sets]
+        if len(set_names) != len(set(set_names)):
+            raise ValueError("knowledge set names must be unique")
+        return self


 class AgentSoulHumanConfig(BaseModel):
--- a/api/openapi/markdown/console-openapi.md
+++ b/api/openapi/markdown/console-openapi.md
@ -12433,6 +12433,25 @@ Risk marker for CLI tool bootstrap commands.
 | current_snapshot_id | string |  | No |
 | workflow_node_count | integer |  | Yes |

+#### AgentComposerKnowledgeDatasetCandidateResponse
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| description | string |  | No |
+| id | string |  | No |
+| missing | boolean |  | No |
+| name | string |  | No |
+
+#### AgentComposerKnowledgeSetCandidateResponse
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| datasets | [ [AgentComposerKnowledgeDatasetCandidateResponse](#agentcomposerknowledgedatasetcandidateresponse) ] |  | No |
+| description | string |  | No |
+| id | string |  | Yes |
+| missing_dataset_ids | [ string ] |  | No |
+| name | string |  | Yes |
+
 #### AgentComposerNodeJobCandidatesResponse

 | Name | Type | Description | Required |
@ -12448,7 +12467,7 @@ Risk marker for CLI tool bootstrap commands.
 | cli_tools | [ [AgentCliToolConfig](#agentclitoolconfig) ] |  | No |
 | dify_tools | [ [AgentComposerDifyToolCandidateResponse](#agentcomposerdifytoolcandidateresponse) ] |  | No |
 | human_contacts | [ [AgentHumanContactConfig](#agenthumancontactconfig) ] |  | No |
-| knowledge_datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] |  | No |
+| knowledge_sets | [ [AgentComposerKnowledgeSetCandidateResponse](#agentcomposerknowledgesetcandidateresponse) ] |  | No |

 #### AgentComposerSoulLockResponse

@ -12842,14 +12861,44 @@ the current roster/workflow APIs scoped to Dify Agent.
 | id | string |  | No |
 | name | string |  | No |

+#### AgentKnowledgeMetadataCondition
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| comparison_operator | string, <br>**Available values:** "<", "=", ">", "after", "before", "contains", "empty", "end with", "in", "is", "is not", "not contains", "not empty", "not in", "start with", "≠", "≤", "≥" | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
+| name | string |  | Yes |
+| value | string<br>[ string ]<br>number |  | No |
+
+#### AgentKnowledgeMetadataConditions
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| conditions | [ [AgentKnowledgeMetadataCondition](#agentknowledgemetadatacondition) ] |  | No |
+| logical_operator | string, <br>**Available values:** "and", "or", <br>**Default:** and | *Enum:* `"and"`, `"or"` | No |
+
+#### AgentKnowledgeMetadataFilteringConfig
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| conditions | [AgentKnowledgeMetadataConditions](#agentknowledgemetadataconditions) |  | No |
+| mode | string, <br>**Available values:** "automatic", "disabled", "manual", <br>**Default:** disabled | *Enum:* `"automatic"`, `"disabled"`, `"manual"` | No |
+| model_config | [AgentKnowledgeModelConfig](#agentknowledgemodelconfig) |  | No |
+
+#### AgentKnowledgeModelConfig
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| completion_params | object |  | No |
+| mode | string |  | Yes |
+| name | string |  | Yes |
+| provider | string |  | Yes |
+
 #### AgentKnowledgeQueryConfig

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| query | string |  | No |
-| score_threshold | number |  | No |
-| score_threshold_enabled | boolean |  | No |
-| top_k | integer |  | No |
+| mode | [AgentKnowledgeQueryMode](#agentknowledgequerymode) |  | Yes |
+| value | string |  | No |

 #### AgentKnowledgeQueryMode

@ -12857,6 +12906,46 @@ the current roster/workflow APIs scoped to Dify Agent.
 | ---- | ---- | ----------- | -------- |
 | AgentKnowledgeQueryMode | string |  |  |

+#### AgentKnowledgeRerankingModelConfig
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| model | string |  | Yes |
+| provider | string |  | Yes |
+
+#### AgentKnowledgeRetrievalConfig
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| mode | string, <br>**Available values:** "multiple", "single" | *Enum:* `"multiple"`, `"single"` | Yes |
+| model | [AgentKnowledgeModelConfig](#agentknowledgemodelconfig) |  | No |
+| reranking_enable | boolean, <br>**Default:** true |  | No |
+| reranking_mode | string, <br>**Default:** reranking_model |  | No |
+| reranking_model | [AgentKnowledgeRerankingModelConfig](#agentknowledgererankingmodelconfig) |  | No |
+| score_threshold | number |  | No |
+| top_k | integer |  | No |
+| weights | [AgentKnowledgeWeightedScoreConfig](#agentknowledgeweightedscoreconfig) |  | No |
+
+#### AgentKnowledgeSetConfig
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] |  | Yes |
+| description | string |  | No |
+| id | string |  | Yes |
+| metadata_filtering | [AgentKnowledgeMetadataFilteringConfig](#agentknowledgemetadatafilteringconfig) |  | No |
+| name | string |  | Yes |
+| query | [AgentKnowledgeQueryConfig](#agentknowledgequeryconfig) |  | Yes |
+| retrieval | [AgentKnowledgeRetrievalConfig](#agentknowledgeretrievalconfig) |  | Yes |
+
+#### AgentKnowledgeWeightedScoreConfig
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| keyword_setting | object |  | No |
+| vector_setting | object |  | No |
+| weight_type | string |  | No |
+
 #### AgentLogConversationItemResponse

 | Name | Type | Description | Required |
@ -13258,9 +13347,7 @@ old Agent tool payloads can be read while new payloads stay explicit.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] |  | No |
-| query_config | [AgentKnowledgeQueryConfig](#agentknowledgequeryconfig) |  | No |
-| query_mode | [AgentKnowledgeQueryMode](#agentknowledgequerymode) |  | No |
+| sets | [ [AgentKnowledgeSetConfig](#agentknowledgesetconfig) ] |  | No |

 #### AgentSoulMemoryConfig

--- a/api/services/agent/composer_candidates.py
+++ b/api/services/agent/composer_candidates.py
@ -25,6 +25,7 @@ from models.agent_config_entities import (
    AgentSoulConfig,
    DeclaredOutputConfig,
 )
+from services.agent.knowledge_datasets import list_agent_soul_knowledge_dataset_ids

 MAX_CANDIDATES_PER_LIST = 200

@ -139,19 +140,34 @@ def soul_candidates(

    cli_tools = [tool.model_dump(exclude_none=True) for tool in soul.tools.cli_tools if tool.enabled]

-    dataset_ids = [dataset.id for dataset in soul.knowledge.datasets if dataset.id]
+    dataset_ids = list_agent_soul_knowledge_dataset_ids(soul)
    dataset_rows = dataset_lookup(dataset_ids) if dataset_ids else {}
-    knowledge_datasets: list[dict[str, Any]] = []
-    for dataset in soul.knowledge.datasets:
-        if not dataset.id:
-            continue
-        row = dataset_rows.get(dataset.id)
-        knowledge_datasets.append(
+    knowledge_sets: list[dict[str, Any]] = []
+    for knowledge_set in soul.knowledge.sets:
+        missing_dataset_ids: list[str] = []
+        datasets: list[dict[str, Any]] = []
+        for dataset in knowledge_set.datasets:
+            dataset_id = (dataset.id or "").strip()
+            if not dataset_id:
+                continue
+            row = dataset_rows.get(dataset_id)
+            if row is None:
+                missing_dataset_ids.append(dataset_id)
+            datasets.append(
+                {
+                    "id": dataset_id,
+                    "name": (getattr(row, "name", None) or dataset.name or dataset_id),
+                    "description": getattr(row, "description", None) or dataset.description,
+                    "missing": row is None,
+                }
+            )
+        knowledge_sets.append(
            {
-                "id": dataset.id,
-                "name": (getattr(row, "name", None) or dataset.name or dataset.id),
-                "description": getattr(row, "description", None) or dataset.description,
-                "missing": row is None,
+                "id": knowledge_set.id,
+                "name": knowledge_set.name,
+                "description": knowledge_set.description,
+                "datasets": datasets,
+                "missing_dataset_ids": missing_dataset_ids,
            }
        )

@ -161,7 +177,7 @@ def soul_candidates(
    lists = {
        "dify_tools": dify_tools,
        "cli_tools": cli_tools,
-        "knowledge_datasets": knowledge_datasets,
+        "knowledge_sets": knowledge_sets,
        "human_contacts": human_contacts,
    }
    capped: dict[str, list[dict[str, Any]]] = {}
@ -192,7 +208,6 @@ def _ref_entry(
        "inferred": inferred,
    }

-
 def _capped(values: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], bool]:
    if len(values) > MAX_CANDIDATES_PER_LIST:
        return values[:MAX_CANDIDATES_PER_LIST], True
--- a/api/services/agent/composer_service.py
+++ b/api/services/agent/composer_service.py
@ -33,6 +33,11 @@ from services.agent.errors import (
    AgentNameConflictError,
    AgentNotFoundError,
    AgentVersionNotFoundError,
+    InvalidComposerConfigError,
+)
+from services.agent.knowledge_datasets import (
+    get_tenant_knowledge_dataset_rows,
+    list_missing_tenant_knowledge_dataset_ids,
 )
 from services.entities.agent_entities import (
    AgentSoulConfig,
@ -101,6 +106,7 @@ class AgentComposerService:

        _backfill_cli_tool_ids(payload.agent_soul)
        ComposerConfigValidator.validate_save_payload(payload)
+        cls.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
        workflow = cls._get_draft_workflow(tenant_id=tenant_id, app_id=app_id)
        binding = cls._get_workflow_binding(tenant_id=tenant_id, workflow_id=workflow.id, node_id=node_id)

@ -195,6 +201,7 @@ class AgentComposerService:
            raise ValueError("Agent App composer endpoint only accepts agent_app variant")
        _backfill_cli_tool_ids(payload.agent_soul)
        ComposerConfigValidator.validate_save_payload(payload)
+        cls.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
        if payload.agent_soul is None:
            raise ValueError("agent_soul is required")

@ -273,19 +280,15 @@ class AgentComposerService:
        agent_id: str | None = None,
    ) -> dict[str, Any]:
        """ENG-617 soft findings, with DB-backed dataset and drive mention checks."""
-        from services.agent.prompt_mentions import MentionKind, parse_prompt_mentions
-
-        mentioned_ids: set[str] = set()
-        if payload.agent_soul is not None:
-            mentioned_ids |= {
-                mention.ref_id
-                for mention in parse_prompt_mentions(payload.agent_soul.prompt.system_prompt)
-                if mention.kind == MentionKind.KNOWLEDGE
-            }
-        existing_dataset_ids: set[str] | None = None
-        if mentioned_ids:
-            existing_dataset_ids = set(cls._dataset_rows(tenant_id=tenant_id, dataset_ids=sorted(mentioned_ids)))
-        findings = ComposerConfigValidator.collect_soft_findings(payload, existing_dataset_ids=existing_dataset_ids)
+        existing_knowledge_set_ids = (
+            {knowledge_set.id for knowledge_set in payload.agent_soul.knowledge.sets}
+            if payload.agent_soul is not None
+            else None
+        )
+        findings = ComposerConfigValidator.collect_soft_findings(
+            payload,
+            existing_knowledge_set_ids=existing_knowledge_set_ids,
+        )
        if agent_id and payload.agent_soul is not None:
            findings["warnings"].extend(
                cls._drive_mention_findings(
@ -296,6 +299,24 @@ class AgentComposerService:
            )
        return findings

+    @classmethod
+    def validate_knowledge_datasets(cls, *, tenant_id: str, agent_soul: AgentSoulConfig | None) -> None:
+        """Reject knowledge sets that reference datasets outside the tenant scope.
+
+        DTO validators own shape rules (set structure, duplicate set ids/names,
+        duplicate dataset ids within a set). This service-level gate owns database
+        existence and tenant ownership, failing before snapshots are persisted.
+        """
+        if agent_soul is None:
+            return
+        unresolved_ids = list_missing_tenant_knowledge_dataset_ids(tenant_id=tenant_id, agent_soul=agent_soul)
+        if not unresolved_ids:
+            return
+        raise InvalidComposerConfigError(
+            "knowledge_dataset_not_found: knowledge sets reference missing or out-of-scope datasets: "
+            + ", ".join(unresolved_ids)
+        )
+
    @classmethod
    def resolve_bound_agent_id(cls, *, tenant_id: str, app_id: str) -> str | None:
        """The Agent App's bound roster agent id, if any (validate-endpoint context)."""
@ -410,7 +431,7 @@ class AgentComposerService:

        soul_lists, soul_truncated = soul_candidates(
            agent_soul=agent_soul,
-            dataset_lookup=lambda ids: cls._dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
+            dataset_lookup=lambda ids: get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
            workspace_tools_loader=lambda: cls._workspace_dify_tools(tenant_id=tenant_id, user_id=user_id),
        )
        truncated = truncated or soul_truncated
@ -437,7 +458,7 @@ class AgentComposerService:
        agent_soul = cls._load_agent_app_soul(tenant_id=tenant_id, app_id=app_id)
        soul_lists, truncated = soul_candidates(
            agent_soul=agent_soul,
-            dataset_lookup=lambda ids: cls._dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
+            dataset_lookup=lambda ids: get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
            workspace_tools_loader=lambda: cls._workspace_dify_tools(tenant_id=tenant_id, user_id=user_id),
        )
        response = ComposerCandidatesResponse(
@ -530,30 +551,6 @@ class AgentComposerService:
        variables = WorkflowDraftVariableService(session=session).list_system_variables(app_id, user_id)
        return [(variable.name, variable.value_type.value) for variable in variables.variables]

-    @staticmethod
-    def _dataset_rows(*, tenant_id: str, dataset_ids: list[str]) -> dict[str, Any]:
-        """Tenant-scoped dataset lookup tolerating malformed ids.
-
-        Mention ids come from user-editable prompt text; a non-UUID id can never
-        match a dataset row, so it is simply absent from the result (-> missing/
-        placeholder semantics) instead of breaking the UUID-typed query.
-        """
-        from uuid import UUID
-
-        from services.dataset_service import DatasetService
-
-        valid_ids: list[str] = []
-        for dataset_id in dataset_ids:
-            try:
-                UUID(dataset_id)
-            except (ValueError, TypeError):
-                continue
-            valid_ids.append(dataset_id)
-        if not valid_ids:
-            return {}
-        rows, _ = DatasetService.get_datasets_by_ids(valid_ids, tenant_id)
-        return {str(row.id): row for row in rows}
-
    @staticmethod
    def _workspace_dify_tools(*, tenant_id: str, user_id: str) -> list[dict[str, Any]]:
        """Workspace Dify Plugin tools, same source as the tool selector.
--- a/api/services/agent/composer_validator.py
+++ b/api/services/agent/composer_validator.py
@ -141,15 +141,15 @@ class ComposerConfigValidator:
        cls,
        payload: ComposerSavePayload,
        *,
-        existing_dataset_ids: set[str] | None = None,
+        existing_knowledge_set_ids: set[str] | None = None,
    ) -> dict[str, Any]:
        """ENG-617 §5.3/§5.4 soft findings — never block save.

        ``warnings`` carries ``mention_target_missing`` / ``mention_malformed``
-        entries; ``knowledge_retrieval_placeholder`` keeps dangling knowledge
+        entries; ``knowledge_retrieval_placeholder`` keeps dangling knowledge-set
        mentions with a placeholder name (0522 consensus) instead of dropping or
-        rejecting them. With ``existing_dataset_ids`` provided, configured-but-
-        deleted datasets surface as placeholders too.
+        rejecting them. With ``existing_knowledge_set_ids`` provided, mentions
+        that no longer exist in the current Agent Soul surface as placeholders too.
        """
        warnings: list[dict[str, Any]] = []
        placeholders: list[dict[str, str]] = []
@ -181,7 +181,7 @@ class ComposerConfigValidator:
                resolved = resolver(mention)
                if mention.kind == MentionKind.KNOWLEDGE:
                    dangling = resolved is None or (
-                        existing_dataset_ids is not None and mention.ref_id not in existing_dataset_ids
+                        existing_knowledge_set_ids is not None and mention.ref_id not in existing_knowledge_set_ids
                    )
                    if dangling:
                        placeholders.append(
--- a/api/services/agent/knowledge_datasets.py
+++ b/api/services/agent/knowledge_datasets.py
@ -0,0 +1,63 @@
+from __future__ import annotations
+
+from typing import Any
+from uuid import UUID
+
+from models.agent_config_entities import AgentSoulConfig
+
+
+def list_agent_soul_knowledge_dataset_ids(agent_soul: AgentSoulConfig) -> list[str]:
+    """Collect the distinct dataset ids referenced by ``knowledge.sets``.
+
+    Agent v2 selects knowledge datasets through ``knowledge.sets``, so every
+    dataset of every set is visited in config order. Each id is
+    whitespace-stripped (a falsy id counts as blank), blank ids are dropped,
+    and a repeated id keeps only its first position. Composer, workflow
+    validation, candidates, and runtime diagnostics share this helper so
+    they all apply the same normalization rules.
+    """
+    stripped_ids = (
+        (dataset.id or "").strip()
+        for knowledge_set in agent_soul.knowledge.sets
+        for dataset in knowledge_set.datasets
+    )
+    # dict.fromkeys keeps first-seen order while discarding duplicates.
+    first_seen = dict.fromkeys(stripped_id for stripped_id in stripped_ids if stripped_id)
+    return list(first_seen)
+
+
+def get_tenant_knowledge_dataset_rows(*, tenant_id: str, dataset_ids: list[str]) -> dict[str, Any]:
+    """Return tenant-scoped dataset rows keyed by the requested dataset ids.
+
+    Knowledge ids come from user-editable config. Malformed ids can never match
+    a row and are skipped. ``UUID()`` also accepts non-canonical spellings (upper
+    case, braces, ``urn:uuid:``): those are queried in canonical form but keyed
+    by the caller's spelling, so an existing dataset is not reported as missing.
+    """
+    canonical_by_id: dict[str, str] = {}
+    for dataset_id in dataset_ids:
+        try:
+            canonical_by_id[dataset_id] = str(UUID(dataset_id))
+        except (TypeError, ValueError):
+            continue
+    if not canonical_by_id:
+        return {}
+
+    from services.dataset_service import DatasetService
+
+    rows, _ = DatasetService.get_datasets_by_ids(list(dict.fromkeys(canonical_by_id.values())), tenant_id)
+    found = {str(row.id).lower(): row for row in rows}
+    return {dataset_id: found[canon] for dataset_id, canon in canonical_by_id.items() if canon in found}
+
+
+def list_missing_tenant_knowledge_dataset_ids(*, tenant_id: str, agent_soul: AgentSoulConfig | None) -> list[str]:
+    """List configured knowledge dataset ids that have no row in the tenant scope.
+
+    Ids are returned in config order; a ``None`` soul or a soul without
+    knowledge datasets yields an empty list without querying for rows.
+    """
+    configured_ids = list_agent_soul_knowledge_dataset_ids(agent_soul) if agent_soul is not None else []
+    if not configured_ids:
+        return []
+    visible_rows = get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=configured_ids)
+    return [configured_id for configured_id in configured_ids if configured_id not in visible_rows]
--- a/api/services/agent/prompt_mentions.py
+++ b/api/services/agent/prompt_mentions.py
@ -6,7 +6,7 @@ Slash-menu insertions are stored inline in the plain-string prompt as tokens:

 ``kind`` is a fixed lowercase word; ``id`` points at an item in the Agent
 runtime context. For prompt-owned entities that means Agent Soul lists such as
-``tools`` / ``knowledge.datasets`` / ``human.contacts`` and workflow job lists
+``tools`` / ``knowledge.sets`` / ``human.contacts`` and workflow job lists
 such as ``previous_node_output_refs`` / ``declared_outputs``. For drive-backed
 ``skill`` / ``file`` mentions the field stores a URL-encoded drive key and is
 resolved against ``agent_drive_files`` at runtime. ``label`` is an optional
@ -211,9 +211,9 @@ def build_soul_mention_resolver(agent_soul: AgentSoulConfig) -> MentionResolver:
                    if mention.ref_id in (cli_tool.id, cli_tool.name):
                        return cli_tool.name or cli_tool.id
            case MentionKind.KNOWLEDGE:
-                for dataset in agent_soul.knowledge.datasets:
-                    if mention.ref_id == dataset.id:
-                        return dataset.name or dataset.id
+                for knowledge_set in agent_soul.knowledge.sets:
+                    if mention.ref_id == knowledge_set.id:
+                        return knowledge_set.name or knowledge_set.id
            case MentionKind.HUMAN:
                return _resolve_human_contact(agent_soul.human.contacts, mention.ref_id)
            case _:
--- a/api/tests/unit_tests/clients/agent_backend/test_request_builder.py
+++ b/api/tests/unit_tests/clients/agent_backend/test_request_builder.py
@ -162,8 +162,15 @@ def test_request_builder_adds_knowledge_layer_when_configured():
    run_input = _run_input()
    run_input.knowledge = DifyKnowledgeBaseLayerConfig.model_validate(
        {
-            "dataset_ids": ["dataset-1"],
-            "retrieval": {"mode": "multiple", "top_k": 4},
+            "sets": [
+                {
+                    "id": "support",
+                    "name": "Support KB",
+                    "datasets": [{"id": "dataset-1"}],
+                    "query": {"mode": "generated_query"},
+                    "retrieval": {"mode": "multiple", "top_k": 4},
+                }
+            ],
        }
    )

@ -174,7 +181,7 @@ def test_request_builder_adds_knowledge_layer_when_configured():
    assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].type == DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID
    assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].deps == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID}
    knowledge_config = cast(DifyKnowledgeBaseLayerConfig, layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].config)
-    assert knowledge_config.dataset_ids == ["dataset-1"]
+    assert knowledge_config.sets[0].dataset_ids == ["dataset-1"]


 def test_request_builder_can_delete_on_exit_for_cleanup_paths():
@ -386,8 +393,15 @@ def test_agent_app_request_builder_adds_knowledge_layer_when_configured():
    run_input = _agent_app_input()
    run_input.knowledge = DifyKnowledgeBaseLayerConfig.model_validate(
        {
-            "dataset_ids": ["dataset-1", "dataset-2"],
-            "retrieval": {"mode": "multiple", "top_k": 2},
+            "sets": [
+                {
+                    "id": "support",
+                    "name": "Support KB",
+                    "datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
+                    "query": {"mode": "generated_query"},
+                    "retrieval": {"mode": "multiple", "top_k": 2},
+                }
+            ],
        }
    )

@ -398,7 +412,7 @@ def test_agent_app_request_builder_adds_knowledge_layer_when_configured():
    assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].type == DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID
    assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].deps == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID}
    knowledge_config = cast(DifyKnowledgeBaseLayerConfig, layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].config)
-    assert knowledge_config.dataset_ids == ["dataset-1", "dataset-2"]
+    assert knowledge_config.sets[0].dataset_ids == ["dataset-1", "dataset-2"]


 # ── ENG-635 / ENG-638: ask_human layer injection + deferred_tool_results ─────
--- a/api/tests/unit_tests/commands/test_generate_swagger_specs.py
+++ b/api/tests/unit_tests/commands/test_generate_swagger_specs.py
@ -149,3 +149,55 @@ def test_generate_specs_is_idempotent(tmp_path):
    assert [path.name for path in first_paths] == [path.name for path in second_paths]
    for first_path, second_path in zip(first_paths, second_paths):
        assert first_path.read_text(encoding="utf-8") == second_path.read_text(encoding="utf-8")
+
+
+def test_generate_specs_include_agent_v2_knowledge_set_schema_and_query_enums(tmp_path):
+    """Generated console spec exposes knowledge-set schemas and both query-mode values."""
+    swagger_module = _load_generate_swagger_specs_module()
+    spec_paths = swagger_module.generate_specs(tmp_path)
+    console_spec = next(spec for spec in spec_paths if spec.name == "console-openapi.json")
+    schemas = json.loads(console_spec.read_text(encoding="utf-8"))["components"]["schemas"]
+
+    assert "AgentKnowledgeSetConfig" in schemas
+    # The soul-level ``sets`` array must point at the shared knowledge-set schema.
+    sets_items = schemas["AgentSoulKnowledgeConfig"]["properties"]["sets"]["items"]
+    assert sets_items["$ref"] == "#/components/schemas/AgentKnowledgeSetConfig"
+    # Enum order is part of the assertion.
+    assert schemas["AgentKnowledgeQueryMode"]["enum"] == ["generated_query", "user_query"]
+
+
+def test_checked_in_agent_v2_knowledge_openapi_and_generated_contracts_are_in_sync():
+    """Checked-in OpenAPI markdown and generated TS/zod contracts mention the knowledge-set types.
+
+    This is a plain substring check against files committed to the repository;
+    nothing is regenerated here.
+    """
+    api_dir = Path(__file__).resolve().parents[3]
+    console_contracts = api_dir.parent / "packages" / "contracts" / "generated" / "api" / "console"
+    areas = ("agent", "apps")
+
+    markdown = (api_dir / "openapi" / "markdown" / "console-openapi.md").read_text(encoding="utf-8")
+    type_sources = [(console_contracts / area / "types.gen.ts").read_text(encoding="utf-8") for area in areas]
+    zod_sources = [(console_contracts / area / "zod.gen.ts").read_text(encoding="utf-8") for area in areas]
+
+    for heading in ("#### AgentKnowledgeSetConfig", "#### AgentSoulKnowledgeConfig", "#### AgentKnowledgeQueryMode"):
+        assert heading in markdown
+
+    query_mode_values = ("generated_query", "user_query")
+    type_markers = (
+        "export type AgentKnowledgeSetConfig = {",
+        "export type AgentSoulKnowledgeConfig = {",
+        "AgentKnowledgeQueryMode",
+        *query_mode_values,
+    )
+    zod_markers = (
+        "export const zAgentKnowledgeSetConfig = z.object({",
+        "export const zAgentSoulKnowledgeConfig = z.object({",
+        "zAgentKnowledgeQueryMode = z.enum([",
+        *query_mode_values,
+    )
+    # Both generated packages (agent, apps) must carry every marker of their kind.
+    for sources, markers in ((type_sources, type_markers), (zod_sources, zod_markers)):
+        for content in sources:
+            for marker in markers:
+                assert marker in content
--- a/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py
+++ b/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py
@ -153,12 +153,19 @@ class TestAgentAppRuntimeRequestBuilder:
                    "model": "gpt-4o-mini",
                },
                "knowledge": {
-                    "datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
-                    "query_config": {
-                        "top_k": 3,
-                        "score_threshold": 0.5,
-                        "score_threshold_enabled": False,
-                    },
+                    "sets": [
+                        {
+                            "id": "support",
+                            "name": "Support KB",
+                            "datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
+                            "query": {"mode": "generated_query"},
+                            "retrieval": {
+                                "mode": "multiple",
+                                "top_k": 3,
+                                "score_threshold": None,
+                            },
+                        }
+                    ],
                },
            }
        )
@ -173,10 +180,12 @@ class TestAgentAppRuntimeRequestBuilder:
        assert knowledge.type == "dify.knowledge_base"
        assert knowledge.deps == {"execution_context": "execution_context"}
        dumped_config = knowledge.config.model_dump(mode="json", by_alias=True)
-        assert dumped_config["dataset_ids"] == ["dataset-1", "dataset-2"]
-        assert dumped_config["retrieval"]["mode"] == "multiple"
-        assert dumped_config["retrieval"]["top_k"] == 3
-        assert dumped_config["retrieval"]["score_threshold"] == 0.0
+        knowledge_set = dumped_config["sets"][0]
+        assert [dataset["id"] for dataset in knowledge_set["datasets"]] == ["dataset-1", "dataset-2"]
+        assert knowledge_set["query"] == {"mode": "generated_query", "value": None}
+        assert knowledge_set["retrieval"]["mode"] == "multiple"
+        assert knowledge_set["retrieval"]["top_k"] == 3
+        assert knowledge_set["retrieval"]["score_threshold"] == 0.0

    def test_build_raises_when_model_missing(self):
        builder = AgentAppRuntimeRequestBuilder(
--- a/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_runtime_request_builder.py
+++ b/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_runtime_request_builder.py
@ -512,12 +512,55 @@ def test_build_maps_agent_soul_knowledge_to_knowledge_layer_config():
                    "model": "gpt-test",
                },
                "knowledge": {
-                    "datasets": [{"id": "dataset-1"}, {"id": "  "}, {"id": "dataset-2"}],
-                    "query_config": {
-                        "top_k": 6,
-                        "score_threshold": 0.4,
-                        "score_threshold_enabled": True,
-                    },
+                    "sets": [
+                        {
+                            "id": "support",
+                            "name": "Support KB",
+                            "description": "Support content",
+                            "datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
+                            "query": {"mode": "generated_query"},
+                            "retrieval": {
+                                "mode": "multiple",
+                                "top_k": 6,
+                                "score_threshold": 0.4,
+                                "reranking_model": {"provider": "cohere", "model": "rerank-v3"},
+                                "weights": {"weight_type": "weighted_score", "vector_setting": {"vector_weight": 0.7}},
+                            },
+                            "metadata_filtering": {
+                                "mode": "manual",
+                                "conditions": {
+                                    "logical_operator": "and",
+                                    "conditions": [
+                                        {"name": "category", "comparison_operator": "contains", "value": "auth"}
+                                    ],
+                                },
+                            },
+                        },
+                        {
+                            "id": "release",
+                            "name": "Release Notes",
+                            "datasets": [{"id": "dataset-3"}],
+                            "query": {"mode": "user_query", "value": "release notes"},
+                            "retrieval": {
+                                "mode": "single",
+                                "model": {
+                                    "provider": "openai",
+                                    "name": "gpt-4o-mini",
+                                    "mode": "chat",
+                                    "completion_params": {"temperature": 0.2},
+                                },
+                            },
+                            "metadata_filtering": {
+                                "mode": "automatic",
+                                "model_config": {
+                                    "provider": "openai",
+                                    "name": "gpt-4o-mini",
+                                    "mode": "chat",
+                                    "completion_params": {},
+                                },
+                            },
+                        },
+                    ],
                },
            }
        ),
@ -531,25 +574,75 @@ def test_build_maps_agent_soul_knowledge_to_knowledge_layer_config():
    knowledge_layer = layers["knowledge"]
    assert knowledge_layer["type"] == "dify.knowledge_base"
    assert knowledge_layer["deps"] == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID}
-    assert knowledge_layer["config"] == {
-        "dataset_ids": ["dataset-1", "dataset-2"],
-        "retrieval": {
-            "mode": "multiple",
-            "top_k": 6,
-            "score_threshold": 0.4,
-            "reranking_mode": "reranking_model",
-            "reranking_enable": True,
-            "reranking_model": None,
-            "weights": None,
-            "model": None,
+    assert knowledge_layer["config"]["sets"] == [
+        {
+            "id": "support",
+            "name": "Support KB",
+            "description": "Support content",
+            "datasets": [
+                {"id": "dataset-1", "name": None, "description": None},
+                {"id": "dataset-2", "name": None, "description": None},
+            ],
+            "query": {"mode": "generated_query", "value": None},
+            "retrieval": {
+                "mode": "multiple",
+                "top_k": 6,
+                "score_threshold": 0.4,
+                "reranking_mode": "reranking_model",
+                "reranking_enable": True,
+                "reranking_model": {"provider": "cohere", "model": "rerank-v3"},
+                "weights": {"weight_type": "weighted_score", "vector_setting": {"vector_weight": 0.7}},
+                "model": None,
+            },
+            "metadata_filtering": {
+                "mode": "manual",
+                "metadata_model_config": None,
+                "conditions": {
+                    "logical_operator": "and",
+                    "conditions": [
+                        {"name": "category", "comparison_operator": "contains", "value": "auth"}
+                    ],
+                },
+            },
        },
-        "metadata_filtering": {"mode": "disabled", "metadata_model_config": None, "conditions": None},
-        "max_result_content_chars": 2000,
-        "max_observation_chars": 12000,
-    }
+        {
+            "id": "release",
+            "name": "Release Notes",
+            "description": None,
+            "datasets": [{"id": "dataset-3", "name": None, "description": None}],
+            "query": {"mode": "user_query", "value": "release notes"},
+            "retrieval": {
+                "mode": "single",
+                "top_k": None,
+                "score_threshold": 0.0,
+                "reranking_mode": "reranking_model",
+                "reranking_enable": True,
+                "reranking_model": None,
+                "weights": None,
+                "model": {
+                    "provider": "openai",
+                    "name": "gpt-4o-mini",
+                    "mode": "chat",
+                    "completion_params": {"temperature": 0.2},
+                },
+            },
+            "metadata_filtering": {
+                "mode": "automatic",
+                "metadata_model_config": {
+                    "provider": "openai",
+                    "name": "gpt-4o-mini",
+                    "mode": "chat",
+                    "completion_params": {},
+                },
+                "conditions": None,
+            },
+        },
+    ]
+    assert knowledge_layer["config"]["max_result_content_chars"] == 2000
+    assert knowledge_layer["config"]["max_observation_chars"] == 12000


-def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits_it():
+def test_build_knowledge_layer_maps_disabled_score_threshold_to_zero():
    context = _context()
    snapshot = AgentConfigSnapshot(
        id="snapshot-1",
@ -565,8 +658,19 @@ def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits
                    "model": "gpt-test",
                },
                "knowledge": {
-                    "datasets": [{"id": "dataset-1"}],
-                    "query_config": {},
+                    "sets": [
+                        {
+                            "id": "support",
+                            "name": "Support KB",
+                            "datasets": [{"id": "dataset-1"}],
+                            "query": {"mode": "generated_query"},
+                            "retrieval": {
+                                "mode": "multiple",
+                                "top_k": 4,
+                                "score_threshold": None,
+                            },
+                        }
+                    ],
                },
            }
        ),
@ -577,10 +681,10 @@ def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits

    dumped = result.request.model_dump(mode="json")
    knowledge_layer = next(layer for layer in dumped["composition"]["layers"] if layer["name"] == "knowledge")
-    assert knowledge_layer["config"]["retrieval"]["top_k"] == 4
+    assert knowledge_layer["config"]["sets"][0]["retrieval"]["score_threshold"] == 0.0


-def test_build_skips_knowledge_layer_when_agent_soul_has_no_valid_dataset_ids():
+def test_build_skips_knowledge_layer_when_agent_soul_has_no_sets():
    context = _context()
    snapshot = AgentConfigSnapshot(
        id="snapshot-1",
@ -595,9 +699,7 @@ def test_build_skips_knowledge_layer_when_agent_soul_has_no_valid_dataset_ids():
                    "model_provider": "openai",
                    "model": "gpt-test",
                },
-                "knowledge": {
-                    "datasets": [{"id": "  "}, {}],
-                },
+                "knowledge": {"sets": []},
            }
        ),
    )
@ -1094,7 +1196,15 @@ def test_feature_manifest_marks_knowledge_supported_without_warning_when_configu
    soul = AgentSoulConfig.model_validate(
        {
            "knowledge": {
-                "datasets": [{"id": "dataset-1", "name": "Product Docs"}],
+                "sets": [
+                    {
+                        "id": "product",
+                        "name": "Product Docs",
+                        "datasets": [{"id": "dataset-1", "name": "Product Docs"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    }
+                ],
            }
        }
    )
@ -1106,13 +1216,13 @@ def test_feature_manifest_marks_knowledge_supported_without_warning_when_configu
    assert all("knowledge" not in w["section"] for w in manifest["unsupported_runtime_warnings"])


-def test_feature_manifest_treats_blank_knowledge_dataset_ids_as_not_configured():
+def test_feature_manifest_treats_empty_knowledge_sets_as_not_configured():
    from core.workflow.nodes.agent_v2.runtime_feature_manifest import build_runtime_feature_manifest

    soul = AgentSoulConfig.model_validate(
        {
            "knowledge": {
-                "datasets": [{"id": "  "}, {}],
+                "sets": [],
            }
        }
    )
--- a/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_validators.py
+++ b/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_validators.py
@ -55,6 +55,33 @@ def _snapshot() -> AgentConfigSnapshot:
    )


+def _snapshot_with_knowledge_dataset(dataset_id: str) -> AgentConfigSnapshot:
+    """Build a version-1 snapshot whose single ``support`` knowledge set references ``dataset_id``."""
+    # Only the dataset id varies between callers; everything else is fixed fixture data.
+    support_set = {
+        "id": "support",
+        "name": "Support KB",
+        "datasets": [{"id": dataset_id}],
+        "query": {"mode": "generated_query"},
+        "retrieval": {"mode": "multiple", "top_k": 4},
+    }
+    soul = AgentSoulConfig(
+        model=AgentSoulModelConfig(
+            plugin_id="langgenius/openai",
+            model_provider="openai",
+            model="gpt-test",
+        ),
+        knowledge={"sets": [support_set]},
+    )
+    return AgentConfigSnapshot(
+        id="snapshot-1",
+        tenant_id="tenant-1",
+        agent_id="agent-1",
+        version=1,
+        config_snapshot=soul,
+    )
+
+
 def _graph(edges: list[dict]) -> dict:
    return {
        "nodes": [
@ -515,6 +542,35 @@ def test_publish_validation_rejects_missing_file_ref():
        )


+def test_publish_validation_rejects_missing_or_out_of_scope_knowledge_datasets(
+    monkeypatch: pytest.MonkeyPatch,
+):
+    """Publishing fails, naming the dataset id, when the tenant-scoped lookup finds no row."""
+    dataset_id = "550e8400-e29b-41d4-a716-446655440000"
+    lookup_args: dict[str, object] = {}
+
+    def lookup_returning_no_rows(ids, tenant_id):
+        lookup_args.update(ids=ids, tenant_id=tenant_id)
+        return [], 0
+
+    import services.dataset_service as dataset_service_module
+
+    monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", lookup_returning_no_rows)
+    session = Mock()
+    session.scalar.side_effect = [
+        _binding(WorkflowNodeJobConfig.model_validate({})),
+        _agent(),
+        _snapshot_with_knowledge_dataset(dataset_id),
+    ]
+    with pytest.raises(WorkflowAgentNodeValidationError, match=dataset_id):
+        WorkflowAgentNodeValidator.validate_published_workflow(
+            session=session,
+            workflow=_workflow(_graph([{"source": "start", "target": "agent-node"}])),
+        )
+
+    assert lookup_args == {"ids": [dataset_id], "tenant_id": "tenant-1"}
+
+
 def test_publish_validation_accepts_tool_node_agentic_manual_mode():
    session = Mock()

--- a/api/tests/unit_tests/services/agent/test_agent_composer_entities.py
+++ b/api/tests/unit_tests/services/agent/test_agent_composer_entities.py
@ -1,4 +1,5 @@
 import pytest
+from pydantic import ValidationError

 from models.agent_config_entities import AgentKnowledgeQueryMode, AgentSoulModelConfig, DeclaredOutputType
 from services.agent.composer_service import AgentComposerService
@ -91,14 +92,144 @@ def test_knowledge_query_mode_uses_stable_backend_enums():
    config = AgentSoulConfig.model_validate(
        {
            "knowledge": {
-                "datasets": [{"dataset_id": "dataset-1"}],
-                "query_mode": "generated_query",
-                "query_config": {"generation_prompt": "Create a retrieval query."},
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": "dataset-1"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    }
+                ],
            }
        }
    )

-    assert config.knowledge.query_mode == AgentKnowledgeQueryMode.GENERATED_QUERY
+    assert config.knowledge.sets[0].query.mode == AgentKnowledgeQueryMode.GENERATED_QUERY
+
+
+@pytest.mark.parametrize(
+    ("knowledge_payload", "match"),
+    [
+        # Case: two sets share the same id ("support").
+        (
+            {
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": "dataset-1"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    },
+                    {
+                        "id": "support",
+                        "name": "Billing KB",
+                        "datasets": [{"id": "dataset-2"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    },
+                ]
+            },
+            "knowledge set ids must be unique",
+        ),
+        # Case: two sets with distinct ids share the same name ("Shared KB").
+        (
+            {
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Shared KB",
+                        "datasets": [{"id": "dataset-1"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    },
+                    {
+                        "id": "billing",
+                        "name": "Shared KB",
+                        "datasets": [{"id": "dataset-2"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    },
+                ]
+            },
+            "knowledge set names must be unique",
+        ),
+        # Case: one set lists dataset ids that differ only by surrounding whitespace.
+        (
+            {
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": "dataset-1"}, {"id": " dataset-1 "}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    },
+                ]
+            },
+            "knowledge set dataset ids must be unique",
+        ),
+        # Case: user_query mode without a query value.
+        (
+            {
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": "dataset-1"}],
+                        "query": {"mode": "user_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    },
+                ]
+            },
+            "knowledge query.value is required for user_query mode",
+        ),
+        # Case: single retrieval mode without a model.
+        (
+            {
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": "dataset-1"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "single"},
+                    },
+                ]
+            },
+            "knowledge retrieval.model is required for single mode",
+        ),
+        # Case: automatic metadata filtering without a model_config.
+        (
+            {
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": "dataset-1"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                        "metadata_filtering": {"mode": "automatic"},
+                    },
+                ]
+            },
+            "metadata_filtering.model_config is required for automatic mode",
+        ),
+        # Case: manual metadata filtering without conditions.
+        (
+            {
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": "dataset-1"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                        "metadata_filtering": {"mode": "manual"},
+                    },
+                ]
+            },
+            "metadata_filtering.conditions is required for manual mode",
+        ),
+    ],
+)
+def test_knowledge_sets_contract_rejects_invalid_configs(knowledge_payload, match: str):
+    """Each invalid ``knowledge`` payload must fail ``AgentSoulConfig`` validation.
+
+    ``match`` is the message fragment that ``pytest.raises`` searches for in the
+    raised pydantic ``ValidationError``.
+    """
+    with pytest.raises(ValidationError, match=match):
+        AgentSoulConfig.model_validate({"knowledge": knowledge_payload})


 def test_agent_soul_model_config_is_first_class_without_credentials():
--- a/api/tests/unit_tests/services/agent/test_agent_services.py
+++ b/api/tests/unit_tests/services/agent/test_agent_services.py
@ -2594,20 +2594,151 @@ def test_dataset_rows_filters_malformed_ids(monkeypatch: pytest.MonkeyPatch):
        return [], 0

    import services.dataset_service as dataset_service_module
+    from services.agent.knowledge_datasets import get_tenant_knowledge_dataset_rows

    monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids)

    valid = "550e8400-e29b-41d4-a716-446655440000"
-    rows = AgentComposerService._dataset_rows(tenant_id="tenant-1", dataset_ids=["9999dead-beef", valid])
+    rows = get_tenant_knowledge_dataset_rows(tenant_id="tenant-1", dataset_ids=["9999dead-beef", valid])
    assert rows == {}
    assert captured["ids"] == [valid]

    # all-malformed input never touches the DB
    captured.clear()
-    assert AgentComposerService._dataset_rows(tenant_id="tenant-1", dataset_ids=["nope"]) == {}
+    assert get_tenant_knowledge_dataset_rows(tenant_id="tenant-1", dataset_ids=["nope"]) == {}
    assert captured == {}


+# Run the same scenario through both composer save entry points: the agent-app
+# save and the workflow-node save, each wrapped so the test only passes a payload.
+@pytest.mark.parametrize(
+    ("variant", "save_call"),
+    [
+        (
+            ComposerVariant.AGENT_APP,
+            lambda payload: AgentComposerService.save_agent_app_composer(
+                tenant_id="tenant-1",
+                app_id="app-1",
+                account_id="account-1",
+                payload=payload,
+            ),
+        ),
+        (
+            ComposerVariant.WORKFLOW,
+            lambda payload: AgentComposerService.save_workflow_composer(
+                tenant_id="tenant-1",
+                app_id="app-1",
+                node_id="node-1",
+                account_id="account-1",
+                payload=payload,
+            ),
+        ),
+    ],
+)
+def test_composer_save_rejects_malformed_knowledge_dataset_ids(monkeypatch: pytest.MonkeyPatch, variant, save_call):
+    """Saving with a dataset id of ``"not-a-uuid"`` is rejected before any lookup.
+
+    The save call must raise ``InvalidComposerConfigError`` with a message
+    matching the bad id, and the stubbed dataset lookup must never be invoked.
+    """
+    # "calls" starts at 0 so the final equality check proves the stub was unused.
+    captured = {"calls": 0}
+
+    def fake_get_datasets_by_ids(ids, tenant_id):
+        captured["calls"] += 1
+        captured["ids"] = ids
+        captured["tenant_id"] = tenant_id
+        return [], 0
+
+    import services.dataset_service as dataset_service_module
+
+    monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids)
+
+    payload = ComposerSavePayload.model_validate(
+        {
+            "variant": variant.value,
+            "save_strategy": ComposerSaveStrategy.SAVE_TO_CURRENT_VERSION.value,
+            "soul_lock": {"locked": False},
+            "agent_soul": {
+                "knowledge": {
+                    "sets": [
+                        {
+                            "id": "support",
+                            "name": "Support KB",
+                            "datasets": [{"id": "not-a-uuid"}],
+                            "query": {"mode": "generated_query"},
+                            "retrieval": {"mode": "multiple", "top_k": 4},
+                        }
+                    ]
+                }
+            },
+        }
+    )
+
+    with pytest.raises(InvalidComposerConfigError, match="not-a-uuid"):
+        save_call(payload)
+
+    # Unchanged dict: the dataset lookup was not called at all.
+    assert captured == {"calls": 0}
+
+
+# Run the same scenario through both composer save entry points: the agent-app
+# save and the workflow-node save, each wrapped so the test only passes a payload.
+@pytest.mark.parametrize(
+    ("variant", "save_call"),
+    [
+        (
+            ComposerVariant.AGENT_APP,
+            lambda payload: AgentComposerService.save_agent_app_composer(
+                tenant_id="tenant-1",
+                app_id="app-1",
+                account_id="account-1",
+                payload=payload,
+            ),
+        ),
+        (
+            ComposerVariant.WORKFLOW,
+            lambda payload: AgentComposerService.save_workflow_composer(
+                tenant_id="tenant-1",
+                app_id="app-1",
+                node_id="node-1",
+                account_id="account-1",
+                payload=payload,
+            ),
+        ),
+    ],
+)
+def test_composer_save_rejects_missing_or_out_of_scope_knowledge_datasets(
+    monkeypatch: pytest.MonkeyPatch, variant, save_call
+):
+    """Saving with a well-formed dataset id that the lookup does not return is rejected.
+
+    The stubbed lookup returns no rows, so the save call must raise
+    ``InvalidComposerConfigError`` with a message matching the dataset id, and
+    the lookup must have been called with that id and tenant ``"tenant-1"``.
+    """
+    captured = {}
+    missing_dataset_id = "550e8400-e29b-41d4-a716-446655440000"
+
+    # Stand-in for DatasetService.get_datasets_by_ids: records its arguments and
+    # returns an empty result (assumes the real return shape is (rows, total)).
+    def fake_get_datasets_by_ids(ids, tenant_id):
+        captured["ids"] = ids
+        captured["tenant_id"] = tenant_id
+        return [], 0
+
+    import services.dataset_service as dataset_service_module
+
+    monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids)
+
+    payload = ComposerSavePayload.model_validate(
+        {
+            "variant": variant.value,
+            "save_strategy": ComposerSaveStrategy.SAVE_TO_CURRENT_VERSION.value,
+            "soul_lock": {"locked": False},
+            "agent_soul": {
+                "knowledge": {
+                    "sets": [
+                        {
+                            "id": "support",
+                            "name": "Support KB",
+                            "datasets": [{"id": missing_dataset_id}],
+                            "query": {"mode": "generated_query"},
+                            "retrieval": {"mode": "multiple", "top_k": 4},
+                        }
+                    ]
+                }
+            },
+        }
+    )
+
+    with pytest.raises(InvalidComposerConfigError, match=missing_dataset_id):
+        save_call(payload)
+
+    # The lookup was reached and received only the configured id for this tenant.
+    assert captured == {"ids": [missing_dataset_id], "tenant_id": "tenant-1"}
+
+
 def test_workspace_dify_tools_returns_provider_and_tool_granularities(monkeypatch: pytest.MonkeyPatch):
    """The slash-menu Tools tab needs both selection granularities: a provider
    hosts many tools (like an MCP server), so candidates return one
--- a/api/tests/unit_tests/services/agent/test_composer_candidates.py
+++ b/api/tests/unit_tests/services/agent/test_composer_candidates.py
@ -124,7 +124,18 @@ def _soul() -> AgentSoulConfig:
                    {"id": "ct-2", "name": "disabled-one", "enabled": False},
                ],
            },
-            "knowledge": {"datasets": [{"id": "ds-1", "name": "旧名"}, {"id": "ds-gone", "name": "已删"}]},
+            "knowledge": {
+                "sets": [
+                    {
+                        "id": "kb-1",
+                        "name": "产品知识",
+                        "description": "knowledge set",
+                        "datasets": [{"id": "ds-1", "name": "旧名"}, {"id": "ds-gone", "name": "已删"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    }
+                ]
+            },
            "human": {"contacts": [{"id": "c-1", "name": "David Hayes", "channel": "email"}]},
        }
    )
@ -143,12 +154,16 @@ def test_soul_candidates_lists_configured_items_only():
    assert [item["name"] for item in lists["cli_tools"]] == ["ffmpeg"]
    # the stable mention id flows through so the frontend can mint [§cli_tool:<id>§]
    assert [item["id"] for item in lists["cli_tools"]] == ["ct-1"]
-    # enriched from DB; dangling dataset kept with missing flag (placeholder, 0522)
-    knowledge = {item["id"]: item for item in lists["knowledge_datasets"]}
-    assert knowledge["ds-1"]["name"] == "产品手册"
-    assert knowledge["ds-1"]["missing"] is False
-    assert knowledge["ds-gone"]["missing"] is True
-    assert knowledge["ds-gone"]["name"] == "已删"
+    # Knowledge mentions point at set ids; nested datasets are hydrated for context.
+    knowledge_set = lists["knowledge_sets"][0]
+    assert knowledge_set["id"] == "kb-1"
+    assert knowledge_set["name"] == "产品知识"
+    assert knowledge_set["missing_dataset_ids"] == ["ds-gone"]
+    datasets = {item["id"]: item for item in knowledge_set["datasets"]}
+    assert datasets["ds-1"]["name"] == "产品手册"
+    assert datasets["ds-1"]["missing"] is False
+    assert datasets["ds-gone"]["missing"] is True
+    assert datasets["ds-gone"]["name"] == "已删"
    assert lists["human_contacts"][0]["id"] == "c-1"
    assert lists["dify_tools"][0]["id"] == "tavily/tavily_search"

--- a/api/tests/unit_tests/services/agent/test_composer_mention_validation.py
+++ b/api/tests/unit_tests/services/agent/test_composer_mention_validation.py
@ -149,22 +149,32 @@ def test_dangling_knowledge_without_label_gets_fallback_name():
    ]


-def test_configured_but_deleted_dataset_surfaces_as_placeholder():
+def test_configured_but_deleted_knowledge_set_surfaces_as_placeholder():
+    """A knowledge mention resolves cleanly only while its set id is reported as existing.
+
+    The prompt mentions set ``kb-1``. With ``kb-1`` in ``existing_knowledge_set_ids``
+    the ``knowledge_retrieval_placeholder`` findings are empty; with an empty set
+    of existing ids the mention is reported with the set name as placeholder.
+    """
     payload = ComposerSavePayload.model_validate(
         {
             "variant": "agent_app",
             "agent_soul": {
-                "prompt": {"system_prompt": "see [§knowledge:ds-1:产品手册§]"},
-                "knowledge": {"datasets": [{"id": "ds-1", "name": "产品手册"}]},
+                "prompt": {"system_prompt": "see [§knowledge:kb-1:产品手册§]"},
+                "knowledge": {
+                    "sets": [
+                        {
+                            "id": "kb-1",
+                            "name": "产品手册",
+                            "datasets": [{"id": "ds-1", "name": "产品手册"}],
+                            "query": {"mode": "generated_query"},
+                            "retrieval": {"mode": "multiple", "top_k": 4},
+                        }
+                    ]
+                },
             },
             "save_strategy": "save_to_current_version",
         }
     )
-    # configured + DB row exists -> clean
-    assert _findings(payload, existing_dataset_ids={"ds-1"})["knowledge_retrieval_placeholder"] == []
-    # configured but deleted in DB -> placeholder
-    assert _findings(payload, existing_dataset_ids=set())["knowledge_retrieval_placeholder"] == [
-        {"id": "ds-1", "placeholder_name": "产品手册"}
+    # configured + current Agent Soul row exists -> clean
+    assert _findings(payload, existing_knowledge_set_ids={"kb-1"})["knowledge_retrieval_placeholder"] == []
+    # configured but removed from the current Agent Soul surface -> placeholder
+    assert _findings(payload, existing_knowledge_set_ids=set())["knowledge_retrieval_placeholder"] == [
+        {"id": "kb-1", "placeholder_name": "产品手册"}
     ]


--- a/api/tests/unit_tests/services/agent/test_prompt_mentions.py
+++ b/api/tests/unit_tests/services/agent/test_prompt_mentions.py
@ -107,7 +107,17 @@ def soul() -> AgentSoulConfig:
                ],
                "cli_tools": [{"id": "ct-1", "name": "ffmpeg"}],
            },
-            "knowledge": {"datasets": [{"id": "ds-1", "name": "产品手册"}]},
+            "knowledge": {
+                "sets": [
+                    {
+                        "id": "kb-1",
+                        "name": "产品手册",
+                        "datasets": [{"id": "ds-1", "name": "产品手册"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    }
+                ]
+            },
            "human": {"contacts": [{"id": "c-1", "name": "David Hayes", "channel": "email"}]},
        }
    )
@ -117,7 +127,7 @@ def test_soul_resolver_resolves_each_kind(soul: AgentSoulConfig):
    resolver = build_soul_mention_resolver(soul)
    prompt = (
        "Use [§tool:tavily/tavily_search:tavily§], run [§cli_tool:ct-1:ffmpeg§], "
-        "ground in [§knowledge:ds-1§], ask [§human:c-1§]."
+        "ground in [§knowledge:kb-1§], ask [§human:c-1§]."
    )

    expanded = expand_prompt_mentions(prompt, resolver)
--- a/dify-agent/src/dify_agent/layers/knowledge/init.py
+++ b/dify-agent/src/dify_agent/layers/knowledge/init.py
@ -7,21 +7,31 @@ root stays import-safe for callers that only need to construct run requests.
 from dify_agent.layers.knowledge.configs import (
    DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID,
    DifyKnowledgeBaseLayerConfig,
+    DifyKnowledgeDatasetConfig,
+    DifyKnowledgeEagerResult,
    DifyKnowledgeMetadataCondition,
    DifyKnowledgeMetadataConditions,
    DifyKnowledgeMetadataFilteringConfig,
    DifyKnowledgeModelConfig,
+    DifyKnowledgeQueryConfig,
    DifyKnowledgeRerankingModelConfig,
    DifyKnowledgeRetrievalConfig,
+    DifyKnowledgeRuntimeState,
+    DifyKnowledgeSetConfig,
 )

 __all__ = [
    "DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID",
    "DifyKnowledgeBaseLayerConfig",
+    "DifyKnowledgeDatasetConfig",
+    "DifyKnowledgeEagerResult",
    "DifyKnowledgeMetadataCondition",
    "DifyKnowledgeMetadataConditions",
    "DifyKnowledgeMetadataFilteringConfig",
    "DifyKnowledgeModelConfig",
+    "DifyKnowledgeQueryConfig",
    "DifyKnowledgeRerankingModelConfig",
    "DifyKnowledgeRetrievalConfig",
+    "DifyKnowledgeRuntimeState",
+    "DifyKnowledgeSetConfig",
 ]
--- a/dify-agent/src/dify_agent/layers/knowledge/configs.py
+++ b/dify-agent/src/dify_agent/layers/knowledge/configs.py
@ -1,12 +1,11 @@
 """Client-safe DTOs for the Dify knowledge-base Agenton layer.

-The public layer config exposes only static retrieval controls: dataset ids,
-retrieval strategy, metadata filtering, and observation-size limits. The agent
-model itself should only ever see a single ``query`` tool argument; tenant/
-app/user context comes from the execution-context layer and the actual
-retrieval is delegated to the Dify API inner endpoint. Tool naming is not
-caller-configurable: the runtime always exposes the same stable knowledge-base
-search tool.
+The public layer config carries one or more named knowledge sets. Each set owns
+its dataset ids plus query, retrieval, and metadata-filtering policy. Generated-
+query sets are exposed through one stable model-visible search tool whose
+schema lets the model pick ``set_name`` and ``query``; user-query sets are
+retrieved eagerly when the layer enters a run and their formatted observations
+are kept only in JSON-safe ``runtime_state`` for session snapshots.
 """

 from __future__ import annotations
@ -61,6 +60,44 @@ class DifyKnowledgeRerankingModelConfig(BaseModel):
    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")


+class DifyKnowledgeDatasetConfig(BaseModel):
+    """One dataset selected by a knowledge set.
+
+    Only ``id`` is used for retrieval. ``name`` and ``description`` are retained
+    because callers already have them and they are useful in runtime/debug
+    snapshots without changing the inner retrieval request contract.
+    """
+
+    # Stored with surrounding whitespace stripped (see validate_id below).
+    id: str
+    name: str | None = None
+    description: str | None = None
+
+    # Unknown fields in the payload are rejected rather than ignored.
+    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
+
+    @field_validator("id")
+    @classmethod
+    def validate_id(cls, value: str) -> str:
+        """Return ``value`` stripped of surrounding whitespace.
+
+        Raises:
+            ValueError: if the id is empty once stripped.
+        """
+        normalized = value.strip()
+        if not normalized:
+            raise ValueError("dataset id must not be blank")
+        return normalized
+
+
+class DifyKnowledgeQueryConfig(BaseModel):
+    """Query policy for one knowledge set."""
+
+    mode: Literal["user_query", "generated_query"]
+    # Must be non-blank when mode is "user_query"; optional for "generated_query".
+    value: str | None = None
+
+    # Unknown fields in the payload are rejected rather than ignored.
+    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
+
+    @model_validator(mode="after")
+    def validate_mode_specific_fields(self) -> DifyKnowledgeQueryConfig:
+        """Require a non-blank ``value`` in ``user_query`` mode.
+
+        The check treats ``None`` and whitespace-only strings as missing. The
+        stored ``value`` is left as provided; it is not stripped here.
+
+        Raises:
+            ValueError: if ``mode`` is ``user_query`` and ``value`` is blank.
+        """
+        if self.mode == "user_query" and not (self.value or "").strip():
+            raise ValueError("query.value is required for user_query mode")
+        return self
+
+
 class DifyKnowledgeRetrievalConfig(BaseModel):
    """Static retrieval controls mirrored into the inner API request."""

@ -151,38 +188,90 @@ class DifyKnowledgeMetadataFilteringConfig(BaseModel):
        return payload


-class DifyKnowledgeBaseLayerConfig(LayerConfig):
-    """Public config for one model-visible knowledge search tool.
+class DifyKnowledgeSetConfig(BaseModel):
+    """One independently searchable or eagerly-preloaded knowledge set."""

-    The model only gets to choose whether to call the tool and what ``query``
-    to send. Dataset ids, retrieval settings, metadata filtering, and caller
-    context remain config/runtime concerns outside the model-visible tool
-    schema. The tool name and description are fixed by the layer runtime and do
-    not appear in the public config DTO.
-    """
-
-    dataset_ids: list[str]
+    # id and name are stored stripped and must be non-blank (validated below).
+    id: str
+    name: str
+    description: str | None = None
+    # Must contain at least one dataset, with no repeated dataset id.
+    datasets: list[DifyKnowledgeDatasetConfig]
+    query: DifyKnowledgeQueryConfig
     retrieval: DifyKnowledgeRetrievalConfig
     metadata_filtering: DifyKnowledgeMetadataFilteringConfig = Field(
         default_factory=DifyKnowledgeMetadataFilteringConfig
     )
+
+    # Unknown fields in the payload are rejected rather than ignored.
+    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
+
+    @field_validator("id", "name")
+    @classmethod
+    def validate_non_blank_identity(cls, value: str) -> str:
+        """Return ``value`` stripped of surrounding whitespace.
+
+        Applied to both ``id`` and ``name``.
+
+        Raises:
+            ValueError: if the value is empty once stripped.
+        """
+        normalized = value.strip()
+        if not normalized:
+            raise ValueError("knowledge set id and name must not be blank")
+        return normalized
+
+    @model_validator(mode="after")
+    def validate_dataset_ids(self) -> DifyKnowledgeSetConfig:
+        """Require a non-empty dataset list whose ids are all distinct.
+
+        Raises:
+            ValueError: if ``datasets`` is empty or two entries share an id.
+        """
+        if not self.datasets:
+            raise ValueError("knowledge set requires at least one dataset")
+        dataset_ids = [dataset.id for dataset in self.datasets]
+        # A set collapses duplicates, so a length mismatch means a repeated id.
+        if len(dataset_ids) != len(set(dataset_ids)):
+            raise ValueError("knowledge set dataset ids must be unique")
+        return self
+
+    @property
+    def dataset_ids(self) -> list[str]:
+        """Return the selected dataset ids for the inner retrieval request."""
+        return [dataset.id for dataset in self.datasets]
+
+
+class DifyKnowledgeEagerResult(BaseModel):
+    """JSON-safe eager user-query result stored in layer runtime state."""
+
+    # Presumably mirror the id/name of the knowledge set that produced this
+    # result - TODO confirm against the layer code that builds it.
+    set_id: str
+    set_name: str
+    # Presumably the query text that was used for the retrieval - TODO confirm.
+    query: str
+    # Observation text kept for the session snapshot.
+    observation: str
+    # Outcome label; only these three values are accepted.
+    status: Literal["success", "empty", "temporarily_unavailable"]
+
+    # Unknown fields in the payload are rejected rather than ignored.
+    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
+
+
+class DifyKnowledgeRuntimeState(BaseModel):
+    """Serializable eager-retrieval state stored in Agenton session snapshots."""
+
+    # NOTE(review): looks like a digest of the eager-retrieval config used to
+    # detect config changes between runs - confirm against the layer code.
+    eager_config_fingerprint: str | None = None
+    # Defaults to a fresh empty list per instance.
+    eager_results: list[DifyKnowledgeEagerResult] = Field(default_factory=list)
+
+    # Unknown fields are rejected, and attribute assignments after construction
+    # are validated as well (validate_assignment=True).
+    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid", validate_assignment=True)
+
+
+class DifyKnowledgeBaseLayerConfig(LayerConfig):
+    """Public config for one knowledge-base layer.
+
+    The model-visible surface stays fixed to ``knowledge_base_search``. Set
+    names are the only model-visible selection labels; dataset ids, retrieval
+    controls, metadata filtering, and caller identity remain config/runtime
+    concerns outside the tool schema.
+    """
+
+    # Must be non-empty, with unique set ids and unique set names.
+    sets: list[DifyKnowledgeSetConfig]
     max_result_content_chars: int = Field(default=2000, ge=1)
     max_observation_chars: int = Field(default=12000, ge=1)

     model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")

-    @field_validator("dataset_ids")
-    @classmethod
-    def validate_dataset_ids(cls, value: list[str]) -> list[str]:
-        if not value:
-            raise ValueError("dataset_ids must contain at least one item")
-        normalized_ids = [item.strip() for item in value]
-        if any(not item for item in normalized_ids):
-            raise ValueError("dataset_ids must not contain blank items")
-        return normalized_ids
-
     @model_validator(mode="after")
-    def validate_observation_limits(self) -> DifyKnowledgeBaseLayerConfig:
+    def validate_sets_and_observation_limits(self) -> DifyKnowledgeBaseLayerConfig:
+        """Check cross-set invariants and the observation-size limits.
+
+        Raises:
+            ValueError: if ``sets`` is empty, two sets share an id, two sets
+                share a name ignoring case and surrounding whitespace, or
+                ``max_observation_chars`` is below ``max_result_content_chars``.
+        """
+        if not self.sets:
+            raise ValueError("sets must contain at least one knowledge set")
+        set_ids = [knowledge_set.id for knowledge_set in self.sets]
+        if len(set_ids) != len(set(set_ids)):
+            raise ValueError("knowledge set ids must be unique")
+        # Names are compared stripped and lower-cased, so names differing only
+        # by case count as duplicates.
+        normalized_names = [knowledge_set.name.strip().lower() for knowledge_set in self.sets]
+        if len(normalized_names) != len(set(normalized_names)):
+            raise ValueError("knowledge set names must be unique")
         if self.max_observation_chars < self.max_result_content_chars:
             raise ValueError("max_observation_chars must be greater than or equal to max_result_content_chars")
         return self
@ -191,10 +280,15 @@ class DifyKnowledgeBaseLayerConfig(LayerConfig):
 __all__ = [
    "DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID",
    "DifyKnowledgeBaseLayerConfig",
+    "DifyKnowledgeDatasetConfig",
+    "DifyKnowledgeEagerResult",
    "DifyKnowledgeMetadataCondition",
    "DifyKnowledgeMetadataConditions",
    "DifyKnowledgeMetadataFilteringConfig",
    "DifyKnowledgeModelConfig",
+    "DifyKnowledgeQueryConfig",
    "DifyKnowledgeRerankingModelConfig",
    "DifyKnowledgeRetrievalConfig",
+    "DifyKnowledgeRuntimeState",
+    "DifyKnowledgeSetConfig",
 ]
--- a/dify-agent/src/dify_agent/layers/knowledge/layer.py
+++ b/dify-agent/src/dify_agent/layers/knowledge/layer.py
@ -1,17 +1,18 @@
-"""Dify knowledge-base layer exposing one model-visible search tool.
+"""Dify knowledge-base layer exposing set-aware retrieval.

 The layer depends on ``DifyExecutionContextLayer`` for tenant/app/user/invoke
-identity, keeps retrieval controls in config only, and borrows a lifespan-owned
-HTTP client for each tool invocation. It never owns live clients or stores
-retrieved source content in layer state. Tool identity is intentionally fixed at
-runtime: callers cannot rename the knowledge tool or override its description
-through public layer config because the model-visible surface must stay stable
-across API-side Agent Soul mappings.
+identity. Generated-query sets become one stable model-visible
+``knowledge_base_search(set_name, query)`` tool, while user-query sets are
+retrieved eagerly during context entry and exposed as additional user prompt
+content. Eager observations are persisted only as JSON-safe runtime state so
+Agenton session snapshots can resume without repeating unchanged retrievals.
 """

 from __future__ import annotations

 from dataclasses import dataclass
+import hashlib
+import json
 import logging
 from typing import ClassVar, cast

@ -27,7 +28,13 @@ from dify_agent.layers.knowledge.client import (
    DifyKnowledgeBaseClientError,
    DifyKnowledgeRetrieveResponse,
 )
-from dify_agent.layers.knowledge.configs import DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID, DifyKnowledgeBaseLayerConfig
+from dify_agent.layers.knowledge.configs import (
+    DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID,
+    DifyKnowledgeBaseLayerConfig,
+    DifyKnowledgeEagerResult,
+    DifyKnowledgeRuntimeState,
+    DifyKnowledgeSetConfig,
+)

 logger = logging.getLogger(__name__)

@ -35,23 +42,14 @@ logger = logging.getLogger(__name__)
 # public DTO cannot grow a parallel naming contract that diverges from the
 # runtime knowledge-search surface.
 _KNOWLEDGE_BASE_TOOL_NAME = "knowledge_base_search"
-_KNOWLEDGE_BASE_TOOL_DESCRIPTION = "Search configured knowledge bases for information relevant to the query."
+_KNOWLEDGE_BASE_TOOL_DESCRIPTION = (
+    "Search a configured knowledge set. Pick one configured set_name and provide a focused search query."
+)
 BLANK_QUERY_OBSERVATION = "knowledge base search requires a non-empty query"
 NO_RESULTS_OBSERVATION = "No relevant knowledge base results were found."
 TEMPORARY_UNAVAILABLE_OBSERVATION = (
    "Knowledge base search is temporarily unavailable. Please continue without it if possible."
 )
-QUERY_TOOL_SCHEMA = {
-    "type": "object",
-    "properties": {
-        "query": {
-            "type": "string",
-            "description": "Search query for the configured knowledge bases.",
-        }
-    },
-    "required": ["query"],
-    "additionalProperties": False,
-}


 class DifyKnowledgeBaseDeps(LayerDeps):
@ -61,8 +59,10 @@ class DifyKnowledgeBaseDeps(LayerDeps):


@dataclass(slots=True)
-class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBaseLayerConfig]):
-    """Layer that resolves one config-scoped knowledge search tool."""
+class DifyKnowledgeBaseLayer(
+    PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBaseLayerConfig, DifyKnowledgeRuntimeState]
+):
+    """Layer that resolves set-scoped knowledge tools and eager user prompts."""

    type_id: ClassVar[str | None] = DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID

@ -95,7 +95,7 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
        )

    async def get_tools(self, *, http_client: httpx.AsyncClient) -> list[Tool[object]]:
-        """Build one Pydantic AI tool that exposes only ``query`` to the model.
+        """Build the unified generated-query Pydantic AI tool, when needed.

        Knowledge tools depend on execution-context identity that is optional for
        other run types but mandatory here: ``tenant_id``, ``user_id``,
@ -103,11 +103,15 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
        any HTTP request is attempted. Tool execution then follows a strict
        observation policy:

+        - unknown ``set_name`` returns a local validation observation;
        - blank ``query`` returns a local validation observation;
        - retryable client failures (timeouts, connection failures, HTTP
          ``429``/``502``) become a temporary-unavailable observation;
        - non-retryable client failures are raised so the run fails fast.
        """
+        generated_sets = self._generated_query_sets()
+        if not generated_sets:
+            return []
        if http_client.is_closed:
            raise RuntimeError("DifyKnowledgeBaseLayer.get_tools() requires an open shared HTTP client.")

@ -118,54 +122,28 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
            api_key=self.inner_api_key,
            http_client=http_client,
        )
+        set_by_name = {knowledge_set.name: knowledge_set for knowledge_set in generated_sets}

-        async def knowledge_base_search(_ctx: RunContext[object], query: str) -> str:
+        async def knowledge_base_search(_ctx: RunContext[object], set_name: str, query: str) -> str:
+            knowledge_set = set_by_name.get(set_name)
+            if knowledge_set is None:
+                return f"unknown knowledge set: {set_name}"
            normalized_query = query.strip()
            if not normalized_query:
                return BLANK_QUERY_OBSERVATION
-            try:
-                response = await client.retrieve(
-                    tenant_id=caller["tenant_id"],
-                    user_id=caller["user_id"],
-                    app_id=caller["app_id"],
-                    user_from=caller["user_from"],
-                    invoke_from=caller["invoke_from"],
-                    dataset_ids=list(self.config.dataset_ids),
-                    query=normalized_query,
-                    retrieval=self.config.retrieval,
-                    metadata_filtering=self.config.metadata_filtering,
-                )
-            except DifyKnowledgeBaseClientError as exc:
-                if exc.retryable:
-                    logger.warning(
-                        "knowledge base search temporarily unavailable",
-                        extra={
-                            "tenant_id": caller["tenant_id"],
-                            "app_id": caller["app_id"],
-                            "invoke_from": caller["invoke_from"],
-                            "error_code": exc.error_code,
-                            "status_code": exc.status_code,
-                        },
-                    )
-                    return TEMPORARY_UNAVAILABLE_OBSERVATION
-                logger.error(
-                    "knowledge base search failed",
-                    extra={
-                        "tenant_id": caller["tenant_id"],
-                        "app_id": caller["app_id"],
-                        "invoke_from": caller["invoke_from"],
-                        "error_code": exc.error_code,
-                        "status_code": exc.status_code,
-                    },
-                )
-                raise
-            return _format_observation(response, self.config)
+            return await self._retrieve_for_set(
+                client=client,
+                caller=caller,
+                knowledge_set=knowledge_set,
+                query=normalized_query,
+                retryable_observation=True,
+            )

        async def prepare_tool_definition(_ctx: RunContext[object], tool_def: ToolDefinition) -> ToolDefinition:
            return ToolDefinition(
                name=tool_def.name,
                description=tool_def.description,
-                parameters_json_schema=QUERY_TOOL_SCHEMA,
+                parameters_json_schema=_tool_schema(generated_sets),
                strict=tool_def.strict,
                sequential=tool_def.sequential,
                metadata=tool_def.metadata,
@ -181,11 +159,177 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
                knowledge_base_search,
                takes_ctx=True,
                name=_KNOWLEDGE_BASE_TOOL_NAME,
-                description=_KNOWLEDGE_BASE_TOOL_DESCRIPTION,
+                description=_tool_description(generated_sets),
                prepare=prepare_tool_definition,
            )
        ]

+    @property
+    @override
+    def user_prompts(self) -> list[str]:
+        """Render cached eager retrieval results as one additional user prompt.
+
+        Returns an empty list when the runtime state holds no eager results.
+        Otherwise returns a single string: a fixed heading followed by one
+        ``Set`` / ``Query`` / ``Results`` section per cached result, with
+        sections separated by a blank line.
+        """
+        cached_results = self.runtime_state.eager_results
+        if not cached_results:
+            return []
+
+        rendered_sections = [
+            f"Set: {entry.set_name}\nQuery: {entry.query}\nResults:\n{entry.observation}"
+            for entry in cached_results
+        ]
+        return ["Knowledge retrieval results:\n\n" + "\n\n".join(rendered_sections)]
+
+    @override
+    async def on_context_create(self) -> None:
+        """Refresh the eager retrieval results when a context is created, if they are missing or stale."""
+        await self._refresh_eager_results_if_needed()
+
+    @override
+    async def on_context_resume(self) -> None:
+        """Refresh the eager retrieval results when a context is resumed, if they are missing or stale."""
+        await self._refresh_eager_results_if_needed()
+
+    def _generated_query_sets(self) -> list[DifyKnowledgeSetConfig]:
+        """Return the configured sets whose query mode is ``generated_query``, in config order."""
+        configured_sets = self.config.sets
+        return [entry for entry in configured_sets if entry.query.mode == "generated_query"]
+
+    def _user_query_sets(self) -> list[DifyKnowledgeSetConfig]:
+        """Return the configured sets whose query mode is ``user_query``, in config order."""
+        configured_sets = self.config.sets
+        return [entry for entry in configured_sets if entry.query.mode == "user_query"]
+
+    async def _refresh_eager_results_if_needed(self) -> None:
+        """Fetch eager results for ``user_query`` sets unless the cached ones are current.
+
+        - With no ``user_query`` sets configured, the stored fingerprint and
+          results are cleared.
+        - When the stored fingerprint equals the fingerprint of the current
+          ``user_query`` sets, nothing is requested.
+        - Otherwise each set is retrieved in config order through a
+          short-lived ``httpx.AsyncClient``. Retryable client errors are
+          recorded as a ``temporarily_unavailable`` result; non-retryable ones
+          are logged and re-raised, and the runtime state is left as it was
+          because it is only written after every set has been processed.
+        """
+        eager_sets = self._user_query_sets()
+        if not eager_sets:
+            self.runtime_state.eager_config_fingerprint = None
+            self.runtime_state.eager_results = []
+            return
+
+        fingerprint = _eager_config_fingerprint(eager_sets)
+        if self.runtime_state.eager_config_fingerprint == fingerprint:
+            return
+
+        caller = _build_caller_context(self.deps.execution_context.config)
+        async with httpx.AsyncClient() as http_client:
+            client = DifyKnowledgeBaseClient(
+                base_url=self.inner_api_url,
+                api_key=self.inner_api_key,
+                http_client=http_client,
+            )
+            collected: list[DifyKnowledgeEagerResult] = []
+            for knowledge_set in eager_sets:
+                query = (knowledge_set.query.value or "").strip()
+                try:
+                    response = await client.retrieve(
+                        tenant_id=caller["tenant_id"],
+                        user_id=caller["user_id"],
+                        app_id=caller["app_id"],
+                        user_from=caller["user_from"],
+                        invoke_from=caller["invoke_from"],
+                        dataset_ids=knowledge_set.dataset_ids,
+                        query=query,
+                        retrieval=knowledge_set.retrieval,
+                        metadata_filtering=knowledge_set.metadata_filtering,
+                    )
+                except DifyKnowledgeBaseClientError as exc:
+                    # Both log records carry the same structured fields.
+                    log_extra = {
+                        "tenant_id": caller["tenant_id"],
+                        "app_id": caller["app_id"],
+                        "invoke_from": caller["invoke_from"],
+                        "knowledge_set_id": knowledge_set.id,
+                        "error_code": exc.error_code,
+                        "status_code": exc.status_code,
+                    }
+                    if not exc.retryable:
+                        logger.error("eager knowledge retrieval failed", extra=log_extra)
+                        raise
+                    logger.warning("eager knowledge retrieval temporarily unavailable", extra=log_extra)
+                    collected.append(
+                        DifyKnowledgeEagerResult(
+                            set_id=knowledge_set.id,
+                            set_name=knowledge_set.name,
+                            query=query,
+                            observation=TEMPORARY_UNAVAILABLE_OBSERVATION,
+                            status="temporarily_unavailable",
+                        )
+                    )
+                else:
+                    collected.append(
+                        DifyKnowledgeEagerResult(
+                            set_id=knowledge_set.id,
+                            set_name=knowledge_set.name,
+                            query=query,
+                            observation=_format_observation(response, self.config, include_heading=False),
+                            status="success" if response.results else "empty",
+                        )
+                    )
+
+        self.runtime_state.eager_results = collected
+        self.runtime_state.eager_config_fingerprint = fingerprint
+
+    async def _retrieve_for_set(
+        self,
+        *,
+        client: DifyKnowledgeBaseClient,
+        caller: dict[str, str],
+        knowledge_set: DifyKnowledgeSetConfig,
+        query: str,
+        retryable_observation: bool,
+    ) -> str:
+        """Run one retrieval against ``knowledge_set`` and format it as an observation.
+
+        Args:
+            client: Inner-API client used for the retrieve call.
+            caller: Caller identity mapping; its ``tenant_id``, ``user_id``,
+                ``app_id``, ``user_from`` and ``invoke_from`` entries are forwarded.
+            knowledge_set: Set whose dataset ids, retrieval settings and
+                metadata filtering are sent with the request.
+            query: Query string passed through unchanged.
+            retryable_observation: When true, a retryable client error is
+                returned as ``TEMPORARY_UNAVAILABLE_OBSERVATION`` instead of
+                being raised.
+
+        Returns:
+            The formatted observation, or the temporary-unavailable observation.
+
+        Raises:
+            DifyKnowledgeBaseClientError: For non-retryable failures, and for
+                retryable ones when ``retryable_observation`` is false.
+        """
+        try:
+            response = await client.retrieve(
+                tenant_id=caller["tenant_id"],
+                user_id=caller["user_id"],
+                app_id=caller["app_id"],
+                user_from=caller["user_from"],
+                invoke_from=caller["invoke_from"],
+                dataset_ids=knowledge_set.dataset_ids,
+                query=query,
+                retrieval=knowledge_set.retrieval,
+                metadata_filtering=knowledge_set.metadata_filtering,
+            )
+        except DifyKnowledgeBaseClientError as exc:
+            # Both log records carry the same structured fields.
+            log_extra = {
+                "tenant_id": caller["tenant_id"],
+                "app_id": caller["app_id"],
+                "invoke_from": caller["invoke_from"],
+                "knowledge_set_id": knowledge_set.id,
+                "error_code": exc.error_code,
+                "status_code": exc.status_code,
+            }
+            degrade_to_observation = exc.retryable and retryable_observation
+            if not degrade_to_observation:
+                logger.error("knowledge base search failed", extra=log_extra)
+                raise
+            logger.warning("knowledge base search temporarily unavailable", extra=log_extra)
+            return TEMPORARY_UNAVAILABLE_OBSERVATION
+        return _format_observation(response, self.config)
+

 def _build_caller_context(execution_context: object) -> dict[str, str]:
    """Extract the inner-API caller identity from execution-context config.
@ -232,7 +376,56 @@ def _build_caller_context(execution_context: object) -> dict[str, str]:
    }


-def _format_observation(response: DifyKnowledgeRetrieveResponse, config: DifyKnowledgeBaseLayerConfig) -> str:
+def _tool_schema(generated_sets: list[DifyKnowledgeSetConfig]) -> dict[str, object]:
+    """Build the JSON schema for the search tool's arguments.
+
+    ``set_name`` is restricted to the names of ``generated_sets`` (in the
+    given order) and ``query`` is a free-form string; both are required and
+    no other properties are allowed.
+    """
+    selectable_names = [knowledge_set.name for knowledge_set in generated_sets]
+    set_name_property: dict[str, object] = {
+        "type": "string",
+        "enum": selectable_names,
+        "description": "Knowledge set to search.",
+    }
+    query_property: dict[str, object] = {
+        "type": "string",
+        "description": "Search query for the selected knowledge set.",
+    }
+    return {
+        "type": "object",
+        "properties": {"set_name": set_name_property, "query": query_property},
+        "required": ["set_name", "query"],
+        "additionalProperties": False,
+    }
+
+
+def _tool_description(generated_sets: list[DifyKnowledgeSetConfig]) -> str:
+    """Return the base tool description followed by the list of configured sets.
+
+    Each set is listed as ``name: description`` when it has a truthy
+    description and as the bare name otherwise; entries are comma-separated.
+    """
+    labels = [
+        f"{entry.name}: {entry.description}" if entry.description else entry.name
+        for entry in generated_sets
+    ]
+    return f"{_KNOWLEDGE_BASE_TOOL_DESCRIPTION} Configured sets: {', '.join(labels)}."
+
+
+def _eager_config_fingerprint(user_query_sets: list[DifyKnowledgeSetConfig]) -> str:
+    """Return a stable SHA-256 hex digest of the eager-retrieval configuration.
+
+    The digest covers, per set and in list order: id, name, query, dataset
+    ids, retrieval settings and metadata filtering. The name is included
+    because cached eager results store the set name and it is rendered into
+    the user prompt; without it a renamed set would keep showing its old name
+    after a resume.
+
+    NOTE(review): layer-level formatting limits on the config also shape the
+    cached observation text but are not reachable from this signature, so a
+    change to them alone does not invalidate the cache -- confirm whether
+    that is acceptable.
+    """
+    payload = [
+        {
+            "id": knowledge_set.id,
+            "name": knowledge_set.name,
+            "query": knowledge_set.query.model_dump(mode="json"),
+            "dataset_ids": knowledge_set.dataset_ids,
+            "retrieval": knowledge_set.retrieval.model_dump(mode="json"),
+            "metadata_filtering": knowledge_set.metadata_filtering.model_dump(mode="json", by_alias=True),
+        }
+        for knowledge_set in user_query_sets
+    ]
+    # Sorted keys and compact separators keep the serialization deterministic.
+    serialized = json.dumps(payload, sort_keys=True, separators=(",", ":"))
+    return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
+
+
+def _format_observation(
+    response: DifyKnowledgeRetrieveResponse,
+    config: DifyKnowledgeBaseLayerConfig,
+    *,
+    include_heading: bool = True,
+) -> str:
    """Render inner-API retrieval results into the model-visible tool response.

    The formatting contract is intentionally simple and stable for the model:
@ -248,7 +441,7 @@ def _format_observation(response: DifyKnowledgeRetrieveResponse, config: DifyKno
    if not response.results:
        return NO_RESULTS_OBSERVATION

-    lines = ["Knowledge base search results:"]
+    lines = ["Knowledge base search results:"] if include_heading else []
    for index, result in enumerate(response.results, start=1):
        metadata = result.metadata
        title = result.title or metadata.document_name or "Untitled"
@ -280,6 +473,5 @@ __all__ = [
    "DifyKnowledgeBaseDeps",
    "DifyKnowledgeBaseLayer",
    "NO_RESULTS_OBSERVATION",
-    "QUERY_TOOL_SCHEMA",
    "TEMPORARY_UNAVAILABLE_OBSERVATION",
 ]
--- a/dify-agent/tests/local/dify_agent/layers/knowledge/test_configs.py
+++ b/dify-agent/tests/local/dify_agent/layers/knowledge/test_configs.py
@ -6,46 +6,142 @@ from dify_agent.layers.knowledge import DifyKnowledgeBaseLayerConfig

 def _valid_config() -> dict[str, object]:
    return {
-        "dataset_ids": ["dataset-1"],
-        "retrieval": {
-            "mode": "multiple",
-            "top_k": 4,
-        },
+        "sets": [
+            {
+                "id": "support",
+                "name": "Support KB",
+                "datasets": [{"id": "dataset-1"}],
+                "query": {"mode": "generated_query"},
+                "retrieval": {
+                    "mode": "multiple",
+                    "top_k": 4,
+                },
+            }
+        ],
    }


 def test_knowledge_base_config_accepts_valid_multiple_mode() -> None:
    config = DifyKnowledgeBaseLayerConfig.model_validate(_valid_config())

-    assert config.dataset_ids == ["dataset-1"]
-    assert config.retrieval.top_k == 4
-    assert config.metadata_filtering.mode == "disabled"
+    assert config.sets[0].dataset_ids == ["dataset-1"]
+    assert config.sets[0].retrieval.top_k == 4
+    assert config.sets[0].metadata_filtering.mode == "disabled"


@pytest.mark.parametrize(
    "payload, expected_message",
    [
-        ({"dataset_ids": [], "retrieval": {"mode": "multiple", "top_k": 4}}, "dataset_ids"),
+        ({"sets": []}, "sets"),
        ({"tool_name": "knowledge_base_search", **_valid_config()}, "Extra inputs are not permitted"),
        ({"tool_description": "Search knowledge", **_valid_config()}, "Extra inputs are not permitted"),
-        ({"dataset_ids": ["dataset-1"], "retrieval": {"mode": "multiple"}}, "top_k"),
-        ({"dataset_ids": ["dataset-1"], "retrieval": {"mode": "single"}}, "retrieval.model"),
        (
            {
-                "dataset_ids": ["dataset-1"],
-                "retrieval": {"mode": "multiple", "top_k": 4},
-                "metadata_filtering": {"mode": "automatic"},
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": ""}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    }
+                ]
+            },
+            "dataset id",
+        ),
+        (
+            {
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": "dataset-1"}],
+                        "query": {"mode": "user_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    }
+                ]
+            },
+            "query.value",
+        ),
+        (
+            {
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": "dataset-1"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple"},
+                    }
+                ]
+            },
+            "top_k",
+        ),
+        (
+            {
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": "dataset-1"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "single"},
+                    }
+                ]
+            },
+            "retrieval.model",
+        ),
+        (
+            {
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": "dataset-1"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                        "metadata_filtering": {"mode": "automatic"},
+                    }
+                ],
            },
            "metadata_filtering.model_config",
        ),
        (
            {
-                "dataset_ids": ["dataset-1"],
-                "retrieval": {"mode": "multiple", "top_k": 4},
-                "metadata_filtering": {"mode": "manual"},
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": "dataset-1"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                        "metadata_filtering": {"mode": "manual"},
+                    }
+                ],
            },
            "metadata_filtering.conditions",
        ),
+        (
+            {
+                "sets": [
+                    {
+                        "id": "support",
+                        "name": "Support KB",
+                        "datasets": [{"id": "dataset-1"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    },
+                    {
+                        "id": "docs",
+                        "name": "support kb",
+                        "datasets": [{"id": "dataset-2"}],
+                        "query": {"mode": "generated_query"},
+                        "retrieval": {"mode": "multiple", "top_k": 4},
+                    },
+                ]
+            },
+            "names must be unique",
+        ),
    ],
 )
 def test_knowledge_base_config_rejects_invalid_inputs(payload: dict[str, object], expected_message: str) -> None:
@ -57,8 +153,7 @@ def test_knowledge_base_config_rejects_observation_limit_smaller_than_result_lim
    with pytest.raises(ValidationError, match="max_observation_chars"):
        _ = DifyKnowledgeBaseLayerConfig.model_validate(
            {
-                "dataset_ids": ["dataset-1"],
-                "retrieval": {"mode": "multiple", "top_k": 4},
+                **_valid_config(),
                "max_result_content_chars": 50,
                "max_observation_chars": 20,
            }
--- a/dify-agent/tests/local/dify_agent/layers/knowledge/test_layer.py
+++ b/dify-agent/tests/local/dify_agent/layers/knowledge/test_layer.py
@ -8,7 +8,11 @@ from pydantic_ai import Tool
 from agenton.compositor import Compositor, LayerNode, LayerProvider
 from dify_agent.layers.execution_context import DifyExecutionContextLayerConfig
 from dify_agent.layers.execution_context.layer import DifyExecutionContextLayer
-from dify_agent.layers.knowledge.client import DifyKnowledgeBaseClientError
+from dify_agent.layers.knowledge.client import (
+    DifyKnowledgeBaseClient,
+    DifyKnowledgeBaseClientError,
+    DifyKnowledgeRetrieveResponse,
+)
 from dify_agent.layers.knowledge.configs import DifyKnowledgeBaseLayerConfig
 from dify_agent.layers.knowledge.layer import (
    BLANK_QUERY_OBSERVATION,
@ -32,10 +36,23 @@ def _execution_context_config(**overrides: object) -> DifyExecutionContextLayerC


 def _knowledge_config(**overrides: object) -> DifyKnowledgeBaseLayerConfig:
-    payload: dict[str, object] = {
-        "dataset_ids": ["dataset-1"],
+    set_payload: dict[str, object] = {
+        "id": "support",
+        "name": "Support KB",
+        "datasets": [{"id": "dataset-1"}],
+        "query": {"mode": "generated_query"},
        "retrieval": {"mode": "multiple", "top_k": 4},
    }
+    for key in ("id", "name", "description", "datasets", "query", "retrieval", "metadata_filtering"):
+        if key in overrides:
+            set_payload[key] = overrides.pop(key)
+    if "dataset_ids" in overrides:
+        dataset_ids = overrides.pop("dataset_ids")
+        assert isinstance(dataset_ids, list)
+        set_payload["datasets"] = [{"id": dataset_id} for dataset_id in dataset_ids]
+    payload: dict[str, object] = {
+        "sets": [set_payload],
+    }
    payload.update(overrides)
    return DifyKnowledgeBaseLayerConfig.model_validate(payload)

@ -62,7 +79,7 @@ def _knowledge_provider() -> LayerProvider[DifyKnowledgeBaseLayer]:
    )


-def test_knowledge_layer_exposes_one_query_only_tool_definition() -> None:
+def test_knowledge_layer_exposes_one_set_scoped_tool_definition() -> None:
    async def scenario() -> None:
        compositor = Compositor(
            [
@ -82,20 +99,23 @@ def test_knowledge_layer_exposes_one_query_only_tool_definition() -> None:
                tool_def = await tool.prepare_tool_def(None)  # pyright: ignore[reportArgumentType]
                assert isinstance(tool, Tool)
                assert tool.name == "knowledge_base_search"
-                assert tool.description == "Search configured knowledge bases for information relevant to the query."
+                assert "Pick one configured set_name" in tool.description
                assert tool_def is not None
-                assert (
-                    tool_def.description == "Search configured knowledge bases for information relevant to the query."
-                )
+                assert "Pick one configured set_name" in tool_def.description
                assert tool_def.parameters_json_schema == {
                    "type": "object",
                    "properties": {
+                        "set_name": {
+                            "type": "string",
+                            "enum": ["Support KB"],
+                            "description": "Knowledge set to search.",
+                        },
                        "query": {
                            "type": "string",
-                            "description": "Search query for the configured knowledge bases.",
-                        }
+                            "description": "Search query for the selected knowledge set.",
+                        },
                    },
-                    "required": ["query"],
+                    "required": ["set_name", "query"],
                    "additionalProperties": False,
                }

@ -119,12 +139,105 @@ def test_knowledge_layer_rejects_blank_query_locally() -> None:
            ) as run:
                knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
                tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
-                result = await tool.function_schema.call({"query": "   "}, None)  # pyright: ignore[reportArgumentType]
+                result = await tool.function_schema.call(  # pyright: ignore[reportArgumentType]
+                    {"set_name": "Support KB", "query": "   "}, None
+                )
                assert result == BLANK_QUERY_OBSERVATION

    asyncio.run(scenario())


+def test_knowledge_layer_exposes_no_tool_when_all_sets_are_user_query(monkeypatch: pytest.MonkeyPatch) -> None:
+    """With only a ``user_query`` set configured, ``get_tools`` returns an empty list."""
+
+    # Stub the inner-API call with an empty result so no real request is made
+    # if the layer retrieves the user_query set while the context is entered.
+    async def fake_retrieve(self: DifyKnowledgeBaseClient, **_kwargs: object) -> DifyKnowledgeRetrieveResponse:
+        del self
+        return DifyKnowledgeRetrieveResponse.model_validate({"results": [], "usage": {}})
+
+    monkeypatch.setattr(DifyKnowledgeBaseClient, "retrieve", fake_retrieve)
+
+    async def scenario() -> None:
+        compositor = Compositor(
+            [
+                LayerNode("execution_context", _execution_context_provider()),
+                LayerNode("knowledge", _knowledge_provider(), deps={"execution_context": "execution_context"}),
+            ]
+        )
+        async with httpx.AsyncClient() as http_client:
+            async with compositor.enter(
+                configs={
+                    "execution_context": _execution_context_config(),
+                    "knowledge": _knowledge_config(query={"mode": "user_query", "value": "release notes"}),
+                }
+            ) as run:
+                knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
+                # No generated_query set exists, so no search tool is exposed.
+                assert await knowledge_layer.get_tools(http_client=http_client) == []
+
+    asyncio.run(scenario())
+
+
+def test_knowledge_layer_fetches_user_query_sets_on_context_entry(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A ``user_query`` set is retrieved on context entry, cached, and rendered as a user prompt."""
+    # Keyword arguments of every stubbed ``retrieve`` call, in call order.
+    seen_requests: list[dict[str, object]] = []
+
+    # Stub the inner-API call: record the request and return one fixed result.
+    async def fake_retrieve(self: DifyKnowledgeBaseClient, **kwargs: object) -> DifyKnowledgeRetrieveResponse:
+        del self
+        seen_requests.append(kwargs)
+        return DifyKnowledgeRetrieveResponse.model_validate(
+            {
+                "results": [
+                    {
+                        "metadata": {
+                            "_source": "knowledge",
+                            "dataset_name": "Docs",
+                            "document_name": "Release.md",
+                            "score": 0.8,
+                        },
+                        "title": "Release",
+                        "files": [],
+                        "content": "Version notes",
+                        "summary": None,
+                    }
+                ],
+                "usage": {},
+            }
+        )
+
+    monkeypatch.setattr(DifyKnowledgeBaseClient, "retrieve", fake_retrieve)
+
+    async def scenario() -> None:
+        compositor = Compositor(
+            [
+                LayerNode("execution_context", _execution_context_provider()),
+                LayerNode("knowledge", _knowledge_provider(), deps={"execution_context": "execution_context"}),
+            ]
+        )
+        async with compositor.enter(
+            configs={
+                "execution_context": _execution_context_config(),
+                "knowledge": _knowledge_config(query={"mode": "user_query", "value": "release notes"}),
+            }
+        ) as run:
+            knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
+            # Exactly one retrieval has happened by the time the context is entered.
+            assert len(seen_requests) == 1
+            assert seen_requests[0]["query"] == "release notes"
+            assert seen_requests[0]["dataset_ids"] == ["dataset-1"]
+            assert knowledge_layer.runtime_state.eager_config_fingerprint
+            assert knowledge_layer.runtime_state.eager_results[0].status == "success"
+            # The eager prompt omits the "Knowledge base search results:" heading.
+            assert knowledge_layer.user_prompts == [
+                "Knowledge retrieval results:\n\n"
+                "Set: Support KB\n"
+                "Query: release notes\n"
+                "Results:\n"
+                "1. Title: Release\n"
+                "   Dataset: Docs\n"
+                "   Document: Release.md\n"
+                "   Score: 0.8\n"
+                "   Content: Version notes"
+            ]
+            # Resuming with an unchanged config must reuse the cached results.
+            await knowledge_layer.on_context_resume()
+            assert len(seen_requests) == 1
+
+    asyncio.run(scenario())
+
+
@pytest.mark.parametrize(
    ("field_name", "field_value"),
    [
@ -199,7 +312,9 @@ def test_knowledge_layer_formats_results_and_truncates_observation() -> None:
            ) as run:
                knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
                tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
-                result = await tool.function_schema.call({"query": "reset"}, None)  # pyright: ignore[reportArgumentType]
+                result = await tool.function_schema.call(  # pyright: ignore[reportArgumentType]
+                    {"set_name": "Support KB", "query": "reset"}, None
+                )
                assert result.startswith("Knowledge base search results:\n1. Title: Guide")
                assert "Dataset: Docs" in result
                assert "Document: Guide.md" in result
@ -229,7 +344,9 @@ def test_knowledge_layer_returns_no_results_observation() -> None:
            ) as run:
                knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
                tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
-                result = await tool.function_schema.call({"query": "reset"}, None)  # pyright: ignore[reportArgumentType]
+                result = await tool.function_schema.call(  # pyright: ignore[reportArgumentType]
+                    {"set_name": "Support KB", "query": "reset"}, None
+                )
                assert result == NO_RESULTS_OBSERVATION

    asyncio.run(scenario())
@ -256,7 +373,9 @@ def test_knowledge_layer_converts_retryable_failures_into_observation() -> None:
            ) as run:
                knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
                tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
-                result = await tool.function_schema.call({"query": "reset"}, None)  # pyright: ignore[reportArgumentType]
+                result = await tool.function_schema.call(  # pyright: ignore[reportArgumentType]
+                    {"set_name": "Support KB", "query": "reset"}, None
+                )
                assert result == TEMPORARY_UNAVAILABLE_OBSERVATION

    asyncio.run(scenario())
@ -289,7 +408,9 @@ def test_knowledge_layer_converts_retryable_transport_failures_into_observation(
            ) as run:
                knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
                tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
-                result = await tool.function_schema.call({"query": "reset"}, None)  # pyright: ignore[reportArgumentType]
+                result = await tool.function_schema.call(  # pyright: ignore[reportArgumentType]
+                    {"set_name": "Support KB", "query": "reset"}, None
+                )
                assert result == TEMPORARY_UNAVAILABLE_OBSERVATION

    asyncio.run(scenario())
@ -317,7 +438,9 @@ def test_knowledge_layer_raises_non_retryable_client_errors() -> None:
                knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
                tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
                with pytest.raises(DifyKnowledgeBaseClientError) as exc_info:
-                    await tool.function_schema.call({"query": "reset"}, None)  # pyright: ignore[reportArgumentType]
+                    await tool.function_schema.call(  # pyright: ignore[reportArgumentType]
+                        {"set_name": "Support KB", "query": "reset"}, None
+                    )
                assert exc_info.value.status_code == 403

    asyncio.run(scenario())
@ -343,7 +466,9 @@ def test_knowledge_layer_raises_for_malformed_success_responses() -> None:
                knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
                tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
                with pytest.raises(DifyKnowledgeBaseClientError) as exc_info:
-                    await tool.function_schema.call({"query": "reset"}, None)  # pyright: ignore[reportArgumentType]
+                    await tool.function_schema.call(  # pyright: ignore[reportArgumentType]
+                        {"set_name": "Support KB", "query": "reset"}, None
+                    )
                assert exc_info.value.error_code == "invalid_response"
                assert exc_info.value.retryable is False

@ -411,7 +536,9 @@ def test_knowledge_layer_sends_execution_context_and_static_config_to_inner_api(
            ) as run:
                knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
                tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
-                result = await tool.function_schema.call({"query": "reset"}, None)  # pyright: ignore[reportArgumentType]
+                result = await tool.function_schema.call(  # pyright: ignore[reportArgumentType]
+                    {"set_name": "Support KB", "query": "reset"}, None
+                )
                assert result == NO_RESULTS_OBSERVATION

    asyncio.run(scenario())
--- a/dify-agent/tests/local/dify_agent/runtime/test_runner.py
+++ b/dify-agent/tests/local/dify_agent/runtime/test_runner.py
@ -995,7 +995,7 @@ def test_runner_passes_dynamic_dify_knowledge_tools_to_agent(monkeypatch: pytest
        return TestModel(custom_output_text="done")  # pyright: ignore[reportReturnType]

    async def fake_get_tools(self: DifyKnowledgeBaseLayer, *, http_client: httpx.AsyncClient) -> list[Tool[object]]:
-        assert self.config.dataset_ids == ["dataset-1"]
+        assert self.config.sets[0].dataset_ids == ["dataset-1"]
        assert http_client.headers.get("X-Test-Client") == "dify-api"
        return [Tool(knowledge_tool, name="knowledge_base_search")]

@ -1055,8 +1055,15 @@ def test_runner_passes_dynamic_dify_knowledge_tools_to_agent(monkeypatch: pytest
                    deps={"execution_context": "execution_context"},
                    config=DifyKnowledgeBaseLayerConfig.model_validate(
                        {
-                            "dataset_ids": ["dataset-1"],
-                            "retrieval": {"mode": "multiple", "top_k": 4},
+                            "sets": [
+                                {
+                                    "id": "support",
+                                    "name": "Support KB",
+                                    "datasets": [{"id": "dataset-1"}],
+                                    "query": {"mode": "generated_query"},
+                                    "retrieval": {"mode": "multiple", "top_k": 4},
+                                }
+                            ],
                        }
                    ),
                ),
--- a/dify-agent/tests/local/dify_agent/server/test_app.py
+++ b/dify-agent/tests/local/dify_agent/server/test_app.py
@ -231,8 +231,15 @@ def test_create_app_creates_scheduler_and_closes_after_shutdown(monkeypatch: pyt
        knowledge_layer = knowledge_provider.create_layer(
            DifyKnowledgeBaseLayerConfig.model_validate(
                {
-                    "dataset_ids": ["dataset-1"],
-                    "retrieval": {"mode": "multiple", "top_k": 2},
+                    "sets": [
+                        {
+                            "id": "support",
+                            "name": "Support KB",
+                            "datasets": [{"id": "dataset-1"}],
+                            "query": {"mode": "generated_query"},
+                            "retrieval": {"mode": "multiple", "top_k": 2},
+                        }
+                    ],
                }
            )
        )
--- a/dify-agent/tests/local/dify_agent/test_import_boundaries.py
+++ b/dify-agent/tests/local/dify_agent/test_import_boundaries.py
@ -115,7 +115,7 @@ def test_protocol_and_dify_plugin_exports_do_not_import_server_only_modules() ->
            "assert dify_agent_layers_execution_context.__all__ == ['DIFY_EXECUTION_CONTEXT_LAYER_TYPE_ID', 'DifyExecutionContextAgentMode', 'DifyExecutionContextInvokeFrom', 'DifyExecutionContextLayerConfig', 'DifyExecutionContextUserFrom']",
            "assert dify_agent_layers_ask_human.__all__ == ['AskHumanAction', 'AskHumanActionStyle', 'AskHumanField', 'AskHumanFieldType', 'AskHumanFileField', 'AskHumanFileListField', 'AskHumanParagraphField', 'AskHumanResultStatus', 'AskHumanSelectField', 'AskHumanSelectOption', 'AskHumanSelectedAction', 'AskHumanToolArgs', 'AskHumanToolResult', 'AskHumanUrgency', 'DEFAULT_ASK_HUMAN_TOOL_DESCRIPTION', 'DIFY_ASK_HUMAN_LAYER_TYPE_ID', 'DifyAskHumanLayerConfig']",
            "assert dify_agent_layers_dify_plugin.__all__ == ['DIFY_PLUGIN_LLM_LAYER_TYPE_ID', 'DIFY_PLUGIN_TOOLS_LAYER_TYPE_ID', 'DifyPluginCredentialValue', 'DifyPluginLLMLayerConfig', 'DifyPluginToolCredentialType', 'DifyPluginToolConfig', 'DifyPluginToolOption', 'DifyPluginToolParameter', 'DifyPluginToolParameterForm', 'DifyPluginToolParameterType', 'DifyPluginToolsLayerConfig', 'DifyPluginToolValue']",
-            "assert dify_agent_layers_knowledge.__all__ == ['DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID', 'DifyKnowledgeBaseLayerConfig', 'DifyKnowledgeMetadataCondition', 'DifyKnowledgeMetadataConditions', 'DifyKnowledgeMetadataFilteringConfig', 'DifyKnowledgeModelConfig', 'DifyKnowledgeRerankingModelConfig', 'DifyKnowledgeRetrievalConfig']",
+            "assert dify_agent_layers_knowledge.__all__ == ['DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID', 'DifyKnowledgeBaseLayerConfig', 'DifyKnowledgeDatasetConfig', 'DifyKnowledgeEagerResult', 'DifyKnowledgeMetadataCondition', 'DifyKnowledgeMetadataConditions', 'DifyKnowledgeMetadataFilteringConfig', 'DifyKnowledgeModelConfig', 'DifyKnowledgeQueryConfig', 'DifyKnowledgeRerankingModelConfig', 'DifyKnowledgeRetrievalConfig', 'DifyKnowledgeRuntimeState', 'DifyKnowledgeSetConfig']",
            "assert dify_agent_layers_output.__all__ == ['DIFY_OUTPUT_LAYER_TYPE_ID', 'DifyOutputLayerConfig']",
            "assert dify_agent_layers_shell.__all__ == ['DIFY_SHELL_LAYER_TYPE_ID', 'DifyShellCliToolConfig', 'DifyShellEnvVarConfig', 'DifyShellLayerConfig', 'DifyShellSandboxConfig', 'DifyShellSecretRefConfig']",
        ],
--- a/packages/contracts/generated/api/console/agent/types.gen.ts
+++ b/packages/contracts/generated/api/console/agent/types.gen.ts
@ -563,7 +563,7 @@ export type AgentComposerSoulCandidatesResponse = {
  cli_tools?: Array<AgentCliToolConfig>
  dify_tools?: Array<AgentComposerDifyToolCandidateResponse>
  human_contacts?: Array<AgentHumanContactConfig>
-  knowledge_datasets?: Array<AgentKnowledgeDatasetConfig>
+  knowledge_sets?: Array<AgentComposerKnowledgeSetCandidateResponse>
 }

 export type ComposerCandidateCapabilities = {
@ -926,9 +926,7 @@ export type AgentSoulHumanConfig = {
 }

 export type AgentSoulKnowledgeConfig = {
-  datasets?: Array<AgentKnowledgeDatasetConfig>
-  query_config?: AgentKnowledgeQueryConfig
-  query_mode?: AgentKnowledgeQueryMode | null
+  sets?: Array<AgentKnowledgeSetConfig>
 }

 export type AgentSoulMemoryConfig = {
@ -1069,11 +1067,12 @@ export type AgentComposerDifyToolCandidateResponse = {
  tools_count?: number | null
 }

-export type AgentKnowledgeDatasetConfig = {
+export type AgentComposerKnowledgeSetCandidateResponse = {
+  datasets?: Array<AgentComposerKnowledgeDatasetCandidateResponse>
  description?: string | null
-  id?: string | null
-  name?: string | null
-  [key: string]: unknown
+  id: string
+  missing_dataset_ids?: Array<string>
+  name: string
 }

 export type AgentModerationProviderConfig = {
@ -1228,16 +1227,16 @@ export type AgentHumanToolConfig = {
  [key: string]: unknown
 }

-export type AgentKnowledgeQueryConfig = {
-  query?: string | null
-  score_threshold?: number | null
-  score_threshold_enabled?: boolean | null
-  top_k?: number | null
-  [key: string]: unknown
+export type AgentKnowledgeSetConfig = {
+  datasets: Array<AgentKnowledgeDatasetConfig>
+  description?: string | null
+  id: string
+  metadata_filtering?: AgentKnowledgeMetadataFilteringConfig
+  name: string
+  query: AgentKnowledgeQueryConfig
+  retrieval: AgentKnowledgeRetrievalConfig
 }

-export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
-
 export type AgentMemoryArtifactConfig = {
  id?: string | null
  name?: string | null
@ -1376,6 +1375,13 @@ export type AgentPermissionConfig = {

 export type AgentCliToolRiskLevel = 'dangerous' | 'safe' | 'unknown'

+export type AgentComposerKnowledgeDatasetCandidateResponse = {
+  description?: string | null
+  id?: string | null
+  missing?: boolean
+  name?: string | null
+}
+
 export type AgentModerationIoConfig = {
  enabled?: boolean
  preset_response?: string | null
@ -1404,6 +1410,34 @@ export type FormInputConfig

 export type JsonValue2 = unknown

+export type AgentKnowledgeDatasetConfig = {
+  description?: string | null
+  id?: string | null
+  name?: string | null
+}
+
+export type AgentKnowledgeMetadataFilteringConfig = {
+  conditions?: AgentKnowledgeMetadataConditions | null
+  mode?: 'automatic' | 'disabled' | 'manual'
+  model_config?: AgentKnowledgeModelConfig | null
+}
+
+export type AgentKnowledgeQueryConfig = {
+  mode: AgentKnowledgeQueryMode
+  value?: string | null
+}
+
+export type AgentKnowledgeRetrievalConfig = {
+  mode: 'multiple' | 'single'
+  model?: AgentKnowledgeModelConfig | null
+  reranking_enable?: boolean
+  reranking_mode?: string
+  reranking_model?: AgentKnowledgeRerankingModelConfig | null
+  score_threshold?: number | null
+  top_k?: number | null
+  weights?: AgentKnowledgeWeightedScoreConfig | null
+}
+
 export type AgentModelResponseFormatConfig = {
  type?: string | null
  [key: string]: unknown
@ -1454,6 +1488,38 @@ export type FileListInputConfig = {
  type?: 'file-list'
 }

+export type AgentKnowledgeMetadataConditions = {
+  conditions?: Array<AgentKnowledgeMetadataCondition>
+  logical_operator?: 'and' | 'or'
+}
+
+export type AgentKnowledgeModelConfig = {
+  completion_params?: {
+    [key: string]: unknown
+  }
+  mode: string
+  name: string
+  provider: string
+}
+
+export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
+
+export type AgentKnowledgeRerankingModelConfig = {
+  model: string
+  provider: string
+}
+
+export type AgentKnowledgeWeightedScoreConfig = {
+  keyword_setting?: {
+    [key: string]: unknown
+  } | null
+  vector_setting?: {
+    [key: string]: unknown
+  } | null
+  weight_type?: string | null
+  [key: string]: unknown
+}
+
 export type StringSource = {
  selector?: Array<string>
  type: ValueSourceType
@ -1470,6 +1536,30 @@ export type FileType = 'audio' | 'custom' | 'document' | 'image' | 'video'

 export type FileTransferMethod = 'datasource_file' | 'local_file' | 'remote_url' | 'tool_file'

+export type AgentKnowledgeMetadataCondition = {
+  comparison_operator:
+    | '<'
+    | '='
+    | '>'
+    | 'after'
+    | 'before'
+    | 'contains'
+    | 'empty'
+    | 'end with'
+    | 'in'
+    | 'is'
+    | 'is not'
+    | 'not contains'
+    | 'not empty'
+    | 'not in'
+    | 'start with'
+    | '≠'
+    | '≤'
+    | '≥'
+  name: string
+  value?: string | Array<string> | number | null
+}
+
 export type ValueSourceType = 'constant' | 'variable'

 export type AgentAppPaginationWritable = {
--- a/packages/contracts/generated/api/console/agent/zod.gen.ts
+++ b/packages/contracts/generated/api/console/agent/zod.gen.ts
@ -1022,15 +1022,6 @@ export const zAgentComposerDifyToolCandidateResponse = z.object({
  tools_count: z.int().nullish(),
 })

-/**
- * AgentKnowledgeDatasetConfig
- */
-export const zAgentKnowledgeDatasetConfig = z.object({
-  description: z.string().nullish(),
-  id: z.string().max(255).nullish(),
-  name: z.string().max(255).nullish(),
-})
-
 /**
 * SimpleAccount
 */
@ -1279,30 +1270,6 @@ export const zAgentSoulHumanConfig = z.object({
  tools: z.array(zAgentHumanToolConfig).optional(),
 })

-/**
- * AgentKnowledgeQueryConfig
- */
-export const zAgentKnowledgeQueryConfig = z.object({
-  query: z.string().nullish(),
-  score_threshold: z.number().gte(0).lte(1).nullish(),
-  score_threshold_enabled: z.boolean().nullish(),
-  top_k: z.int().gte(1).nullish(),
-})
-
-/**
- * AgentKnowledgeQueryMode
- */
-export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
-
-/**
- * AgentSoulKnowledgeConfig
- */
-export const zAgentSoulKnowledgeConfig = z.object({
-  datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
-  query_config: zAgentKnowledgeQueryConfig.optional(),
-  query_mode: zAgentKnowledgeQueryMode.nullish(),
-})
-
 /**
 * AgentMemoryArtifactConfig
 */
@ -1521,6 +1488,27 @@ export const zAgentCliToolConfig = z.object({
  tool_name: z.string().max(255).nullish(),
 })

+/**
+ * AgentComposerKnowledgeDatasetCandidateResponse
+ */
+export const zAgentComposerKnowledgeDatasetCandidateResponse = z.object({
+  description: z.string().nullish(),
+  id: z.string().max(255).nullish(),
+  missing: z.boolean().optional().default(false),
+  name: z.string().max(255).nullish(),
+})
+
+/**
+ * AgentComposerKnowledgeSetCandidateResponse
+ */
+export const zAgentComposerKnowledgeSetCandidateResponse = z.object({
+  datasets: z.array(zAgentComposerKnowledgeDatasetCandidateResponse).optional(),
+  description: z.string().nullish(),
+  id: z.string(),
+  missing_dataset_ids: z.array(z.string()).optional(),
+  name: z.string(),
+})
+
 /**
 * AgentComposerSoulCandidatesResponse
 */
@ -1528,7 +1516,7 @@ export const zAgentComposerSoulCandidatesResponse = z.object({
  cli_tools: z.array(zAgentCliToolConfig).optional(),
  dify_tools: z.array(zAgentComposerDifyToolCandidateResponse).optional(),
  human_contacts: z.array(zAgentHumanContactConfig).optional(),
-  knowledge_datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
+  knowledge_sets: z.array(zAgentComposerKnowledgeSetCandidateResponse).optional(),
 })

 /**
@ -1583,6 +1571,15 @@ export const zHumanInputFormSubmissionData = z.object({
  submitted_data: z.record(z.string(), zJsonValue2).nullish(),
 })

+/**
+ * AgentKnowledgeDatasetConfig
+ */
+export const zAgentKnowledgeDatasetConfig = z.object({
+  description: z.string().nullish(),
+  id: z.string().max(255).nullish(),
+  name: z.string().max(255).nullish(),
+})
+
 /**
 * AgentModelResponseFormatConfig
 */
@ -1733,53 +1730,6 @@ export const zAgentSoulToolsConfig = z.object({
  dify_tools: z.array(zAgentSoulDifyToolConfig).optional(),
 })

-/**
- * AgentSoulConfig
- */
-export const zAgentSoulConfig = z.object({
-  app_features: zAgentSoulAppFeaturesConfig.optional(),
-  app_variables: z.array(zAppVariableConfig).optional(),
-  env: zAgentSoulEnvConfig.optional(),
-  human: zAgentSoulHumanConfig.optional(),
-  knowledge: zAgentSoulKnowledgeConfig.optional(),
-  memory: zAgentSoulMemoryConfig.optional(),
-  misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
-  model: zAgentSoulModelConfig.nullish(),
-  prompt: zAgentSoulPromptConfig.optional(),
-  sandbox: zAgentSoulSandboxConfig.optional(),
-  schema_version: z.int().optional().default(1),
-  tools: zAgentSoulToolsConfig.optional(),
-})
-
-/**
- * AgentAppComposerResponse
- */
-export const zAgentAppComposerResponse = z.object({
-  active_config_snapshot: zAgentConfigSnapshotSummaryResponse,
-  agent: zAgentComposerAgentResponse,
-  agent_soul: zAgentSoulConfig,
-  save_options: z.array(zComposerSaveStrategy),
-  validation: zComposerValidationFindingsResponse.nullish(),
-  variant: z.literal('agent_app'),
-})
-
-/**
- * AgentConfigSnapshotDetailResponse
- */
-export const zAgentConfigSnapshotDetailResponse = z.object({
-  agent_id: z.string().nullish(),
-  config_snapshot: zAgentSoulConfig,
-  created_at: z.int().nullish(),
-  created_by: z.string().nullish(),
-  display_version: z.int().nullish(),
-  id: z.string(),
-  revisions: z.array(zAgentConfigRevisionResponse).optional(),
-  snapshot_version: z.int().nullish(),
-  summary: z.string().nullish(),
-  version: z.int(),
-  version_note: z.string().nullish(),
-})
-
 /**
 * OutputErrorStrategy
 *
@ -1869,22 +1819,6 @@ export const zWorkflowNodeJobConfig = z.object({
  workflow_prompt: z.string().optional().default(''),
 })

-/**
- * ComposerSavePayload
- */
-export const zComposerSavePayload = z.object({
-  agent_soul: zAgentSoulConfig.nullish(),
-  binding: zComposerBindingPayload.nullish(),
-  client_revision_id: z.string().nullish(),
-  idempotency_key: z.string().nullish(),
-  new_agent_name: z.string().min(1).max(255).nullish(),
-  node_job: zWorkflowNodeJobConfig.nullish(),
-  save_strategy: zComposerSaveStrategy,
-  soul_lock: zComposerSoulLockPayload.optional(),
-  variant: zComposerVariant,
-  version_note: z.string().nullish(),
-})
-
 /**
 * ButtonStyle
 *
@ -1903,6 +1837,60 @@ export const zUserActionConfig = z.object({
  title: z.string().max(100),
 })

+/**
+ * AgentKnowledgeModelConfig
+ */
+export const zAgentKnowledgeModelConfig = z.object({
+  completion_params: z.record(z.string(), z.unknown()).optional(),
+  mode: z.string().min(1).max(64),
+  name: z.string().min(1).max(255),
+  provider: z.string().min(1).max(255),
+})
+
+/**
+ * AgentKnowledgeQueryMode
+ */
+export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
+
+/**
+ * AgentKnowledgeQueryConfig
+ */
+export const zAgentKnowledgeQueryConfig = z.object({
+  mode: zAgentKnowledgeQueryMode,
+  value: z.string().nullish(),
+})
+
+/**
+ * AgentKnowledgeRerankingModelConfig
+ */
+export const zAgentKnowledgeRerankingModelConfig = z.object({
+  model: z.string().min(1).max(255),
+  provider: z.string().min(1).max(255),
+})
+
+/**
+ * AgentKnowledgeWeightedScoreConfig
+ */
+export const zAgentKnowledgeWeightedScoreConfig = z.object({
+  keyword_setting: z.record(z.string(), z.unknown()).nullish(),
+  vector_setting: z.record(z.string(), z.unknown()).nullish(),
+  weight_type: z.string().max(64).nullish(),
+})
+
+/**
+ * AgentKnowledgeRetrievalConfig
+ */
+export const zAgentKnowledgeRetrievalConfig = z.object({
+  mode: z.enum(['multiple', 'single']),
+  model: zAgentKnowledgeModelConfig.nullish(),
+  reranking_enable: z.boolean().optional().default(true),
+  reranking_mode: z.string().optional().default('reranking_model'),
+  reranking_model: zAgentKnowledgeRerankingModelConfig.nullish(),
+  score_threshold: z.number().gte(0).lte(1).nullish(),
+  top_k: z.int().gte(1).nullish(),
+  weights: zAgentKnowledgeWeightedScoreConfig.nullish(),
+})
+
 /**
 * FileType
 */
@ -1941,6 +1929,134 @@ export const zFileListInputConfig = z.object({
  type: z.literal('file-list').optional().default('file-list'),
 })

+/**
+ * AgentKnowledgeMetadataCondition
+ */
+export const zAgentKnowledgeMetadataCondition = z.object({
+  comparison_operator: z.enum([
+    '<',
+    '=',
+    '>',
+    'after',
+    'before',
+    'contains',
+    'empty',
+    'end with',
+    'in',
+    'is',
+    'is not',
+    'not contains',
+    'not empty',
+    'not in',
+    'start with',
+    '≠',
+    '≤',
+    '≥',
+  ]),
+  name: z.string().min(1).max(255),
+  value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(),
+})
+
+/**
+ * AgentKnowledgeMetadataConditions
+ */
+export const zAgentKnowledgeMetadataConditions = z.object({
+  conditions: z.array(zAgentKnowledgeMetadataCondition).optional(),
+  logical_operator: z.enum(['and', 'or']).optional().default('and'),
+})
+
+/**
+ * AgentKnowledgeMetadataFilteringConfig
+ */
+export const zAgentKnowledgeMetadataFilteringConfig = z.object({
+  conditions: zAgentKnowledgeMetadataConditions.nullish(),
+  mode: z.enum(['automatic', 'disabled', 'manual']).optional().default('disabled'),
+  model_config: zAgentKnowledgeModelConfig.nullish(),
+})
+
+/**
+ * AgentKnowledgeSetConfig
+ */
+export const zAgentKnowledgeSetConfig = z.object({
+  datasets: z.array(zAgentKnowledgeDatasetConfig),
+  description: z.string().nullish(),
+  id: z.string().min(1).max(255),
+  metadata_filtering: zAgentKnowledgeMetadataFilteringConfig.optional(),
+  name: z.string().min(1).max(255),
+  query: zAgentKnowledgeQueryConfig,
+  retrieval: zAgentKnowledgeRetrievalConfig,
+})
+
+/**
+ * AgentSoulKnowledgeConfig
+ */
+export const zAgentSoulKnowledgeConfig = z.object({
+  sets: z.array(zAgentKnowledgeSetConfig).optional(),
+})
+
+/**
+ * AgentSoulConfig
+ */
+export const zAgentSoulConfig = z.object({
+  app_features: zAgentSoulAppFeaturesConfig.optional(),
+  app_variables: z.array(zAppVariableConfig).optional(),
+  env: zAgentSoulEnvConfig.optional(),
+  human: zAgentSoulHumanConfig.optional(),
+  knowledge: zAgentSoulKnowledgeConfig.optional(),
+  memory: zAgentSoulMemoryConfig.optional(),
+  misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
+  model: zAgentSoulModelConfig.nullish(),
+  prompt: zAgentSoulPromptConfig.optional(),
+  sandbox: zAgentSoulSandboxConfig.optional(),
+  schema_version: z.int().optional().default(1),
+  tools: zAgentSoulToolsConfig.optional(),
+})
+
+/**
+ * AgentAppComposerResponse
+ */
+export const zAgentAppComposerResponse = z.object({
+  active_config_snapshot: zAgentConfigSnapshotSummaryResponse,
+  agent: zAgentComposerAgentResponse,
+  agent_soul: zAgentSoulConfig,
+  save_options: z.array(zComposerSaveStrategy),
+  validation: zComposerValidationFindingsResponse.nullish(),
+  variant: z.literal('agent_app'),
+})
+
+/**
+ * ComposerSavePayload
+ */
+export const zComposerSavePayload = z.object({
+  agent_soul: zAgentSoulConfig.nullish(),
+  binding: zComposerBindingPayload.nullish(),
+  client_revision_id: z.string().nullish(),
+  idempotency_key: z.string().nullish(),
+  new_agent_name: z.string().min(1).max(255).nullish(),
+  node_job: zWorkflowNodeJobConfig.nullish(),
+  save_strategy: zComposerSaveStrategy,
+  soul_lock: zComposerSoulLockPayload.optional(),
+  variant: zComposerVariant,
+  version_note: z.string().nullish(),
+})
+
+/**
+ * AgentConfigSnapshotDetailResponse
+ */
+export const zAgentConfigSnapshotDetailResponse = z.object({
+  agent_id: z.string().nullish(),
+  config_snapshot: zAgentSoulConfig,
+  created_at: z.int().nullish(),
+  created_by: z.string().nullish(),
+  display_version: z.int().nullish(),
+  id: z.string(),
+  revisions: z.array(zAgentConfigRevisionResponse).optional(),
+  snapshot_version: z.int().nullish(),
+  summary: z.string().nullish(),
+  version: z.int(),
+  version_note: z.string().nullish(),
+})
+
 /**
 * ValueSourceType
 *
--- a/packages/contracts/generated/api/console/apps/types.gen.ts
+++ b/packages/contracts/generated/api/console/apps/types.gen.ts
@ -1890,7 +1890,7 @@ export type AgentComposerSoulCandidatesResponse = {
  cli_tools?: Array<AgentCliToolConfig>
  dify_tools?: Array<AgentComposerDifyToolCandidateResponse>
  human_contacts?: Array<AgentHumanContactConfig>
-  knowledge_datasets?: Array<AgentKnowledgeDatasetConfig>
+  knowledge_sets?: Array<AgentComposerKnowledgeSetCandidateResponse>
 }

 export type ComposerCandidateCapabilities = {
@ -2124,9 +2124,7 @@ export type AgentSoulHumanConfig = {
 }

 export type AgentSoulKnowledgeConfig = {
-  datasets?: Array<AgentKnowledgeDatasetConfig>
-  query_config?: AgentKnowledgeQueryConfig
-  query_mode?: AgentKnowledgeQueryMode | null
+  sets?: Array<AgentKnowledgeSetConfig>
 }

 export type AgentSoulMemoryConfig = {
@ -2278,11 +2276,12 @@ export type AgentComposerDifyToolCandidateResponse = {
  tools_count?: number | null
 }

-export type AgentKnowledgeDatasetConfig = {
+export type AgentComposerKnowledgeSetCandidateResponse = {
+  datasets?: Array<AgentComposerKnowledgeDatasetCandidateResponse>
  description?: string | null
-  id?: string | null
-  name?: string | null
-  [key: string]: unknown
+  id: string
+  missing_dataset_ids?: Array<string>
+  name: string
 }

 export type CheckResultView = {
@ -2393,16 +2392,16 @@ export type AgentHumanToolConfig = {
  [key: string]: unknown
 }

-export type AgentKnowledgeQueryConfig = {
-  query?: string | null
-  score_threshold?: number | null
-  score_threshold_enabled?: boolean | null
-  top_k?: number | null
-  [key: string]: unknown
+export type AgentKnowledgeSetConfig = {
+  datasets: Array<AgentKnowledgeDatasetConfig>
+  description?: string | null
+  id: string
+  metadata_filtering?: AgentKnowledgeMetadataFilteringConfig
+  name: string
+  query: AgentKnowledgeQueryConfig
+  retrieval: AgentKnowledgeRetrievalConfig
 }

-export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
-
 export type AgentMemoryArtifactConfig = {
  id?: string | null
  name?: string | null
@ -2506,6 +2505,13 @@ export type AgentPermissionConfig = {

 export type AgentCliToolRiskLevel = 'dangerous' | 'safe' | 'unknown'

+export type AgentComposerKnowledgeDatasetCandidateResponse = {
+  description?: string | null
+  id?: string | null
+  missing?: boolean
+  name?: string | null
+}
+
 export type ButtonStyle = 'accent' | 'default' | 'ghost' | 'primary'

 export type ParagraphInputConfig = {
@ -2545,6 +2551,34 @@ export type AgentModerationProviderConfig = {
  [key: string]: unknown
 }

+export type AgentKnowledgeDatasetConfig = {
+  description?: string | null
+  id?: string | null
+  name?: string | null
+}
+
+export type AgentKnowledgeMetadataFilteringConfig = {
+  conditions?: AgentKnowledgeMetadataConditions | null
+  mode?: 'automatic' | 'disabled' | 'manual'
+  model_config?: AgentKnowledgeModelConfig | null
+}
+
+export type AgentKnowledgeQueryConfig = {
+  mode: AgentKnowledgeQueryMode
+  value?: string | null
+}
+
+export type AgentKnowledgeRetrievalConfig = {
+  mode: 'multiple' | 'single'
+  model?: AgentKnowledgeModelConfig | null
+  reranking_enable?: boolean
+  reranking_mode?: string
+  reranking_model?: AgentKnowledgeRerankingModelConfig | null
+  score_threshold?: number | null
+  top_k?: number | null
+  weights?: AgentKnowledgeWeightedScoreConfig | null
+}
+
 export type AgentModelResponseFormatConfig = {
  type?: string | null
  [key: string]: unknown
@ -2578,8 +2612,64 @@ export type AgentModerationIoConfig = {
  [key: string]: unknown
 }

+export type AgentKnowledgeMetadataConditions = {
+  conditions?: Array<AgentKnowledgeMetadataCondition>
+  logical_operator?: 'and' | 'or'
+}
+
+export type AgentKnowledgeModelConfig = {
+  completion_params?: {
+    [key: string]: unknown
+  }
+  mode: string
+  name: string
+  provider: string
+}
+
+export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
+
+export type AgentKnowledgeRerankingModelConfig = {
+  model: string
+  provider: string
+}
+
+export type AgentKnowledgeWeightedScoreConfig = {
+  keyword_setting?: {
+    [key: string]: unknown
+  } | null
+  vector_setting?: {
+    [key: string]: unknown
+  } | null
+  weight_type?: string | null
+  [key: string]: unknown
+}
+
 export type ValueSourceType = 'constant' | 'variable'

+export type AgentKnowledgeMetadataCondition = {
+  comparison_operator:
+    | '<'
+    | '='
+    | '>'
+    | 'after'
+    | 'before'
+    | 'contains'
+    | 'empty'
+    | 'end with'
+    | 'in'
+    | 'is'
+    | 'is not'
+    | 'not contains'
+    | 'not empty'
+    | 'not in'
+    | 'start with'
+    | '≠'
+    | '≤'
+    | '≥'
+  name: string
+  value?: string | Array<string> | number | null
+}
+
 export type AppPaginationWritable = {
  data: Array<AppPartialWritable>
  has_more: boolean
--- a/packages/contracts/generated/api/console/apps/zod.gen.ts
+++ b/packages/contracts/generated/api/console/apps/zod.gen.ts
@ -2629,15 +2629,6 @@ export const zAgentComposerDifyToolCandidateResponse = z.object({
  tools_count: z.int().nullish(),
 })

-/**
- * AgentKnowledgeDatasetConfig
- */
-export const zAgentKnowledgeDatasetConfig = z.object({
-  description: z.string().nullish(),
-  id: z.string().max(255).nullish(),
-  name: z.string().max(255).nullish(),
-})
-
 /**
 * CheckResultView
 *
@ -2767,30 +2758,6 @@ export const zAgentSoulHumanConfig = z.object({
  tools: z.array(zAgentHumanToolConfig).optional(),
 })

-/**
- * AgentKnowledgeQueryConfig
- */
-export const zAgentKnowledgeQueryConfig = z.object({
-  query: z.string().nullish(),
-  score_threshold: z.number().gte(0).lte(1).nullish(),
-  score_threshold_enabled: z.boolean().nullish(),
-  top_k: z.int().gte(1).nullish(),
-})
-
-/**
- * AgentKnowledgeQueryMode
- */
-export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
-
-/**
- * AgentSoulKnowledgeConfig
- */
-export const zAgentSoulKnowledgeConfig = z.object({
-  datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
-  query_config: zAgentKnowledgeQueryConfig.optional(),
-  query_mode: zAgentKnowledgeQueryMode.nullish(),
-})
-
 /**
 * AgentMemoryArtifactConfig
 */
@ -3002,6 +2969,27 @@ export const zAgentCliToolConfig = z.object({
  tool_name: z.string().max(255).nullish(),
 })

+/**
+ * AgentComposerKnowledgeDatasetCandidateResponse
+ */
+export const zAgentComposerKnowledgeDatasetCandidateResponse = z.object({
+  description: z.string().nullish(),
+  id: z.string().max(255).nullish(),
+  missing: z.boolean().optional().default(false),
+  name: z.string().max(255).nullish(),
+})
+
+/**
+ * AgentComposerKnowledgeSetCandidateResponse
+ */
+export const zAgentComposerKnowledgeSetCandidateResponse = z.object({
+  datasets: z.array(zAgentComposerKnowledgeDatasetCandidateResponse).optional(),
+  description: z.string().nullish(),
+  id: z.string(),
+  missing_dataset_ids: z.array(z.string()).optional(),
+  name: z.string(),
+})
+
 /**
 * AgentComposerSoulCandidatesResponse
 */
@ -3009,7 +2997,7 @@ export const zAgentComposerSoulCandidatesResponse = z.object({
  cli_tools: z.array(zAgentCliToolConfig).optional(),
  dify_tools: z.array(zAgentComposerDifyToolCandidateResponse).optional(),
  human_contacts: z.array(zAgentHumanContactConfig).optional(),
-  knowledge_datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
+  knowledge_sets: z.array(zAgentComposerKnowledgeSetCandidateResponse).optional(),
 })

 /**
@ -3041,6 +3029,15 @@ export const zUserActionConfig = z.object({
  title: z.string().max(100),
 })

+/**
+ * AgentKnowledgeDatasetConfig
+ */
+export const zAgentKnowledgeDatasetConfig = z.object({
+  description: z.string().nullish(),
+  id: z.string().max(255).nullish(),
+  name: z.string().max(255).nullish(),
+})
+
 /**
 * AgentModelResponseFormatConfig
 */
@ -3292,57 +3289,57 @@ export const zAgentSoulAppFeaturesConfig = z.object({
 })

 /**
- * AgentSoulConfig
+ * AgentKnowledgeModelConfig: model reference with required non-empty `provider`, `name` and `mode`, plus optional free-form `completion_params`.
 */
-export const zAgentSoulConfig = z.object({
-  app_features: zAgentSoulAppFeaturesConfig.optional(),
-  app_variables: z.array(zAppVariableConfig).optional(),
-  env: zAgentSoulEnvConfig.optional(),
-  human: zAgentSoulHumanConfig.optional(),
-  knowledge: zAgentSoulKnowledgeConfig.optional(),
-  memory: zAgentSoulMemoryConfig.optional(),
-  misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
-  model: zAgentSoulModelConfig.nullish(),
-  prompt: zAgentSoulPromptConfig.optional(),
-  sandbox: zAgentSoulSandboxConfig.optional(),
-  schema_version: z.int().optional().default(1),
-  tools: zAgentSoulToolsConfig.optional(),
+export const zAgentKnowledgeModelConfig = z.object({
+  completion_params: z.record(z.string(), z.unknown()).optional(), // string-keyed map of unvalidated values
+  mode: z.string().min(1).max(64), // 1 to 64 characters
+  name: z.string().min(1).max(255), // 1 to 255 characters
+  provider: z.string().min(1).max(255), // 1 to 255 characters
 })

 /**
- * WorkflowAgentComposerResponse
+ * AgentKnowledgeQueryMode: closed set of query modes, either 'generated_query' or 'user_query'.
 */
-export const zWorkflowAgentComposerResponse = z.object({
-  active_config_snapshot: zAgentConfigSnapshotSummaryResponse.nullish(),
-  agent: zAgentComposerAgentResponse.nullish(),
-  agent_soul: zAgentSoulConfig,
-  app_id: z.string().nullish(),
-  binding: zAgentComposerBindingResponse.nullish(),
-  effective_declared_outputs: z.array(zDeclaredOutputConfig).optional(),
-  impact_summary: zAgentComposerImpactResponse.nullish(),
-  node_id: z.string().nullish(),
-  node_job: zWorkflowNodeJobConfig,
-  save_options: z.array(zComposerSaveStrategy),
-  soul_lock: zAgentComposerSoulLockResponse,
-  validation: zComposerValidationFindingsResponse.nullish(),
-  variant: z.literal('workflow'),
-  workflow_id: z.string().nullish(),
+export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
+
+/**
+ * AgentKnowledgeQueryConfig: required query `mode` paired with a nullish string `value`.
+ */
+export const zAgentKnowledgeQueryConfig = z.object({
+  mode: zAgentKnowledgeQueryMode,
+  value: z.string().nullish(), // NOTE(review): how `value` is interpreted per mode is not visible here
 })

 /**
- * ComposerSavePayload
+ * AgentKnowledgeRerankingModelConfig: reranking model reference; `model` and `provider` are both required, 1 to 255 characters each.
 */
-export const zComposerSavePayload = z.object({
-  agent_soul: zAgentSoulConfig.nullish(),
-  binding: zComposerBindingPayload.nullish(),
-  client_revision_id: z.string().nullish(),
-  idempotency_key: z.string().nullish(),
-  new_agent_name: z.string().min(1).max(255).nullish(),
-  node_job: zWorkflowNodeJobConfig.nullish(),
-  save_strategy: zComposerSaveStrategy,
-  soul_lock: zComposerSoulLockPayload.optional(),
-  variant: zComposerVariant,
-  version_note: z.string().nullish(),
+export const zAgentKnowledgeRerankingModelConfig = z.object({
+  model: z.string().min(1).max(255),
+  provider: z.string().min(1).max(255),
+})
+
+/**
+ * AgentKnowledgeWeightedScoreConfig: weighting settings; all three fields are nullish and the two `*_setting` maps are not validated further.
+ */
+export const zAgentKnowledgeWeightedScoreConfig = z.object({
+  keyword_setting: z.record(z.string(), z.unknown()).nullish(),
+  vector_setting: z.record(z.string(), z.unknown()).nullish(),
+  weight_type: z.string().max(64).nullish(), // at most 64 characters
+})
+
+/**
+ * AgentKnowledgeRetrievalConfig: retrieval settings; only `mode` ('multiple' or 'single') is required.
+ */
+export const zAgentKnowledgeRetrievalConfig = z.object({
+  mode: z.enum(['multiple', 'single']),
+  model: zAgentKnowledgeModelConfig.nullish(),
+  reranking_enable: z.boolean().optional().default(true), // defaults to true when omitted
+  reranking_mode: z.string().optional().default('reranking_model'), // defaults to 'reranking_model' when omitted
+  reranking_model: zAgentKnowledgeRerankingModelConfig.nullish(),
+  score_threshold: z.number().gte(0).lte(1).nullish(), // when present, must lie in [0, 1]
+  top_k: z.int().gte(1).nullish(), // when present, an integer of at least 1
+  weights: zAgentKnowledgeWeightedScoreConfig.nullish(),
 })

 /**
@ -3466,6 +3463,125 @@ export const zMessageInfiniteScrollPaginationResponse = z.object({
  limit: z.int(),
 })

+/**
+ * AgentKnowledgeMetadataCondition: one metadata comparison with a non-empty `name`, a `comparison_operator` from the fixed list below and a nullish `value`.
+ */
+export const zAgentKnowledgeMetadataCondition = z.object({
+  comparison_operator: z.enum([
+    '<',
+    '=',
+    '>',
+    'after',
+    'before',
+    'contains',
+    'empty',
+    'end with',
+    'in',
+    'is',
+    'is not',
+    'not contains',
+    'not empty',
+    'not in',
+    'start with',
+    '≠',
+    '≤',
+    '≥',
+  ]),
+  name: z.string().min(1).max(255), // 1 to 255 characters
+  value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(), // string, string array or number; may be null or omitted
+})
+
+/**
+ * AgentKnowledgeMetadataConditions: optional list of metadata conditions together with a `logical_operator` of 'and' or 'or'.
+ */
+export const zAgentKnowledgeMetadataConditions = z.object({
+  conditions: z.array(zAgentKnowledgeMetadataCondition).optional(),
+  logical_operator: z.enum(['and', 'or']).optional().default('and'), // defaults to 'and'; presumably how the conditions are combined (TODO confirm)
+})
+
+/**
+ * AgentKnowledgeMetadataFilteringConfig: metadata filtering settings; `mode` is 'automatic', 'disabled' or 'manual'.
+ */
+export const zAgentKnowledgeMetadataFilteringConfig = z.object({
+  conditions: zAgentKnowledgeMetadataConditions.nullish(),
+  mode: z.enum(['automatic', 'disabled', 'manual']).optional().default('disabled'), // defaults to 'disabled' when omitted
+  model_config: zAgentKnowledgeModelConfig.nullish(), // NOTE(review): likely only relevant to 'automatic' mode - confirm against the backend
+})
+
+/**
+ * AgentKnowledgeSetConfig: one knowledge set; `datasets`, `id`, `name`, `query` and `retrieval` are required, `description` and `metadata_filtering` are not.
+ */
+export const zAgentKnowledgeSetConfig = z.object({
+  datasets: z.array(zAgentKnowledgeDatasetConfig), // required, but the array itself has no minimum length
+  description: z.string().nullish(),
+  id: z.string().min(1).max(255), // 1 to 255 characters
+  metadata_filtering: zAgentKnowledgeMetadataFilteringConfig.optional(),
+  name: z.string().min(1).max(255), // 1 to 255 characters
+  query: zAgentKnowledgeQueryConfig,
+  retrieval: zAgentKnowledgeRetrievalConfig,
+})
+
+/**
+ * AgentSoulKnowledgeConfig: knowledge section of the agent soul, holding an optional list of knowledge sets.
+ */
+export const zAgentSoulKnowledgeConfig = z.object({
+  sets: z.array(zAgentKnowledgeSetConfig).optional(),
+})
+
+/**
+ * AgentSoulConfig: top-level agent soul schema; every section may be omitted and `model` may also be null.
+ */
+export const zAgentSoulConfig = z.object({
+  app_features: zAgentSoulAppFeaturesConfig.optional(),
+  app_variables: z.array(zAppVariableConfig).optional(),
+  env: zAgentSoulEnvConfig.optional(),
+  human: zAgentSoulHumanConfig.optional(),
+  knowledge: zAgentSoulKnowledgeConfig.optional(),
+  memory: zAgentSoulMemoryConfig.optional(),
+  misc_legacy: zAgentSoulAppFeaturesConfig.optional(), // validated with the same schema as `app_features`
+  model: zAgentSoulModelConfig.nullish(),
+  prompt: zAgentSoulPromptConfig.optional(),
+  sandbox: zAgentSoulSandboxConfig.optional(),
+  schema_version: z.int().optional().default(1), // defaults to 1 when omitted
+  tools: zAgentSoulToolsConfig.optional(),
+})
+
+/**
+ * WorkflowAgentComposerResponse: composer response for the 'workflow' variant; `agent_soul`, `node_job`, `save_options`, `soul_lock` and `variant` are required.
+ */
+export const zWorkflowAgentComposerResponse = z.object({
+  active_config_snapshot: zAgentConfigSnapshotSummaryResponse.nullish(),
+  agent: zAgentComposerAgentResponse.nullish(),
+  agent_soul: zAgentSoulConfig,
+  app_id: z.string().nullish(),
+  binding: zAgentComposerBindingResponse.nullish(),
+  effective_declared_outputs: z.array(zDeclaredOutputConfig).optional(),
+  impact_summary: zAgentComposerImpactResponse.nullish(),
+  node_id: z.string().nullish(),
+  node_job: zWorkflowNodeJobConfig,
+  save_options: z.array(zComposerSaveStrategy),
+  soul_lock: zAgentComposerSoulLockResponse,
+  validation: zComposerValidationFindingsResponse.nullish(),
+  variant: z.literal('workflow'), // only the literal 'workflow' is accepted
+  workflow_id: z.string().nullish(),
+})
+
+/**
+ * ComposerSavePayload: composer save request body; only `save_strategy` and `variant` are required.
+ */
+export const zComposerSavePayload = z.object({
+  agent_soul: zAgentSoulConfig.nullish(),
+  binding: zComposerBindingPayload.nullish(),
+  client_revision_id: z.string().nullish(),
+  idempotency_key: z.string().nullish(),
+  new_agent_name: z.string().min(1).max(255).nullish(), // when present, 1 to 255 characters
+  node_job: zWorkflowNodeJobConfig.nullish(),
+  save_strategy: zComposerSaveStrategy,
+  soul_lock: zComposerSoulLockPayload.optional(),
+  variant: zComposerVariant,
+  version_note: z.string().nullish(),
+})
+
 /**
 * GeneratedAppResponse
 */