diff --git a/api/clients/agent_backend/request_builder.py b/api/clients/agent_backend/request_builder.py
index 6eadd4ce3d8..29cc28a3179 100644
--- a/api/clients/agent_backend/request_builder.py
+++ b/api/clients/agent_backend/request_builder.py
@@ -312,7 +312,7 @@ class AgentBackendRunRequestBuilder:
)
)
- if run_input.knowledge is not None and run_input.knowledge.dataset_ids:
+ if run_input.knowledge is not None and run_input.knowledge.sets:
layers.append(
RunLayerSpec(
name=DIFY_KNOWLEDGE_BASE_LAYER_ID,
@@ -513,7 +513,7 @@ class AgentBackendRunRequestBuilder:
)
)
- if run_input.knowledge is not None and run_input.knowledge.dataset_ids:
+ if run_input.knowledge is not None and run_input.knowledge.sets:
layers.append(
RunLayerSpec(
name=DIFY_KNOWLEDGE_BASE_LAYER_ID,
diff --git a/api/controllers/console/agent/composer.py b/api/controllers/console/agent/composer.py
index 2cd01e427f7..32c134b9fe3 100644
--- a/api/controllers/console/agent/composer.py
+++ b/api/controllers/console/agent/composer.py
@@ -105,6 +105,7 @@ class WorkflowAgentComposerValidateApi(Resource):
def post(self, tenant_id: str, app_model: App, node_id: str):
payload = ComposerSavePayload.model_validate(console_ns.payload or {})
ComposerConfigValidator.validate_save_payload(payload)
+ AgentComposerService.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
findings = AgentComposerService.collect_validation_findings(
tenant_id=tenant_id,
payload=payload,
@@ -239,6 +240,7 @@ class AgentComposerValidateApi(Resource):
_resolve_agent_app_id(tenant_id=tenant_id, agent_id=agent_id)
payload = ComposerSavePayload.model_validate(console_ns.payload or {})
ComposerConfigValidator.validate_save_payload(payload)
+ AgentComposerService.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
findings = AgentComposerService.collect_validation_findings(
tenant_id=tenant_id,
payload=payload,
diff --git a/api/core/workflow/nodes/agent_v2/runtime_feature_manifest.py b/api/core/workflow/nodes/agent_v2/runtime_feature_manifest.py
index fa7b28cbb0a..8fd2783f61f 100644
--- a/api/core/workflow/nodes/agent_v2/runtime_feature_manifest.py
+++ b/api/core/workflow/nodes/agent_v2/runtime_feature_manifest.py
@@ -3,6 +3,7 @@ from __future__ import annotations
from typing import Any
from models.agent_config_entities import AgentSoulConfig
+from services.agent.knowledge_datasets import list_agent_soul_knowledge_dataset_ids
SUPPORTED_AGENT_BACKEND_FEATURES = frozenset(
{
@@ -48,9 +49,7 @@ def build_runtime_feature_manifest(agent_soul: AgentSoulConfig) -> dict[str, Any
)
reserved_status = dict.fromkeys(sorted(RESERVED_AGENT_BACKEND_FEATURES), "reserved_not_executed")
- reserved_status["knowledge"] = (
- "supported_by_knowledge_layer" if list_configured_knowledge_dataset_ids(agent_soul) else "not_configured"
- )
+ reserved_status["knowledge"] = "supported_by_knowledge_layer" if agent_soul.knowledge.sets else "not_configured"
reserved_status["tools.dify_tools"] = "supported_when_config_valid"
reserved_status["tools.cli_tools"] = "supported_by_shell_bootstrap"
reserved_status["env"] = "supported_by_shell_bootstrap"
@@ -66,14 +65,14 @@ def build_runtime_feature_manifest(agent_soul: AgentSoulConfig) -> dict[str, Any
def list_configured_knowledge_dataset_ids(agent_soul: AgentSoulConfig) -> list[str]:
- """Return the normalized knowledge dataset ids that can produce a runtime layer.
+ """Return normalized dataset ids selected by Agent v2 knowledge sets.
``build_runtime_feature_manifest()`` and ``build_knowledge_layer_config()``
- must stay aligned: both decide knowledge support from this effective,
- non-blank dataset-id set rather than from raw
- ``agent_soul.knowledge.datasets`` entries.
+ stay aligned on the set-based contract: DTO validation rejects blank dataset
+ ids before runtime, so this helper only flattens configured set datasets for
+ metadata/diagnostic surfaces that still need a dataset-id summary.
"""
- return [dataset_id for dataset in agent_soul.knowledge.datasets if (dataset_id := (dataset.id or "").strip())]
+ # Thin wrapper kept for existing callers; the flattening lives in services.agent.knowledge_datasets.
+ return list_agent_soul_knowledge_dataset_ids(agent_soul)
def _get_nested(value: dict[str, Any], path: str) -> Any:
diff --git a/api/core/workflow/nodes/agent_v2/runtime_request_builder.py b/api/core/workflow/nodes/agent_v2/runtime_request_builder.py
index e3c2dcee839..366076a7c68 100644
--- a/api/core/workflow/nodes/agent_v2/runtime_request_builder.py
+++ b/api/core/workflow/nodes/agent_v2/runtime_request_builder.py
@@ -15,7 +15,16 @@ from dify_agent.layers.execution_context import (
DifyExecutionContextLayerConfig,
DifyExecutionContextUserFrom,
)
-from dify_agent.layers.knowledge import DifyKnowledgeBaseLayerConfig, DifyKnowledgeRetrievalConfig
+from dify_agent.layers.knowledge import (
+ DifyKnowledgeBaseLayerConfig,
+ DifyKnowledgeDatasetConfig,
+ DifyKnowledgeMetadataFilteringConfig,
+ DifyKnowledgeModelConfig,
+ DifyKnowledgeQueryConfig,
+ DifyKnowledgeRerankingModelConfig,
+ DifyKnowledgeRetrievalConfig,
+ DifyKnowledgeSetConfig,
+)
from dify_agent.layers.shell import (
DifyShellCliToolConfig,
DifyShellEnvVarConfig,
@@ -40,7 +49,9 @@ from graphon.file import FileTransferMethod
from graphon.variables.segments import Segment
from models.agent import Agent, AgentConfigSnapshot, WorkflowAgentNodeBinding
from models.agent_config_entities import (
- AgentKnowledgeQueryConfig,
+ AgentKnowledgeMetadataFilteringConfig,
+ AgentKnowledgeModelConfig,
+ AgentKnowledgeRetrievalConfig,
AgentSoulConfig,
DeclaredArrayItem,
DeclaredOutputChildConfig,
@@ -547,42 +558,84 @@ def build_shell_layer_config(agent_soul: AgentSoulConfig) -> DifyShellLayerConfi
def build_knowledge_layer_config(agent_soul: AgentSoulConfig) -> DifyKnowledgeBaseLayerConfig | None:
- """Map Agent Soul knowledge config into the fixed Dify knowledge-base layer.
+ """Map Agent Soul knowledge sets into one Dify knowledge-base layer.
- Normalization intentionally matches the current dify-agent runtime contract:
-
- - blank or missing dataset ids are ignored;
- - if no valid dataset ids remain, no knowledge layer is injected;
- - retrieval mode is always forced to ``multiple`` in this first wiring pass;
- - ``top_k`` falls back to a stable runtime default when the soul omits it;
- - ``score_threshold`` is only forwarded when the product config explicitly
- enables it, otherwise the layer keeps the disabled/default ``0.0`` value;
- - metadata filtering stays at the layer DTO default (disabled).
+ Agent Soul DTO validation owns malformed set rejection. Runtime mapping is
+ intentionally lossless: every configured set is forwarded with its query
+ policy, dataset refs, retrieval controls, and metadata-filtering controls.
+ ``score_threshold=None`` means disabled threshold filtering and maps to the
+ inner retrieval request's ``0.0`` default through the Agent backend DTO.
+
+ Dataset ids are the one normalized field: they are forwarded
+ whitespace-stripped, matching the stripped form that
+ ``AgentKnowledgeSetConfig`` validation checks, so a padded id that passed
+ validation is not forwarded with its padding.
+
+ Returns ``None`` when no knowledge sets are configured, meaning no
+ knowledge layer is emitted.
"""
- dataset_ids = list_configured_knowledge_dataset_ids(agent_soul)
- if not dataset_ids:
+ if not agent_soul.knowledge.sets:
return None
- query_config = agent_soul.knowledge.query_config
return DifyKnowledgeBaseLayerConfig(
- dataset_ids=dataset_ids,
- retrieval=DifyKnowledgeRetrievalConfig(
- mode="multiple",
- top_k=_knowledge_top_k(query_config),
- score_threshold=_knowledge_score_threshold(query_config),
- ),
+ sets=[
+ DifyKnowledgeSetConfig(
+ id=knowledge_set.id,
+ name=knowledge_set.name,
+ description=knowledge_set.description,
+ datasets=[
+ DifyKnowledgeDatasetConfig(
+ # Validation only checks the stripped id, so strip here as well.
+ id=(dataset.id or "").strip(),
+ name=dataset.name,
+ description=dataset.description,
+ )
+ for dataset in knowledge_set.datasets
+ ],
+ query=DifyKnowledgeQueryConfig(
+ mode=cast(Literal["user_query", "generated_query"], knowledge_set.query.mode.value),
+ value=knowledge_set.query.value,
+ ),
+ retrieval=_knowledge_retrieval_config(knowledge_set.retrieval),
+ metadata_filtering=_knowledge_metadata_filtering_config(knowledge_set.metadata_filtering),
+ )
+ for knowledge_set in agent_soul.knowledge.sets
+ ],
)
-def _knowledge_top_k(query_config: AgentKnowledgeQueryConfig) -> int:
- top_k = query_config.top_k
- return top_k if isinstance(top_k, int) and top_k >= 1 else 4
+def _knowledge_retrieval_config(retrieval: AgentKnowledgeRetrievalConfig) -> DifyKnowledgeRetrievalConfig:
+    """Convert one knowledge set's retrieval policy into the Agent backend retrieval DTO.
+
+    Optional nested values (reranking model, weights, model) stay ``None``
+    when the Agent Soul config omits them.
+    """
+    reranker: DifyKnowledgeRerankingModelConfig | None = None
+    if retrieval.reranking_model is not None:
+        reranker = DifyKnowledgeRerankingModelConfig(
+            provider=retrieval.reranking_model.provider,
+            model=retrieval.reranking_model.model,
+        )
+
+    weight_settings: dict[str, Any] | None = None
+    if retrieval.weights is not None:
+        # ``exclude_none`` drops weight fields that were left unset.
+        weight_settings = cast(dict[str, Any], retrieval.weights.model_dump(mode="json", exclude_none=True))
+
+    # An unset threshold is forwarded as 0.0.
+    threshold = retrieval.score_threshold or 0.0
+    return DifyKnowledgeRetrievalConfig(
+        mode=retrieval.mode,
+        top_k=retrieval.top_k,
+        score_threshold=threshold,
+        reranking_mode=retrieval.reranking_mode,
+        reranking_enable=retrieval.reranking_enable,
+        reranking_model=reranker,
+        weights=weight_settings,
+        model=_knowledge_model_config(retrieval.model),
+    )
-def _knowledge_score_threshold(query_config: AgentKnowledgeQueryConfig) -> float:
- if query_config.score_threshold_enabled and query_config.score_threshold is not None:
- return query_config.score_threshold
- return 0.0
+def _knowledge_metadata_filtering_config(
+    metadata_filtering: AgentKnowledgeMetadataFilteringConfig,
+) -> DifyKnowledgeMetadataFilteringConfig:
+    """Convert one knowledge set's metadata-filtering policy into the Agent backend DTO."""
+    condition_group = metadata_filtering.conditions
+    dumped_conditions: Any = None
+    if condition_group is not None:
+        dumped_conditions = cast(Any, condition_group.model_dump(mode="json"))
+
+    filter_model = _knowledge_model_config(metadata_filtering.metadata_model_config)
+    return DifyKnowledgeMetadataFilteringConfig(
+        mode=metadata_filtering.mode,
+        # Passed under the ``model_config`` keyword; the Agent Soul attribute is ``metadata_model_config``.
+        model_config=filter_model,
+        conditions=dumped_conditions,
+    )
+
+
+def _knowledge_model_config(model: AgentKnowledgeModelConfig | None) -> DifyKnowledgeModelConfig | None:
+    """Copy an optional Agent Soul model reference into the backend DTO; ``None`` passes through."""
+    if model is not None:
+        return DifyKnowledgeModelConfig(
+            provider=model.provider,
+            name=model.name,
+            mode=model.mode,
+            completion_params=model.completion_params,
+        )
+    return None
def build_ask_human_layer_config(agent_soul: AgentSoulConfig) -> DifyAskHumanLayerConfig | None:
diff --git a/api/core/workflow/nodes/agent_v2/validators.py b/api/core/workflow/nodes/agent_v2/validators.py
index 2eabac10dd6..7b915fe02be 100644
--- a/api/core/workflow/nodes/agent_v2/validators.py
+++ b/api/core/workflow/nodes/agent_v2/validators.py
@@ -18,6 +18,7 @@ from models.agent_config_entities import (
)
from models.model import UploadFile
from models.workflow import Workflow
+from services.agent.knowledge_datasets import list_missing_tenant_knowledge_dataset_ids
from .entities import DifyAgentNodeData
@@ -146,6 +147,7 @@ class WorkflowAgentNodeValidator:
)
cls._validate_agent_soul_env(binding=binding, agent_soul=agent_soul)
cls._validate_agent_soul_tools(binding=binding, agent_soul=agent_soul)
+ cls._validate_agent_soul_knowledge(binding=binding, agent_soul=agent_soul)
node_job = WorkflowNodeJobConfig.model_validate(binding.node_job_config_dict)
cls.validate_node_job(session=session, binding=binding, node_job=node_job, topology=topology)
@@ -364,6 +366,24 @@ class WorkflowAgentNodeValidator:
)
cli_tool_names.add(normalized_name)
+    @classmethod
+    def _validate_agent_soul_knowledge(
+        cls,
+        *,
+        binding: WorkflowAgentNodeBinding,
+        agent_soul: AgentSoulConfig,
+    ) -> None:
+        """Reject knowledge datasets that cannot be resolved for the binding's tenant.
+
+        Raises:
+            WorkflowAgentNodeValidationError: when any dataset id referenced by
+                the soul's knowledge sets is reported missing for ``binding.tenant_id``.
+        """
+        unresolved_ids = list_missing_tenant_knowledge_dataset_ids(
+            tenant_id=binding.tenant_id,
+            agent_soul=agent_soul,
+        )
+        if not unresolved_ids:
+            return
+        raise WorkflowAgentNodeValidationError(
+            f"Workflow Agent node {binding.node_id} references missing or out-of-scope knowledge datasets: "
+            f"{', '.join(unresolved_ids)}."
+        )
+
@classmethod
def _validate_agent_soul_env(
cls,
diff --git a/api/fields/agent_fields.py b/api/fields/agent_fields.py
index e60a6b01426..d664a2af12a 100644
--- a/api/fields/agent_fields.py
+++ b/api/fields/agent_fields.py
@@ -400,10 +400,22 @@ class AgentComposerNodeJobCandidatesResponse(ResponseModel):
human_contacts: list[AgentHumanContactConfig] = Field(default_factory=list)
+class AgentComposerKnowledgeDatasetCandidateResponse(AgentKnowledgeDatasetConfig):
+ # Dataset candidate row: the stored dataset fields plus a ``missing`` flag.
+ # NOTE(review): presumably mirrors the ``missing`` value built in composer_candidates
+ # (dataset lookup returned no row) -- confirm against the serializer.
+ missing: bool = False
+
+
+class AgentComposerKnowledgeSetCandidateResponse(ResponseModel):
+ # One knowledge set as listed in composer soul candidates.
+ id: str
+ name: str
+ description: str | None = None
+ # Per-dataset candidate rows for this set; empty by default.
+ datasets: list[AgentComposerKnowledgeDatasetCandidateResponse] = Field(default_factory=list)
+ # Ids of this set's datasets flagged as missing; empty by default.
+ missing_dataset_ids: list[str] = Field(default_factory=list)
+
+
class AgentComposerSoulCandidatesResponse(ResponseModel):
dify_tools: list[AgentComposerDifyToolCandidateResponse] = Field(default_factory=list)
cli_tools: list[AgentCliToolConfig] = Field(default_factory=list)
- knowledge_datasets: list[AgentKnowledgeDatasetConfig] = Field(default_factory=list)
+ knowledge_sets: list[AgentComposerKnowledgeSetCandidateResponse] = Field(default_factory=list)
human_contacts: list[AgentHumanContactConfig] = Field(default_factory=list)
diff --git a/api/models/agent_config_entities.py b/api/models/agent_config_entities.py
index 2503ba66f06..2f81495e9f9 100644
--- a/api/models/agent_config_entities.py
+++ b/api/models/agent_config_entities.py
@@ -2,10 +2,11 @@ from __future__ import annotations
import re
from enum import StrEnum
-from typing import Annotated, Any, Final, Literal
+from typing import Annotated, Any, Final, Literal, Self
from pydantic import BaseModel, ConfigDict, Field, WithJsonSchema, field_validator, model_validator
+from core.rag.entities.metadata_entities import ConditionValue, SupportedComparisonOperator
from core.workflow.file_reference import is_canonical_file_reference
from graphon.file import FileTransferMethod
@@ -236,17 +237,161 @@ class AgentCliToolConfig(AgentFlexibleConfig):
inferred_from: str | None = Field(default=None, max_length=255)
-class AgentKnowledgeDatasetConfig(AgentFlexibleConfig):
+class AgentKnowledgeDatasetConfig(BaseModel):
+ # Dataset reference inside a knowledge set; unknown keys are rejected.
+ model_config = ConfigDict(extra="forbid")
+
+ # ``id`` is optional at field level; AgentKnowledgeSetConfig.validate_datasets rejects blank ids per set.
id: str | None = Field(default=None, max_length=255)
name: str | None = Field(default=None, max_length=255)
description: str | None = None
-class AgentKnowledgeQueryConfig(AgentFlexibleConfig):
- query: str | None = None
+class AgentKnowledgeQueryConfig(BaseModel):
+    """Per-set query policy for Agent v2 knowledge retrieval.
+
+    Agent v2 stores knowledge as explicit ``knowledge.sets`` rather than the
+    legacy flat ``datasets`` / ``query_mode`` / ``query_config`` shape. Each
+    set owns its own query policy, so ``user_query`` must carry an explicit
+    ``value`` while ``generated_query`` leaves that value empty.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    mode: AgentKnowledgeQueryMode
+    value: str | None = None
+
+    @model_validator(mode="after")
+    def validate_query(self) -> Self:
+        # Only ``user_query`` requires text; any other mode is accepted as-is.
+        if self.mode != AgentKnowledgeQueryMode.USER_QUERY:
+            return self
+        query_text = self.value.strip() if self.value else ""
+        if not query_text:
+            raise ValueError("knowledge query.value is required for user_query mode")
+        return self
+
+
+class AgentKnowledgeModelConfig(BaseModel):
+ # Model reference used by knowledge retrieval (``retrieval.model``) and metadata
+ # filtering (``metadata_model_config``); unknown keys are rejected.
+ model_config = ConfigDict(extra="forbid")
+
+ # provider, name, and mode must each be non-empty.
+ provider: str = Field(min_length=1, max_length=255)
+ name: str = Field(min_length=1, max_length=255)
+ mode: str = Field(min_length=1, max_length=64)
+ # Free-form completion parameters; defaults to an empty dict.
+ completion_params: dict[str, Any] = Field(default_factory=dict)
+
+
+class AgentKnowledgeRerankingModelConfig(BaseModel):
+ # Reranking model reference for a retrieval policy; unknown keys are rejected.
+ model_config = ConfigDict(extra="forbid")
+
+ # Both fields are required and must be non-empty.
+ provider: str = Field(min_length=1, max_length=255)
+ model: str = Field(min_length=1, max_length=255)
+
+
+class AgentKnowledgeWeightedScoreConfig(AgentFlexibleConfig):
+ # Optional weighted-score settings attached to a retrieval policy (``retrieval.weights``).
+ # NOTE(review): inherits AgentFlexibleConfig rather than declaring extra="forbid" like the
+ # neighbouring models -- presumably to tolerate extra keys; confirm against AgentFlexibleConfig.
+ weight_type: str | None = Field(default=None, max_length=64)
+ vector_setting: dict[str, Any] | None = None
+ keyword_setting: dict[str, Any] | None = None
+
+
+class AgentKnowledgeRetrievalConfig(BaseModel):
+    """Per-set retrieval policy for Agent v2 knowledge retrieval.
+
+    Retrieval settings now live on each knowledge set instead of one shared
+    flat config. A set may use either ``multiple`` retrieval with ``top_k`` or
+    ``single`` retrieval with a required model config.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    mode: Literal["single", "multiple"]
top_k: int | None = Field(default=None, ge=1)
score_threshold: float | None = Field(default=None, ge=0, le=1)
- score_threshold_enabled: bool | None = None
+    reranking_mode: str = "reranking_model"
+    reranking_enable: bool = True
+    reranking_model: AgentKnowledgeRerankingModelConfig | None = None
+    weights: AgentKnowledgeWeightedScoreConfig | None = None
+    model: AgentKnowledgeModelConfig | None = None
+
+    @model_validator(mode="after")
+    def validate_mode_fields(self) -> Self:
+        # Each mode has exactly one companion field that must be present.
+        if self.mode == "multiple":
+            if self.top_k is None:
+                raise ValueError("knowledge retrieval.top_k is required for multiple mode")
+        elif self.mode == "single":
+            if self.model is None:
+                raise ValueError("knowledge retrieval.model is required for single mode")
+        return self
+
+
+class AgentKnowledgeMetadataCondition(BaseModel):
+ # One metadata filter condition: field name, comparison operator, optional value.
+ model_config = ConfigDict(extra="forbid")
+
+ name: str = Field(min_length=1, max_length=255)
+ comparison_operator: SupportedComparisonOperator
+ # Defaults to None when the condition carries no value.
+ value: ConditionValue = None
+
+
+class AgentKnowledgeMetadataConditions(BaseModel):
+ # Group of metadata conditions joined by one logical operator; unknown keys are rejected.
+ model_config = ConfigDict(extra="forbid")
+
+ # Defaults to "and".
+ logical_operator: Literal["and", "or"] = "and"
+ # Empty by default; manual metadata filtering requires at least one entry.
+ conditions: list[AgentKnowledgeMetadataCondition] = Field(default_factory=list)
+
+
+class AgentKnowledgeMetadataFilteringConfig(BaseModel):
+    """Per-set metadata filtering policy.
+
+    The Python attribute uses ``metadata_model_config`` for clarity because the
+    model belongs to metadata filtering specifically, while the external API and
+    generated schema keep the historical ``model_config`` field name via alias.
+    """
+
+    model_config = ConfigDict(extra="forbid", populate_by_name=True)
+
+    mode: Literal["disabled", "automatic", "manual"] = "disabled"
+    # Internal name is explicit; wire format remains ``model_config``.
+    metadata_model_config: AgentKnowledgeModelConfig | None = Field(default=None, alias="model_config")
+    conditions: AgentKnowledgeMetadataConditions | None = None
+
+    @model_validator(mode="after")
+    def validate_mode_fields(self) -> Self:
+        # ``disabled`` needs nothing; the other two modes each require one companion field.
+        if self.mode == "automatic":
+            if self.metadata_model_config is None:
+                raise ValueError("metadata_filtering.model_config is required for automatic mode")
+        elif self.mode == "manual":
+            has_conditions = self.conditions is not None and bool(self.conditions.conditions)
+            if not has_conditions:
+                raise ValueError("metadata_filtering.conditions is required for manual mode")
+        return self
+
+
+class AgentKnowledgeSetConfig(BaseModel):
+    """One explicit knowledge set in Agent v2.
+
+    ``knowledge.sets`` replaces the old flat knowledge config. Each set owns
+    its datasets plus query, retrieval, and metadata policies. An individual
+    set must contain at least one dataset id even though the overall knowledge
+    section may be empty, which is how callers express "no knowledge layer".
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    id: str = Field(min_length=1, max_length=255)
+    name: str = Field(min_length=1, max_length=255)
+    description: str | None = None
+    datasets: list[AgentKnowledgeDatasetConfig]
+    query: AgentKnowledgeQueryConfig
+    retrieval: AgentKnowledgeRetrievalConfig
+    metadata_filtering: AgentKnowledgeMetadataFilteringConfig = Field(
+        default_factory=AgentKnowledgeMetadataFilteringConfig
+    )
+
+    @field_validator("id", "name")
+    @classmethod
+    def validate_non_blank_identity(cls, value: str) -> str:
+        # Store the trimmed form; whitespace-only identities are rejected.
+        if trimmed := value.strip():
+            return trimmed
+        raise ValueError("knowledge set id and name must not be blank")
+
+    @model_validator(mode="after")
+    def validate_datasets(self) -> Self:
+        # Ids are compared in stripped form; the stored dataset entries are left untouched.
+        normalized_ids = [(entry.id or "").strip() for entry in self.datasets]
+        # The blank/empty check runs over the whole list before the uniqueness check.
+        if not (normalized_ids and all(normalized_ids)):
+            raise ValueError("knowledge set requires at least one dataset id")
+        if len(set(normalized_ids)) < len(normalized_ids):
+            raise ValueError("knowledge set dataset ids must be unique")
+        return self
class AgentHumanContactConfig(AgentFlexibleConfig):
@@ -453,9 +598,28 @@ class AgentSoulToolsConfig(BaseModel):
class AgentSoulKnowledgeConfig(BaseModel):
- datasets: list[AgentKnowledgeDatasetConfig] = Field(default_factory=list)
- query_mode: AgentKnowledgeQueryMode | None = None
- query_config: AgentKnowledgeQueryConfig = Field(default_factory=AgentKnowledgeQueryConfig)
+    """Top-level Agent v2 knowledge config.
+
+    Agent v2 models knowledge as explicit sets instead of one flat
+    ``datasets`` / ``query_mode`` / ``query_config`` block. An empty ``sets``
+    list means no knowledge layer should be emitted at runtime, while set-name
+    uniqueness stays case-insensitive because runtime selection addresses sets
+    by name.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    sets: list[AgentKnowledgeSetConfig] = Field(default_factory=list)
+
+    @model_validator(mode="after")
+    def validate_unique_sets(self) -> Self:
+        total = len(self.sets)
+        # Ids are compared exactly (after trimming); names are compared lower-cased.
+        distinct_ids = {entry.id.strip() for entry in self.sets}
+        if len(distinct_ids) != total:
+            raise ValueError("knowledge set ids must be unique")
+        distinct_names = {entry.name.strip().lower() for entry in self.sets}
+        if len(distinct_names) != total:
+            raise ValueError("knowledge set names must be unique")
+        return self
class AgentSoulHumanConfig(BaseModel):
diff --git a/api/openapi/markdown/console-openapi.md b/api/openapi/markdown/console-openapi.md
index c600984c089..f37e05f8c2c 100644
--- a/api/openapi/markdown/console-openapi.md
+++ b/api/openapi/markdown/console-openapi.md
@@ -12433,6 +12433,25 @@ Risk marker for CLI tool bootstrap commands.
| current_snapshot_id | string | | No |
| workflow_node_count | integer | | Yes |
+#### AgentComposerKnowledgeDatasetCandidateResponse
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| description | string | | No |
+| id | string | | No |
+| missing | boolean | | No |
+| name | string | | No |
+
+#### AgentComposerKnowledgeSetCandidateResponse
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| datasets | [ [AgentComposerKnowledgeDatasetCandidateResponse](#agentcomposerknowledgedatasetcandidateresponse) ] | | No |
+| description | string | | No |
+| id | string | | Yes |
+| missing_dataset_ids | [ string ] | | No |
+| name | string | | Yes |
+
#### AgentComposerNodeJobCandidatesResponse
| Name | Type | Description | Required |
@@ -12448,7 +12467,7 @@ Risk marker for CLI tool bootstrap commands.
| cli_tools | [ [AgentCliToolConfig](#agentclitoolconfig) ] | | No |
| dify_tools | [ [AgentComposerDifyToolCandidateResponse](#agentcomposerdifytoolcandidateresponse) ] | | No |
| human_contacts | [ [AgentHumanContactConfig](#agenthumancontactconfig) ] | | No |
-| knowledge_datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] | | No |
+| knowledge_sets | [ [AgentComposerKnowledgeSetCandidateResponse](#agentcomposerknowledgesetcandidateresponse) ] | | No |
#### AgentComposerSoulLockResponse
@@ -12842,14 +12861,44 @@ the current roster/workflow APIs scoped to Dify Agent.
| id | string | | No |
| name | string | | No |
+#### AgentKnowledgeMetadataCondition
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| comparison_operator | string,
**Available values:** "<", "=", ">", "after", "before", "contains", "empty", "end with", "in", "is", "is not", "not contains", "not empty", "not in", "start with", "≠", "≤", "≥" | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
+| name | string | | Yes |
+| value | string
[ string ]
number | | No |
+
+#### AgentKnowledgeMetadataConditions
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| conditions | [ [AgentKnowledgeMetadataCondition](#agentknowledgemetadatacondition) ] | | No |
+| logical_operator | string,
**Available values:** "and", "or",
**Default:** and | *Enum:* `"and"`, `"or"` | No |
+
+#### AgentKnowledgeMetadataFilteringConfig
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| conditions | [AgentKnowledgeMetadataConditions](#agentknowledgemetadataconditions) | | No |
+| mode | string,
**Available values:** "automatic", "disabled", "manual",
**Default:** disabled | *Enum:* `"automatic"`, `"disabled"`, `"manual"` | No |
+| model_config | [AgentKnowledgeModelConfig](#agentknowledgemodelconfig) | | No |
+
+#### AgentKnowledgeModelConfig
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| completion_params | object | | No |
+| mode | string | | Yes |
+| name | string | | Yes |
+| provider | string | | Yes |
+
#### AgentKnowledgeQueryConfig
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
-| query | string | | No |
-| score_threshold | number | | No |
-| score_threshold_enabled | boolean | | No |
-| top_k | integer | | No |
+| mode | [AgentKnowledgeQueryMode](#agentknowledgequerymode) | | Yes |
+| value | string | | No |
#### AgentKnowledgeQueryMode
@@ -12857,6 +12906,46 @@ the current roster/workflow APIs scoped to Dify Agent.
| ---- | ---- | ----------- | -------- |
| AgentKnowledgeQueryMode | string | | |
+#### AgentKnowledgeRerankingModelConfig
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| model | string | | Yes |
+| provider | string | | Yes |
+
+#### AgentKnowledgeRetrievalConfig
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| mode | string,
**Available values:** "multiple", "single" | *Enum:* `"multiple"`, `"single"` | Yes |
+| model | [AgentKnowledgeModelConfig](#agentknowledgemodelconfig) | | No |
+| reranking_enable | boolean,
**Default:** true | | No |
+| reranking_mode | string,
**Default:** reranking_model | | No |
+| reranking_model | [AgentKnowledgeRerankingModelConfig](#agentknowledgererankingmodelconfig) | | No |
+| score_threshold | number | | No |
+| top_k | integer | | No |
+| weights | [AgentKnowledgeWeightedScoreConfig](#agentknowledgeweightedscoreconfig) | | No |
+
+#### AgentKnowledgeSetConfig
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] | | Yes |
+| description | string | | No |
+| id | string | | Yes |
+| metadata_filtering | [AgentKnowledgeMetadataFilteringConfig](#agentknowledgemetadatafilteringconfig) | | No |
+| name | string | | Yes |
+| query | [AgentKnowledgeQueryConfig](#agentknowledgequeryconfig) | | Yes |
+| retrieval | [AgentKnowledgeRetrievalConfig](#agentknowledgeretrievalconfig) | | Yes |
+
+#### AgentKnowledgeWeightedScoreConfig
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| keyword_setting | object | | No |
+| vector_setting | object | | No |
+| weight_type | string | | No |
+
#### AgentLogConversationItemResponse
| Name | Type | Description | Required |
@@ -13258,9 +13347,7 @@ old Agent tool payloads can be read while new payloads stay explicit.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
-| datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] | | No |
-| query_config | [AgentKnowledgeQueryConfig](#agentknowledgequeryconfig) | | No |
-| query_mode | [AgentKnowledgeQueryMode](#agentknowledgequerymode) | | No |
+| sets | [ [AgentKnowledgeSetConfig](#agentknowledgesetconfig) ] | | No |
#### AgentSoulMemoryConfig
diff --git a/api/services/agent/composer_candidates.py b/api/services/agent/composer_candidates.py
index 7868f2a2f63..b897ffc7b9a 100644
--- a/api/services/agent/composer_candidates.py
+++ b/api/services/agent/composer_candidates.py
@@ -25,6 +25,7 @@ from models.agent_config_entities import (
AgentSoulConfig,
DeclaredOutputConfig,
)
+from services.agent.knowledge_datasets import list_agent_soul_knowledge_dataset_ids
MAX_CANDIDATES_PER_LIST = 200
@@ -139,19 +140,34 @@ def soul_candidates(
cli_tools = [tool.model_dump(exclude_none=True) for tool in soul.tools.cli_tools if tool.enabled]
- dataset_ids = [dataset.id for dataset in soul.knowledge.datasets if dataset.id]
+ dataset_ids = list_agent_soul_knowledge_dataset_ids(soul)
dataset_rows = dataset_lookup(dataset_ids) if dataset_ids else {}
- knowledge_datasets: list[dict[str, Any]] = []
- for dataset in soul.knowledge.datasets:
- if not dataset.id:
- continue
- row = dataset_rows.get(dataset.id)
- knowledge_datasets.append(
+ knowledge_sets: list[dict[str, Any]] = []
+ for knowledge_set in soul.knowledge.sets:
+ missing_dataset_ids: list[str] = []
+ datasets: list[dict[str, Any]] = []
+ for dataset in knowledge_set.datasets:
+ dataset_id = (dataset.id or "").strip()
+ if not dataset_id:
+ continue
+ row = dataset_rows.get(dataset_id)
+ if row is None:
+ missing_dataset_ids.append(dataset_id)
+ datasets.append(
+ {
+ "id": dataset_id,
+ "name": (getattr(row, "name", None) or dataset.name or dataset_id),
+ "description": getattr(row, "description", None) or dataset.description,
+ "missing": row is None,
+ }
+ )
+ knowledge_sets.append(
{
- "id": dataset.id,
- "name": (getattr(row, "name", None) or dataset.name or dataset.id),
- "description": getattr(row, "description", None) or dataset.description,
- "missing": row is None,
+ "id": knowledge_set.id,
+ "name": knowledge_set.name,
+ "description": knowledge_set.description,
+ "datasets": datasets,
+ "missing_dataset_ids": missing_dataset_ids,
}
)
@@ -161,7 +177,7 @@ def soul_candidates(
lists = {
"dify_tools": dify_tools,
"cli_tools": cli_tools,
- "knowledge_datasets": knowledge_datasets,
+ "knowledge_sets": knowledge_sets,
"human_contacts": human_contacts,
}
capped: dict[str, list[dict[str, Any]]] = {}
@@ -192,7 +208,6 @@ def _ref_entry(
"inferred": inferred,
}
-
def _capped(values: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], bool]:
if len(values) > MAX_CANDIDATES_PER_LIST:
return values[:MAX_CANDIDATES_PER_LIST], True
diff --git a/api/services/agent/composer_service.py b/api/services/agent/composer_service.py
index 815fdcc4420..ae47cce23c1 100644
--- a/api/services/agent/composer_service.py
+++ b/api/services/agent/composer_service.py
@@ -33,6 +33,11 @@ from services.agent.errors import (
AgentNameConflictError,
AgentNotFoundError,
AgentVersionNotFoundError,
+ InvalidComposerConfigError,
+)
+from services.agent.knowledge_datasets import (
+ get_tenant_knowledge_dataset_rows,
+ list_missing_tenant_knowledge_dataset_ids,
)
from services.entities.agent_entities import (
AgentSoulConfig,
@@ -101,6 +106,7 @@ class AgentComposerService:
_backfill_cli_tool_ids(payload.agent_soul)
ComposerConfigValidator.validate_save_payload(payload)
+ cls.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
workflow = cls._get_draft_workflow(tenant_id=tenant_id, app_id=app_id)
binding = cls._get_workflow_binding(tenant_id=tenant_id, workflow_id=workflow.id, node_id=node_id)
@@ -195,6 +201,7 @@ class AgentComposerService:
raise ValueError("Agent App composer endpoint only accepts agent_app variant")
_backfill_cli_tool_ids(payload.agent_soul)
ComposerConfigValidator.validate_save_payload(payload)
+ cls.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
if payload.agent_soul is None:
raise ValueError("agent_soul is required")
@@ -273,19 +280,15 @@ class AgentComposerService:
agent_id: str | None = None,
) -> dict[str, Any]:
"""ENG-617 soft findings, with DB-backed dataset and drive mention checks."""
- from services.agent.prompt_mentions import MentionKind, parse_prompt_mentions
-
- mentioned_ids: set[str] = set()
- if payload.agent_soul is not None:
- mentioned_ids |= {
- mention.ref_id
- for mention in parse_prompt_mentions(payload.agent_soul.prompt.system_prompt)
- if mention.kind == MentionKind.KNOWLEDGE
- }
- existing_dataset_ids: set[str] | None = None
- if mentioned_ids:
- existing_dataset_ids = set(cls._dataset_rows(tenant_id=tenant_id, dataset_ids=sorted(mentioned_ids)))
- findings = ComposerConfigValidator.collect_soft_findings(payload, existing_dataset_ids=existing_dataset_ids)
+ existing_knowledge_set_ids = (
+ {knowledge_set.id for knowledge_set in payload.agent_soul.knowledge.sets}
+ if payload.agent_soul is not None
+ else None
+ )
+ findings = ComposerConfigValidator.collect_soft_findings(
+ payload,
+ existing_knowledge_set_ids=existing_knowledge_set_ids,
+ )
if agent_id and payload.agent_soul is not None:
findings["warnings"].extend(
cls._drive_mention_findings(
@@ -296,6 +299,24 @@ class AgentComposerService:
)
return findings
+ @classmethod
+ def validate_knowledge_datasets(cls, *, tenant_id: str, agent_soul: AgentSoulConfig | None) -> None:
+ """Hard-validate tenant-scoped knowledge set datasets before saving.
+
+ DTO validators own set shape, duplicate set ids/names, and duplicate
+ dataset ids within one set. This service-level check owns database
+ existence and tenant ownership so invalid or cross-tenant datasets fail
+ before Agent Soul snapshots are persisted.
+ """
+ if agent_soul is None:
+ return
+ missing_ids = list_missing_tenant_knowledge_dataset_ids(tenant_id=tenant_id, agent_soul=agent_soul)
+ if missing_ids:
+ raise InvalidComposerConfigError(
+ "knowledge_dataset_not_found: knowledge sets reference missing or out-of-scope datasets: "
+ + ", ".join(missing_ids)
+ )
+
@classmethod
def resolve_bound_agent_id(cls, *, tenant_id: str, app_id: str) -> str | None:
"""The Agent App's bound roster agent id, if any (validate-endpoint context)."""
@@ -410,7 +431,7 @@ class AgentComposerService:
soul_lists, soul_truncated = soul_candidates(
agent_soul=agent_soul,
- dataset_lookup=lambda ids: cls._dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
+ dataset_lookup=lambda ids: get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
workspace_tools_loader=lambda: cls._workspace_dify_tools(tenant_id=tenant_id, user_id=user_id),
)
truncated = truncated or soul_truncated
@@ -437,7 +458,7 @@ class AgentComposerService:
agent_soul = cls._load_agent_app_soul(tenant_id=tenant_id, app_id=app_id)
soul_lists, truncated = soul_candidates(
agent_soul=agent_soul,
- dataset_lookup=lambda ids: cls._dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
+ dataset_lookup=lambda ids: get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
workspace_tools_loader=lambda: cls._workspace_dify_tools(tenant_id=tenant_id, user_id=user_id),
)
response = ComposerCandidatesResponse(
@@ -530,30 +551,6 @@ class AgentComposerService:
variables = WorkflowDraftVariableService(session=session).list_system_variables(app_id, user_id)
return [(variable.name, variable.value_type.value) for variable in variables.variables]
- @staticmethod
- def _dataset_rows(*, tenant_id: str, dataset_ids: list[str]) -> dict[str, Any]:
- """Tenant-scoped dataset lookup tolerating malformed ids.
-
- Mention ids come from user-editable prompt text; a non-UUID id can never
- match a dataset row, so it is simply absent from the result (-> missing/
- placeholder semantics) instead of breaking the UUID-typed query.
- """
- from uuid import UUID
-
- from services.dataset_service import DatasetService
-
- valid_ids: list[str] = []
- for dataset_id in dataset_ids:
- try:
- UUID(dataset_id)
- except (ValueError, TypeError):
- continue
- valid_ids.append(dataset_id)
- if not valid_ids:
- return {}
- rows, _ = DatasetService.get_datasets_by_ids(valid_ids, tenant_id)
- return {str(row.id): row for row in rows}
-
@staticmethod
def _workspace_dify_tools(*, tenant_id: str, user_id: str) -> list[dict[str, Any]]:
"""Workspace Dify Plugin tools, same source as the tool selector.
diff --git a/api/services/agent/composer_validator.py b/api/services/agent/composer_validator.py
index a1d5ce07655..4a9d0a5a9a9 100644
--- a/api/services/agent/composer_validator.py
+++ b/api/services/agent/composer_validator.py
@@ -141,15 +141,15 @@ class ComposerConfigValidator:
cls,
payload: ComposerSavePayload,
*,
- existing_dataset_ids: set[str] | None = None,
+ existing_knowledge_set_ids: set[str] | None = None,
) -> dict[str, Any]:
"""ENG-617 §5.3/§5.4 soft findings — never block save.
``warnings`` carries ``mention_target_missing`` / ``mention_malformed``
- entries; ``knowledge_retrieval_placeholder`` keeps dangling knowledge
+ entries; ``knowledge_retrieval_placeholder`` keeps dangling knowledge-set
mentions with a placeholder name (0522 consensus) instead of dropping or
- rejecting them. With ``existing_dataset_ids`` provided, configured-but-
- deleted datasets surface as placeholders too.
+ rejecting them. With ``existing_knowledge_set_ids`` provided, mentions
+ whose knowledge set is missing from the current Agent Soul surface as placeholders too.
"""
warnings: list[dict[str, Any]] = []
placeholders: list[dict[str, str]] = []
@@ -181,7 +181,7 @@ class ComposerConfigValidator:
resolved = resolver(mention)
if mention.kind == MentionKind.KNOWLEDGE:
dangling = resolved is None or (
- existing_dataset_ids is not None and mention.ref_id not in existing_dataset_ids
+ existing_knowledge_set_ids is not None and mention.ref_id not in existing_knowledge_set_ids
)
if dangling:
placeholders.append(
diff --git a/api/services/agent/knowledge_datasets.py b/api/services/agent/knowledge_datasets.py
new file mode 100644
index 00000000000..962c562ce15
--- /dev/null
+++ b/api/services/agent/knowledge_datasets.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+from typing import Any
+from uuid import UUID
+
+from models.agent_config_entities import AgentSoulConfig
+
+
+def list_agent_soul_knowledge_dataset_ids(agent_soul: AgentSoulConfig) -> list[str]:
+ """Return normalized unique knowledge dataset ids in config order.
+
+ Agent v2 knowledge dataset selection is owned by ``knowledge.sets``. This
+ helper keeps composer, workflow validation, candidates, and runtime
+ diagnostics aligned on the same normalization rules: strip whitespace, drop
+ blanks, preserve first-seen order, and deduplicate.
+ """
+ dataset_ids: list[str] = []
+ seen: set[str] = set()
+ for knowledge_set in agent_soul.knowledge.sets:
+ for dataset in knowledge_set.datasets:
+ dataset_id = (dataset.id or "").strip()
+ if not dataset_id or dataset_id in seen:
+ continue
+ seen.add(dataset_id)
+ dataset_ids.append(dataset_id)
+ return dataset_ids
+
+
+def get_tenant_knowledge_dataset_rows(*, tenant_id: str, dataset_ids: list[str]) -> dict[str, Any]:
+ """Return tenant-scoped dataset rows for normalized knowledge dataset ids.
+
+ Knowledge ids come from user-editable config. Malformed ids can never match
+ a dataset row, so they are treated as missing instead of breaking the
+ UUID-typed dataset lookup.
+ """
+ from services.dataset_service import DatasetService
+
+ valid_ids: list[str] = []
+ for dataset_id in dataset_ids:
+ try:
+ UUID(dataset_id)
+ except (TypeError, ValueError):
+ continue
+ valid_ids.append(dataset_id)
+
+ if not valid_ids:
+ return {}
+
+ rows, _ = DatasetService.get_datasets_by_ids(valid_ids, tenant_id)
+ return {str(row.id): row for row in rows}
+
+
+def list_missing_tenant_knowledge_dataset_ids(*, tenant_id: str, agent_soul: AgentSoulConfig | None) -> list[str]:
+ """Return normalized knowledge dataset ids missing from the tenant scope."""
+ if agent_soul is None:
+ return []
+
+ dataset_ids = list_agent_soul_knowledge_dataset_ids(agent_soul)
+ if not dataset_ids:
+ return []
+
+ rows = get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=dataset_ids)
+ return [dataset_id for dataset_id in dataset_ids if dataset_id not in rows]
diff --git a/api/services/agent/prompt_mentions.py b/api/services/agent/prompt_mentions.py
index 27bed49c53b..cc35979a644 100644
--- a/api/services/agent/prompt_mentions.py
+++ b/api/services/agent/prompt_mentions.py
@@ -6,7 +6,7 @@ Slash-menu insertions are stored inline in the plain-string prompt as tokens:
``kind`` is a fixed lowercase word; ``id`` points at an item in the Agent
runtime context. For prompt-owned entities that means Agent Soul lists such as
-``tools`` / ``knowledge.datasets`` / ``human.contacts`` and workflow job lists
+``tools`` / ``knowledge.sets`` / ``human.contacts`` and workflow job lists
such as ``previous_node_output_refs`` / ``declared_outputs``. For drive-backed
``skill`` / ``file`` mentions the field stores a URL-encoded drive key and is
resolved against ``agent_drive_files`` at runtime. ``label`` is an optional
@@ -211,9 +211,9 @@ def build_soul_mention_resolver(agent_soul: AgentSoulConfig) -> MentionResolver:
if mention.ref_id in (cli_tool.id, cli_tool.name):
return cli_tool.name or cli_tool.id
case MentionKind.KNOWLEDGE:
- for dataset in agent_soul.knowledge.datasets:
- if mention.ref_id == dataset.id:
- return dataset.name or dataset.id
+ for knowledge_set in agent_soul.knowledge.sets:
+ if mention.ref_id == knowledge_set.id:
+ return knowledge_set.name or knowledge_set.id
case MentionKind.HUMAN:
return _resolve_human_contact(agent_soul.human.contacts, mention.ref_id)
case _:
diff --git a/api/tests/unit_tests/clients/agent_backend/test_request_builder.py b/api/tests/unit_tests/clients/agent_backend/test_request_builder.py
index c91d0fd3e8a..3bb73289580 100644
--- a/api/tests/unit_tests/clients/agent_backend/test_request_builder.py
+++ b/api/tests/unit_tests/clients/agent_backend/test_request_builder.py
@@ -162,8 +162,15 @@ def test_request_builder_adds_knowledge_layer_when_configured():
run_input = _run_input()
run_input.knowledge = DifyKnowledgeBaseLayerConfig.model_validate(
{
- "dataset_ids": ["dataset-1"],
- "retrieval": {"mode": "multiple", "top_k": 4},
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ }
+ ],
}
)
@@ -174,7 +181,7 @@ def test_request_builder_adds_knowledge_layer_when_configured():
assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].type == DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID
assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].deps == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID}
knowledge_config = cast(DifyKnowledgeBaseLayerConfig, layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].config)
- assert knowledge_config.dataset_ids == ["dataset-1"]
+ assert knowledge_config.sets[0].dataset_ids == ["dataset-1"]
def test_request_builder_can_delete_on_exit_for_cleanup_paths():
@@ -386,8 +393,15 @@ def test_agent_app_request_builder_adds_knowledge_layer_when_configured():
run_input = _agent_app_input()
run_input.knowledge = DifyKnowledgeBaseLayerConfig.model_validate(
{
- "dataset_ids": ["dataset-1", "dataset-2"],
- "retrieval": {"mode": "multiple", "top_k": 2},
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 2},
+ }
+ ],
}
)
@@ -398,7 +412,7 @@ def test_agent_app_request_builder_adds_knowledge_layer_when_configured():
assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].type == DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID
assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].deps == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID}
knowledge_config = cast(DifyKnowledgeBaseLayerConfig, layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].config)
- assert knowledge_config.dataset_ids == ["dataset-1", "dataset-2"]
+ assert knowledge_config.sets[0].dataset_ids == ["dataset-1", "dataset-2"]
# ── ENG-635 / ENG-638: ask_human layer injection + deferred_tool_results ─────
diff --git a/api/tests/unit_tests/commands/test_generate_swagger_specs.py b/api/tests/unit_tests/commands/test_generate_swagger_specs.py
index c30386c9d65..403fb0e94a4 100644
--- a/api/tests/unit_tests/commands/test_generate_swagger_specs.py
+++ b/api/tests/unit_tests/commands/test_generate_swagger_specs.py
@@ -149,3 +149,55 @@ def test_generate_specs_is_idempotent(tmp_path):
assert [path.name for path in first_paths] == [path.name for path in second_paths]
for first_path, second_path in zip(first_paths, second_paths):
assert first_path.read_text(encoding="utf-8") == second_path.read_text(encoding="utf-8")
+
+
+def test_generate_specs_include_agent_v2_knowledge_set_schema_and_query_enums(tmp_path):
+ module = _load_generate_swagger_specs_module()
+
+ written_paths = module.generate_specs(tmp_path)
+ console_path = next(path for path in written_paths if path.name == "console-openapi.json")
+ payload = json.loads(console_path.read_text(encoding="utf-8"))
+ schemas = payload["components"]["schemas"]
+
+ assert "AgentKnowledgeSetConfig" in schemas
+ assert schemas["AgentSoulKnowledgeConfig"]["properties"]["sets"]["items"]["$ref"] == (
+ "#/components/schemas/AgentKnowledgeSetConfig"
+ )
+ assert schemas["AgentKnowledgeQueryMode"]["enum"] == ["generated_query", "user_query"]
+
+
+def test_checked_in_agent_v2_knowledge_openapi_and_generated_contracts_are_in_sync():
+ api_dir = Path(__file__).resolve().parents[3]
+ repo_root = api_dir.parent
+
+ markdown = (api_dir / "openapi" / "markdown" / "console-openapi.md").read_text(encoding="utf-8")
+ agent_types = (
+ repo_root / "packages" / "contracts" / "generated" / "api" / "console" / "agent" / "types.gen.ts"
+ ).read_text(encoding="utf-8")
+ apps_types = (
+ repo_root / "packages" / "contracts" / "generated" / "api" / "console" / "apps" / "types.gen.ts"
+ ).read_text(encoding="utf-8")
+ agent_zod = (
+ repo_root / "packages" / "contracts" / "generated" / "api" / "console" / "agent" / "zod.gen.ts"
+ ).read_text(encoding="utf-8")
+ apps_zod = (
+ repo_root / "packages" / "contracts" / "generated" / "api" / "console" / "apps" / "zod.gen.ts"
+ ).read_text(encoding="utf-8")
+
+ assert "#### AgentKnowledgeSetConfig" in markdown
+ assert "#### AgentSoulKnowledgeConfig" in markdown
+ assert "#### AgentKnowledgeQueryMode" in markdown
+
+ for content in (agent_types, apps_types):
+ assert "export type AgentKnowledgeSetConfig = {" in content
+ assert "export type AgentSoulKnowledgeConfig = {" in content
+ assert "AgentKnowledgeQueryMode" in content
+ assert "generated_query" in content
+ assert "user_query" in content
+
+ for content in (agent_zod, apps_zod):
+ assert "export const zAgentKnowledgeSetConfig = z.object({" in content
+ assert "export const zAgentSoulKnowledgeConfig = z.object({" in content
+ assert "zAgentKnowledgeQueryMode = z.enum([" in content
+ assert "generated_query" in content
+ assert "user_query" in content
diff --git a/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py b/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py
index 0d1483e1b79..43b6710f41d 100644
--- a/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py
+++ b/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py
@@ -153,12 +153,19 @@ class TestAgentAppRuntimeRequestBuilder:
"model": "gpt-4o-mini",
},
"knowledge": {
- "datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
- "query_config": {
- "top_k": 3,
- "score_threshold": 0.5,
- "score_threshold_enabled": False,
- },
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {
+ "mode": "multiple",
+ "top_k": 3,
+ "score_threshold": None,
+ },
+ }
+ ],
},
}
)
@@ -173,10 +180,12 @@ class TestAgentAppRuntimeRequestBuilder:
assert knowledge.type == "dify.knowledge_base"
assert knowledge.deps == {"execution_context": "execution_context"}
dumped_config = knowledge.config.model_dump(mode="json", by_alias=True)
- assert dumped_config["dataset_ids"] == ["dataset-1", "dataset-2"]
- assert dumped_config["retrieval"]["mode"] == "multiple"
- assert dumped_config["retrieval"]["top_k"] == 3
- assert dumped_config["retrieval"]["score_threshold"] == 0.0
+ knowledge_set = dumped_config["sets"][0]
+ assert [dataset["id"] for dataset in knowledge_set["datasets"]] == ["dataset-1", "dataset-2"]
+ assert knowledge_set["query"] == {"mode": "generated_query", "value": None}
+ assert knowledge_set["retrieval"]["mode"] == "multiple"
+ assert knowledge_set["retrieval"]["top_k"] == 3
+ assert knowledge_set["retrieval"]["score_threshold"] == 0.0
def test_build_raises_when_model_missing(self):
builder = AgentAppRuntimeRequestBuilder(
diff --git a/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_runtime_request_builder.py b/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_runtime_request_builder.py
index 78e49769159..cf9d1b797e3 100644
--- a/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_runtime_request_builder.py
+++ b/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_runtime_request_builder.py
@@ -512,12 +512,55 @@ def test_build_maps_agent_soul_knowledge_to_knowledge_layer_config():
"model": "gpt-test",
},
"knowledge": {
- "datasets": [{"id": "dataset-1"}, {"id": " "}, {"id": "dataset-2"}],
- "query_config": {
- "top_k": 6,
- "score_threshold": 0.4,
- "score_threshold_enabled": True,
- },
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "description": "Support content",
+ "datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {
+ "mode": "multiple",
+ "top_k": 6,
+ "score_threshold": 0.4,
+ "reranking_model": {"provider": "cohere", "model": "rerank-v3"},
+ "weights": {"weight_type": "weighted_score", "vector_setting": {"vector_weight": 0.7}},
+ },
+ "metadata_filtering": {
+ "mode": "manual",
+ "conditions": {
+ "logical_operator": "and",
+ "conditions": [
+ {"name": "category", "comparison_operator": "contains", "value": "auth"}
+ ],
+ },
+ },
+ },
+ {
+ "id": "release",
+ "name": "Release Notes",
+ "datasets": [{"id": "dataset-3"}],
+ "query": {"mode": "user_query", "value": "release notes"},
+ "retrieval": {
+ "mode": "single",
+ "model": {
+ "provider": "openai",
+ "name": "gpt-4o-mini",
+ "mode": "chat",
+ "completion_params": {"temperature": 0.2},
+ },
+ },
+ "metadata_filtering": {
+ "mode": "automatic",
+ "model_config": {
+ "provider": "openai",
+ "name": "gpt-4o-mini",
+ "mode": "chat",
+ "completion_params": {},
+ },
+ },
+ },
+ ],
},
}
),
@@ -531,25 +574,75 @@ def test_build_maps_agent_soul_knowledge_to_knowledge_layer_config():
knowledge_layer = layers["knowledge"]
assert knowledge_layer["type"] == "dify.knowledge_base"
assert knowledge_layer["deps"] == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID}
- assert knowledge_layer["config"] == {
- "dataset_ids": ["dataset-1", "dataset-2"],
- "retrieval": {
- "mode": "multiple",
- "top_k": 6,
- "score_threshold": 0.4,
- "reranking_mode": "reranking_model",
- "reranking_enable": True,
- "reranking_model": None,
- "weights": None,
- "model": None,
+ assert knowledge_layer["config"]["sets"] == [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "description": "Support content",
+ "datasets": [
+ {"id": "dataset-1", "name": None, "description": None},
+ {"id": "dataset-2", "name": None, "description": None},
+ ],
+ "query": {"mode": "generated_query", "value": None},
+ "retrieval": {
+ "mode": "multiple",
+ "top_k": 6,
+ "score_threshold": 0.4,
+ "reranking_mode": "reranking_model",
+ "reranking_enable": True,
+ "reranking_model": {"provider": "cohere", "model": "rerank-v3"},
+ "weights": {"weight_type": "weighted_score", "vector_setting": {"vector_weight": 0.7}},
+ "model": None,
+ },
+ "metadata_filtering": {
+ "mode": "manual",
+ "metadata_model_config": None,
+ "conditions": {
+ "logical_operator": "and",
+ "conditions": [
+ {"name": "category", "comparison_operator": "contains", "value": "auth"}
+ ],
+ },
+ },
},
- "metadata_filtering": {"mode": "disabled", "metadata_model_config": None, "conditions": None},
- "max_result_content_chars": 2000,
- "max_observation_chars": 12000,
- }
+ {
+ "id": "release",
+ "name": "Release Notes",
+ "description": None,
+ "datasets": [{"id": "dataset-3", "name": None, "description": None}],
+ "query": {"mode": "user_query", "value": "release notes"},
+ "retrieval": {
+ "mode": "single",
+ "top_k": None,
+ "score_threshold": 0.0,
+ "reranking_mode": "reranking_model",
+ "reranking_enable": True,
+ "reranking_model": None,
+ "weights": None,
+ "model": {
+ "provider": "openai",
+ "name": "gpt-4o-mini",
+ "mode": "chat",
+ "completion_params": {"temperature": 0.2},
+ },
+ },
+ "metadata_filtering": {
+ "mode": "automatic",
+ "metadata_model_config": {
+ "provider": "openai",
+ "name": "gpt-4o-mini",
+ "mode": "chat",
+ "completion_params": {},
+ },
+ "conditions": None,
+ },
+ },
+ ]
+ assert knowledge_layer["config"]["max_result_content_chars"] == 2000
+ assert knowledge_layer["config"]["max_observation_chars"] == 12000
-def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits_it():
+def test_build_knowledge_layer_maps_disabled_score_threshold_to_zero():
context = _context()
snapshot = AgentConfigSnapshot(
id="snapshot-1",
@@ -565,8 +658,19 @@ def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits
"model": "gpt-test",
},
"knowledge": {
- "datasets": [{"id": "dataset-1"}],
- "query_config": {},
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {
+ "mode": "multiple",
+ "top_k": 4,
+ "score_threshold": None,
+ },
+ }
+ ],
},
}
),
@@ -577,10 +681,10 @@ def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits
dumped = result.request.model_dump(mode="json")
knowledge_layer = next(layer for layer in dumped["composition"]["layers"] if layer["name"] == "knowledge")
- assert knowledge_layer["config"]["retrieval"]["top_k"] == 4
+ assert knowledge_layer["config"]["sets"][0]["retrieval"]["score_threshold"] == 0.0
-def test_build_skips_knowledge_layer_when_agent_soul_has_no_valid_dataset_ids():
+def test_build_skips_knowledge_layer_when_agent_soul_has_no_sets():
context = _context()
snapshot = AgentConfigSnapshot(
id="snapshot-1",
@@ -595,9 +699,7 @@ def test_build_skips_knowledge_layer_when_agent_soul_has_no_valid_dataset_ids():
"model_provider": "openai",
"model": "gpt-test",
},
- "knowledge": {
- "datasets": [{"id": " "}, {}],
- },
+ "knowledge": {"sets": []},
}
),
)
@@ -1094,7 +1196,15 @@ def test_feature_manifest_marks_knowledge_supported_without_warning_when_configu
soul = AgentSoulConfig.model_validate(
{
"knowledge": {
- "datasets": [{"id": "dataset-1", "name": "Product Docs"}],
+ "sets": [
+ {
+ "id": "product",
+ "name": "Product Docs",
+ "datasets": [{"id": "dataset-1", "name": "Product Docs"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ }
+ ],
}
}
)
@@ -1106,13 +1216,13 @@ def test_feature_manifest_marks_knowledge_supported_without_warning_when_configu
assert all("knowledge" not in w["section"] for w in manifest["unsupported_runtime_warnings"])
-def test_feature_manifest_treats_blank_knowledge_dataset_ids_as_not_configured():
+def test_feature_manifest_treats_empty_knowledge_sets_as_not_configured():
from core.workflow.nodes.agent_v2.runtime_feature_manifest import build_runtime_feature_manifest
soul = AgentSoulConfig.model_validate(
{
"knowledge": {
- "datasets": [{"id": " "}, {}],
+ "sets": [],
}
}
)
diff --git a/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_validators.py b/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_validators.py
index 440bd49e5c0..2254cd16d49 100644
--- a/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_validators.py
+++ b/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_validators.py
@@ -55,6 +55,33 @@ def _snapshot() -> AgentConfigSnapshot:
)
+def _snapshot_with_knowledge_dataset(dataset_id: str) -> AgentConfigSnapshot:
+ return AgentConfigSnapshot(
+ id="snapshot-1",
+ tenant_id="tenant-1",
+ agent_id="agent-1",
+ version=1,
+ config_snapshot=AgentSoulConfig(
+ model=AgentSoulModelConfig(
+ plugin_id="langgenius/openai",
+ model_provider="openai",
+ model="gpt-test",
+ ),
+ knowledge={
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": dataset_id}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ }
+ ]
+ },
+ ),
+ )
+
+
def _graph(edges: list[dict]) -> dict:
return {
"nodes": [
@@ -515,6 +542,35 @@ def test_publish_validation_rejects_missing_file_ref():
)
+def test_publish_validation_rejects_missing_or_out_of_scope_knowledge_datasets(
+ monkeypatch: pytest.MonkeyPatch,
+):
+ dataset_id = "550e8400-e29b-41d4-a716-446655440000"
+ node_job = WorkflowNodeJobConfig.model_validate({})
+ snapshot = _snapshot_with_knowledge_dataset(dataset_id)
+ session = Mock()
+ session.scalar.side_effect = [_binding(node_job), _agent(), snapshot]
+
+ captured = {}
+
+ def fake_get_datasets_by_ids(ids, tenant_id):
+ captured["ids"] = ids
+ captured["tenant_id"] = tenant_id
+ return [], 0
+
+ import services.dataset_service as dataset_service_module
+
+ monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids)
+
+ with pytest.raises(WorkflowAgentNodeValidationError, match=dataset_id):
+ WorkflowAgentNodeValidator.validate_published_workflow(
+ session=session,
+ workflow=_workflow(_graph([{"source": "start", "target": "agent-node"}])),
+ )
+
+ assert captured == {"ids": [dataset_id], "tenant_id": "tenant-1"}
+
+
def test_publish_validation_accepts_tool_node_agentic_manual_mode():
session = Mock()
diff --git a/api/tests/unit_tests/services/agent/test_agent_composer_entities.py b/api/tests/unit_tests/services/agent/test_agent_composer_entities.py
index 089a5c74f3a..ba32222b70a 100644
--- a/api/tests/unit_tests/services/agent/test_agent_composer_entities.py
+++ b/api/tests/unit_tests/services/agent/test_agent_composer_entities.py
@@ -1,4 +1,5 @@
import pytest
+from pydantic import ValidationError
from models.agent_config_entities import AgentKnowledgeQueryMode, AgentSoulModelConfig, DeclaredOutputType
from services.agent.composer_service import AgentComposerService
@@ -91,14 +92,144 @@ def test_knowledge_query_mode_uses_stable_backend_enums():
config = AgentSoulConfig.model_validate(
{
"knowledge": {
- "datasets": [{"dataset_id": "dataset-1"}],
- "query_mode": "generated_query",
- "query_config": {"generation_prompt": "Create a retrieval query."},
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ }
+ ],
}
}
)
- assert config.knowledge.query_mode == AgentKnowledgeQueryMode.GENERATED_QUERY
+ assert config.knowledge.sets[0].query.mode == AgentKnowledgeQueryMode.GENERATED_QUERY
+
+
+@pytest.mark.parametrize(
+ ("knowledge_payload", "match"),
+ [
+ (
+ {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ },
+ {
+ "id": "support",
+ "name": "Billing KB",
+ "datasets": [{"id": "dataset-2"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ },
+ ]
+ },
+ "knowledge set ids must be unique",
+ ),
+ (
+ {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Shared KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ },
+ {
+ "id": "billing",
+ "name": "Shared KB",
+ "datasets": [{"id": "dataset-2"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ },
+ ]
+ },
+ "knowledge set names must be unique",
+ ),
+ (
+ {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}, {"id": " dataset-1 "}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ },
+ ]
+ },
+ "knowledge set dataset ids must be unique",
+ ),
+ (
+ {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "user_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ },
+ ]
+ },
+ "knowledge query.value is required for user_query mode",
+ ),
+ (
+ {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "single"},
+ },
+ ]
+ },
+ "knowledge retrieval.model is required for single mode",
+ ),
+ (
+ {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ "metadata_filtering": {"mode": "automatic"},
+ },
+ ]
+ },
+ "metadata_filtering.model_config is required for automatic mode",
+ ),
+ (
+ {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ "metadata_filtering": {"mode": "manual"},
+ },
+ ]
+ },
+ "metadata_filtering.conditions is required for manual mode",
+ ),
+ ],
+)
+def test_knowledge_sets_contract_rejects_invalid_configs(knowledge_payload, match: str):
+ with pytest.raises(ValidationError, match=match):
+ AgentSoulConfig.model_validate({"knowledge": knowledge_payload})
def test_agent_soul_model_config_is_first_class_without_credentials():
diff --git a/api/tests/unit_tests/services/agent/test_agent_services.py b/api/tests/unit_tests/services/agent/test_agent_services.py
index 2cad3d81af1..36e7c2736bc 100644
--- a/api/tests/unit_tests/services/agent/test_agent_services.py
+++ b/api/tests/unit_tests/services/agent/test_agent_services.py
@@ -2594,20 +2594,151 @@ def test_dataset_rows_filters_malformed_ids(monkeypatch: pytest.MonkeyPatch):
return [], 0
import services.dataset_service as dataset_service_module
+ from services.agent.knowledge_datasets import get_tenant_knowledge_dataset_rows
monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids)
valid = "550e8400-e29b-41d4-a716-446655440000"
- rows = AgentComposerService._dataset_rows(tenant_id="tenant-1", dataset_ids=["9999dead-beef", valid])
+ rows = get_tenant_knowledge_dataset_rows(tenant_id="tenant-1", dataset_ids=["9999dead-beef", valid])
assert rows == {}
assert captured["ids"] == [valid]
# all-malformed input never touches the DB
captured.clear()
- assert AgentComposerService._dataset_rows(tenant_id="tenant-1", dataset_ids=["nope"]) == {}
+ assert get_tenant_knowledge_dataset_rows(tenant_id="tenant-1", dataset_ids=["nope"]) == {}
assert captured == {}
+@pytest.mark.parametrize(
+ ("variant", "save_call"),
+ [
+ (
+ ComposerVariant.AGENT_APP,
+ lambda payload: AgentComposerService.save_agent_app_composer(
+ tenant_id="tenant-1",
+ app_id="app-1",
+ account_id="account-1",
+ payload=payload,
+ ),
+ ),
+ (
+ ComposerVariant.WORKFLOW,
+ lambda payload: AgentComposerService.save_workflow_composer(
+ tenant_id="tenant-1",
+ app_id="app-1",
+ node_id="node-1",
+ account_id="account-1",
+ payload=payload,
+ ),
+ ),
+ ],
+)
+def test_composer_save_rejects_malformed_knowledge_dataset_ids(monkeypatch: pytest.MonkeyPatch, variant, save_call):
+ captured = {"calls": 0}
+
+ def fake_get_datasets_by_ids(ids, tenant_id):
+ captured["calls"] += 1
+ captured["ids"] = ids
+ captured["tenant_id"] = tenant_id
+ return [], 0
+
+ import services.dataset_service as dataset_service_module
+
+ monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids)
+
+ payload = ComposerSavePayload.model_validate(
+ {
+ "variant": variant.value,
+ "save_strategy": ComposerSaveStrategy.SAVE_TO_CURRENT_VERSION.value,
+ "soul_lock": {"locked": False},
+ "agent_soul": {
+ "knowledge": {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "not-a-uuid"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ }
+ ]
+ }
+ },
+ }
+ )
+
+ with pytest.raises(InvalidComposerConfigError, match="not-a-uuid"):
+ save_call(payload)
+
+ assert captured == {"calls": 0}
+
+
+@pytest.mark.parametrize(
+ ("variant", "save_call"),
+ [
+ (
+ ComposerVariant.AGENT_APP,
+ lambda payload: AgentComposerService.save_agent_app_composer(
+ tenant_id="tenant-1",
+ app_id="app-1",
+ account_id="account-1",
+ payload=payload,
+ ),
+ ),
+ (
+ ComposerVariant.WORKFLOW,
+ lambda payload: AgentComposerService.save_workflow_composer(
+ tenant_id="tenant-1",
+ app_id="app-1",
+ node_id="node-1",
+ account_id="account-1",
+ payload=payload,
+ ),
+ ),
+ ],
+)
+def test_composer_save_rejects_missing_or_out_of_scope_knowledge_datasets(
+ monkeypatch: pytest.MonkeyPatch, variant, save_call
+):
+ captured = {}
+ missing_dataset_id = "550e8400-e29b-41d4-a716-446655440000"
+
+ def fake_get_datasets_by_ids(ids, tenant_id):
+ captured["ids"] = ids
+ captured["tenant_id"] = tenant_id
+ return [], 0
+
+ import services.dataset_service as dataset_service_module
+
+ monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids)
+
+ payload = ComposerSavePayload.model_validate(
+ {
+ "variant": variant.value,
+ "save_strategy": ComposerSaveStrategy.SAVE_TO_CURRENT_VERSION.value,
+ "soul_lock": {"locked": False},
+ "agent_soul": {
+ "knowledge": {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": missing_dataset_id}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ }
+ ]
+ }
+ },
+ }
+ )
+
+ with pytest.raises(InvalidComposerConfigError, match=missing_dataset_id):
+ save_call(payload)
+
+ assert captured == {"ids": [missing_dataset_id], "tenant_id": "tenant-1"}
+
+
def test_workspace_dify_tools_returns_provider_and_tool_granularities(monkeypatch: pytest.MonkeyPatch):
"""The slash-menu Tools tab needs both selection granularities: a provider
hosts many tools (like an MCP server), so candidates return one
diff --git a/api/tests/unit_tests/services/agent/test_composer_candidates.py b/api/tests/unit_tests/services/agent/test_composer_candidates.py
index 863ebafc994..5566d48c690 100644
--- a/api/tests/unit_tests/services/agent/test_composer_candidates.py
+++ b/api/tests/unit_tests/services/agent/test_composer_candidates.py
@@ -124,7 +124,18 @@ def _soul() -> AgentSoulConfig:
{"id": "ct-2", "name": "disabled-one", "enabled": False},
],
},
- "knowledge": {"datasets": [{"id": "ds-1", "name": "旧名"}, {"id": "ds-gone", "name": "已删"}]},
+ "knowledge": {
+ "sets": [
+ {
+ "id": "kb-1",
+ "name": "产品知识",
+ "description": "knowledge set",
+ "datasets": [{"id": "ds-1", "name": "旧名"}, {"id": "ds-gone", "name": "已删"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ }
+ ]
+ },
"human": {"contacts": [{"id": "c-1", "name": "David Hayes", "channel": "email"}]},
}
)
@@ -143,12 +154,16 @@ def test_soul_candidates_lists_configured_items_only():
assert [item["name"] for item in lists["cli_tools"]] == ["ffmpeg"]
# the stable mention id flows through so the frontend can mint [§cli_tool:§]
assert [item["id"] for item in lists["cli_tools"]] == ["ct-1"]
- # enriched from DB; dangling dataset kept with missing flag (placeholder, 0522)
- knowledge = {item["id"]: item for item in lists["knowledge_datasets"]}
- assert knowledge["ds-1"]["name"] == "产品手册"
- assert knowledge["ds-1"]["missing"] is False
- assert knowledge["ds-gone"]["missing"] is True
- assert knowledge["ds-gone"]["name"] == "已删"
+ # Knowledge mentions point at set ids; nested datasets are hydrated for context.
+ knowledge_set = lists["knowledge_sets"][0]
+ assert knowledge_set["id"] == "kb-1"
+ assert knowledge_set["name"] == "产品知识"
+ assert knowledge_set["missing_dataset_ids"] == ["ds-gone"]
+ datasets = {item["id"]: item for item in knowledge_set["datasets"]}
+ assert datasets["ds-1"]["name"] == "产品手册"
+ assert datasets["ds-1"]["missing"] is False
+ assert datasets["ds-gone"]["missing"] is True
+ assert datasets["ds-gone"]["name"] == "已删"
assert lists["human_contacts"][0]["id"] == "c-1"
assert lists["dify_tools"][0]["id"] == "tavily/tavily_search"
diff --git a/api/tests/unit_tests/services/agent/test_composer_mention_validation.py b/api/tests/unit_tests/services/agent/test_composer_mention_validation.py
index ffbec86f4e6..f56ae3751e8 100644
--- a/api/tests/unit_tests/services/agent/test_composer_mention_validation.py
+++ b/api/tests/unit_tests/services/agent/test_composer_mention_validation.py
@@ -149,22 +149,32 @@ def test_dangling_knowledge_without_label_gets_fallback_name():
]
-def test_configured_but_deleted_dataset_surfaces_as_placeholder():
+def test_configured_but_deleted_knowledge_set_surfaces_as_placeholder():
payload = ComposerSavePayload.model_validate(
{
"variant": "agent_app",
"agent_soul": {
- "prompt": {"system_prompt": "see [§knowledge:ds-1:产品手册§]"},
- "knowledge": {"datasets": [{"id": "ds-1", "name": "产品手册"}]},
+ "prompt": {"system_prompt": "see [§knowledge:kb-1:产品手册§]"},
+ "knowledge": {
+ "sets": [
+ {
+ "id": "kb-1",
+ "name": "产品手册",
+ "datasets": [{"id": "ds-1", "name": "产品手册"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ }
+ ]
+ },
},
"save_strategy": "save_to_current_version",
}
)
- # configured + DB row exists -> clean
- assert _findings(payload, existing_dataset_ids={"ds-1"})["knowledge_retrieval_placeholder"] == []
- # configured but deleted in DB -> placeholder
- assert _findings(payload, existing_dataset_ids=set())["knowledge_retrieval_placeholder"] == [
- {"id": "ds-1", "placeholder_name": "产品手册"}
+ # configured + current Agent Soul row exists -> clean
+ assert _findings(payload, existing_knowledge_set_ids={"kb-1"})["knowledge_retrieval_placeholder"] == []
+ # configured but removed from the current Agent Soul surface -> placeholder
+ assert _findings(payload, existing_knowledge_set_ids=set())["knowledge_retrieval_placeholder"] == [
+ {"id": "kb-1", "placeholder_name": "产品手册"}
]
diff --git a/api/tests/unit_tests/services/agent/test_prompt_mentions.py b/api/tests/unit_tests/services/agent/test_prompt_mentions.py
index b8b908d432f..5bc614f4e49 100644
--- a/api/tests/unit_tests/services/agent/test_prompt_mentions.py
+++ b/api/tests/unit_tests/services/agent/test_prompt_mentions.py
@@ -107,7 +107,17 @@ def soul() -> AgentSoulConfig:
],
"cli_tools": [{"id": "ct-1", "name": "ffmpeg"}],
},
- "knowledge": {"datasets": [{"id": "ds-1", "name": "产品手册"}]},
+ "knowledge": {
+ "sets": [
+ {
+ "id": "kb-1",
+ "name": "产品手册",
+ "datasets": [{"id": "ds-1", "name": "产品手册"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ }
+ ]
+ },
"human": {"contacts": [{"id": "c-1", "name": "David Hayes", "channel": "email"}]},
}
)
@@ -117,7 +127,7 @@ def test_soul_resolver_resolves_each_kind(soul: AgentSoulConfig):
resolver = build_soul_mention_resolver(soul)
prompt = (
"Use [§tool:tavily/tavily_search:tavily§], run [§cli_tool:ct-1:ffmpeg§], "
- "ground in [§knowledge:ds-1§], ask [§human:c-1§]."
+ "ground in [§knowledge:kb-1§], ask [§human:c-1§]."
)
expanded = expand_prompt_mentions(prompt, resolver)
diff --git a/dify-agent/src/dify_agent/layers/knowledge/__init__.py b/dify-agent/src/dify_agent/layers/knowledge/__init__.py
index 569512d8004..86a9405bce3 100644
--- a/dify-agent/src/dify_agent/layers/knowledge/__init__.py
+++ b/dify-agent/src/dify_agent/layers/knowledge/__init__.py
@@ -7,21 +7,31 @@ root stays import-safe for callers that only need to construct run requests.
from dify_agent.layers.knowledge.configs import (
DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID,
DifyKnowledgeBaseLayerConfig,
+ DifyKnowledgeDatasetConfig,
+ DifyKnowledgeEagerResult,
DifyKnowledgeMetadataCondition,
DifyKnowledgeMetadataConditions,
DifyKnowledgeMetadataFilteringConfig,
DifyKnowledgeModelConfig,
+ DifyKnowledgeQueryConfig,
DifyKnowledgeRerankingModelConfig,
DifyKnowledgeRetrievalConfig,
+ DifyKnowledgeRuntimeState,
+ DifyKnowledgeSetConfig,
)
__all__ = [
"DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID",
"DifyKnowledgeBaseLayerConfig",
+ "DifyKnowledgeDatasetConfig",
+ "DifyKnowledgeEagerResult",
"DifyKnowledgeMetadataCondition",
"DifyKnowledgeMetadataConditions",
"DifyKnowledgeMetadataFilteringConfig",
"DifyKnowledgeModelConfig",
+ "DifyKnowledgeQueryConfig",
"DifyKnowledgeRerankingModelConfig",
"DifyKnowledgeRetrievalConfig",
+ "DifyKnowledgeRuntimeState",
+ "DifyKnowledgeSetConfig",
]
diff --git a/dify-agent/src/dify_agent/layers/knowledge/configs.py b/dify-agent/src/dify_agent/layers/knowledge/configs.py
index 9ada075d1cc..b7b71ab9c42 100644
--- a/dify-agent/src/dify_agent/layers/knowledge/configs.py
+++ b/dify-agent/src/dify_agent/layers/knowledge/configs.py
@@ -1,12 +1,11 @@
"""Client-safe DTOs for the Dify knowledge-base Agenton layer.
-The public layer config exposes only static retrieval controls: dataset ids,
-retrieval strategy, metadata filtering, and observation-size limits. The agent
-model itself should only ever see a single ``query`` tool argument; tenant/
-app/user context comes from the execution-context layer and the actual
-retrieval is delegated to the Dify API inner endpoint. Tool naming is not
-caller-configurable: the runtime always exposes the same stable knowledge-base
-search tool.
+The public layer config carries one or more named knowledge sets. Each set owns
+its dataset ids plus query, retrieval, and metadata-filtering policy. Generated-
+query sets are exposed through one stable model-visible search tool whose
+schema lets the model pick ``set_name`` and ``query``; user-query sets are
+retrieved eagerly when the layer enters a run and their formatted observations
+are kept only in JSON-safe ``runtime_state`` for session snapshots.
"""
from __future__ import annotations
@@ -61,6 +60,44 @@ class DifyKnowledgeRerankingModelConfig(BaseModel):
model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
+class DifyKnowledgeDatasetConfig(BaseModel):
+    """A single dataset reference inside a knowledge set.
+
+    Retrieval consumes only ``id``. ``name`` and ``description`` are optional
+    labels that callers already hold; they are carried along for runtime/debug
+    snapshots and do not change the inner retrieval request contract.
+    """
+
+    id: str
+    name: str | None = None
+    description: str | None = None
+
+    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
+
+    @field_validator("id")
+    @classmethod
+    def validate_id(cls, value: str) -> str:
+        """Strip surrounding whitespace and reject ids that end up empty."""
+        stripped = value.strip()
+        if stripped:
+            return stripped
+        raise ValueError("dataset id must not be blank")
+
+
+class DifyKnowledgeQueryConfig(BaseModel):
+    """How the search query for one knowledge set is obtained.
+
+    ``value`` holds the query text and is mandatory (non-blank) only when
+    ``mode`` is ``user_query``.
+    """
+
+    mode: Literal["user_query", "generated_query"]
+    value: str | None = None
+
+    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
+
+    @model_validator(mode="after")
+    def validate_mode_specific_fields(self) -> DifyKnowledgeQueryConfig:
+        """Require a non-blank ``value`` whenever ``mode`` is ``user_query``."""
+        if self.mode != "user_query":
+            return self
+        query_text = self.value or ""
+        if not query_text.strip():
+            raise ValueError("query.value is required for user_query mode")
+        return self
+
+
class DifyKnowledgeRetrievalConfig(BaseModel):
"""Static retrieval controls mirrored into the inner API request."""
@@ -151,38 +188,90 @@ class DifyKnowledgeMetadataFilteringConfig(BaseModel):
return payload
-class DifyKnowledgeBaseLayerConfig(LayerConfig):
- """Public config for one model-visible knowledge search tool.
+class DifyKnowledgeSetConfig(BaseModel):
+ """One independently searchable or eagerly-preloaded knowledge set."""
- The model only gets to choose whether to call the tool and what ``query``
- to send. Dataset ids, retrieval settings, metadata filtering, and caller
- context remain config/runtime concerns outside the model-visible tool
- schema. The tool name and description are fixed by the layer runtime and do
- not appear in the public config DTO.
- """
-
- dataset_ids: list[str]
+ id: str
+ name: str
+ description: str | None = None
+ datasets: list[DifyKnowledgeDatasetConfig]
+ query: DifyKnowledgeQueryConfig
retrieval: DifyKnowledgeRetrievalConfig
metadata_filtering: DifyKnowledgeMetadataFilteringConfig = Field(
default_factory=DifyKnowledgeMetadataFilteringConfig
)
+
+ model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
+
+ @field_validator("id", "name")
+ @classmethod
+ def validate_non_blank_identity(cls, value: str) -> str:
+ normalized = value.strip()
+ if not normalized:
+ raise ValueError("knowledge set id and name must not be blank")
+ return normalized
+
+ @model_validator(mode="after")
+ def validate_dataset_ids(self) -> DifyKnowledgeSetConfig:
+ if not self.datasets:
+ raise ValueError("knowledge set requires at least one dataset")
+ dataset_ids = [dataset.id for dataset in self.datasets]
+ if len(dataset_ids) != len(set(dataset_ids)):
+ raise ValueError("knowledge set dataset ids must be unique")
+ return self
+
+ @property
+ def dataset_ids(self) -> list[str]:
+ """Return the selected dataset ids for the inner retrieval request."""
+ return [dataset.id for dataset in self.datasets]
+
+
+class DifyKnowledgeEagerResult(BaseModel):
+ """JSON-safe eager user-query result stored in layer runtime state.
+
+ One entry is recorded per ``user_query`` knowledge set by the layer's eager
+ refresh and later rendered into the extra user prompt.
+ """
+
+ # Id and name of the knowledge set this result was retrieved for.
+ set_id: str
+ set_name: str
+ # Whitespace-stripped query text taken from the set's ``query.value``.
+ query: str
+ # Formatted retrieval output, or the temporary-unavailable message on a retryable failure.
+ observation: str
+ # "success" when results came back, "empty" when none did,
+ # "temporarily_unavailable" when the client raised a retryable error.
+ status: Literal["success", "empty", "temporarily_unavailable"]
+
+ # Unknown keys are rejected rather than silently stored.
+ model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
+
+
+class DifyKnowledgeRuntimeState(BaseModel):
+ """Serializable eager-retrieval state stored in Agenton session snapshots.
+
+ The layer compares ``eager_config_fingerprint`` with the fingerprint of the
+ current ``user_query`` sets to decide whether ``eager_results`` can be reused.
+ """
+
+ # SHA-256 hex digest of the eager retrieval inputs; None when nothing has been retrieved
+ # or when no user_query sets are configured.
+ eager_config_fingerprint: str | None = None
+ # One entry per user_query set, in config order.
+ eager_results: list[DifyKnowledgeEagerResult] = Field(default_factory=list)
+
+ # validate_assignment=True makes pydantic re-validate values assigned to fields after construction.
+ model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid", validate_assignment=True)
+
+
+class DifyKnowledgeBaseLayerConfig(LayerConfig):
+ """Public config for one knowledge-base layer.
+
+ The model-visible surface stays fixed to ``knowledge_base_search``. Set
+ names are the only model-visible selection labels; dataset ids, retrieval
+ controls, metadata filtering, and caller identity remain config/runtime
+ concerns outside the tool schema.
+ """
+
+ sets: list[DifyKnowledgeSetConfig]
max_result_content_chars: int = Field(default=2000, ge=1)
max_observation_chars: int = Field(default=12000, ge=1)
model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
- @field_validator("dataset_ids")
- @classmethod
- def validate_dataset_ids(cls, value: list[str]) -> list[str]:
- if not value:
- raise ValueError("dataset_ids must contain at least one item")
- normalized_ids = [item.strip() for item in value]
- if any(not item for item in normalized_ids):
- raise ValueError("dataset_ids must not contain blank items")
- return normalized_ids
-
@model_validator(mode="after")
- def validate_observation_limits(self) -> DifyKnowledgeBaseLayerConfig:
+ def validate_sets_and_observation_limits(self) -> DifyKnowledgeBaseLayerConfig:
+ if not self.sets:
+ raise ValueError("sets must contain at least one knowledge set")
+ set_ids = [knowledge_set.id for knowledge_set in self.sets]
+ if len(set_ids) != len(set(set_ids)):
+ raise ValueError("knowledge set ids must be unique")
+ normalized_names = [knowledge_set.name.strip().lower() for knowledge_set in self.sets]
+ if len(normalized_names) != len(set(normalized_names)):
+ raise ValueError("knowledge set names must be unique")
if self.max_observation_chars < self.max_result_content_chars:
raise ValueError("max_observation_chars must be greater than or equal to max_result_content_chars")
return self
@@ -191,10 +280,15 @@ class DifyKnowledgeBaseLayerConfig(LayerConfig):
__all__ = [
"DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID",
"DifyKnowledgeBaseLayerConfig",
+ "DifyKnowledgeDatasetConfig",
+ "DifyKnowledgeEagerResult",
"DifyKnowledgeMetadataCondition",
"DifyKnowledgeMetadataConditions",
"DifyKnowledgeMetadataFilteringConfig",
"DifyKnowledgeModelConfig",
+ "DifyKnowledgeQueryConfig",
"DifyKnowledgeRerankingModelConfig",
"DifyKnowledgeRetrievalConfig",
+ "DifyKnowledgeRuntimeState",
+ "DifyKnowledgeSetConfig",
]
diff --git a/dify-agent/src/dify_agent/layers/knowledge/layer.py b/dify-agent/src/dify_agent/layers/knowledge/layer.py
index 02c9f07dd56..df07dc3cd36 100644
--- a/dify-agent/src/dify_agent/layers/knowledge/layer.py
+++ b/dify-agent/src/dify_agent/layers/knowledge/layer.py
@@ -1,17 +1,18 @@
-"""Dify knowledge-base layer exposing one model-visible search tool.
+"""Dify knowledge-base layer exposing set-aware retrieval.
The layer depends on ``DifyExecutionContextLayer`` for tenant/app/user/invoke
-identity, keeps retrieval controls in config only, and borrows a lifespan-owned
-HTTP client for each tool invocation. It never owns live clients or stores
-retrieved source content in layer state. Tool identity is intentionally fixed at
-runtime: callers cannot rename the knowledge tool or override its description
-through public layer config because the model-visible surface must stay stable
-across API-side Agent Soul mappings.
+identity. Generated-query sets become one stable model-visible
+``knowledge_base_search(set_name, query)`` tool, while user-query sets are
+retrieved eagerly during context entry and exposed as additional user prompt
+content. Eager observations are persisted only as JSON-safe runtime state so
+Agenton session snapshots can resume without repeating unchanged retrievals.
"""
from __future__ import annotations
from dataclasses import dataclass
+import hashlib
+import json
import logging
from typing import ClassVar, cast
@@ -27,7 +28,13 @@ from dify_agent.layers.knowledge.client import (
DifyKnowledgeBaseClientError,
DifyKnowledgeRetrieveResponse,
)
-from dify_agent.layers.knowledge.configs import DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID, DifyKnowledgeBaseLayerConfig
+from dify_agent.layers.knowledge.configs import (
+ DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID,
+ DifyKnowledgeBaseLayerConfig,
+ DifyKnowledgeEagerResult,
+ DifyKnowledgeRuntimeState,
+ DifyKnowledgeSetConfig,
+)
logger = logging.getLogger(__name__)
@@ -35,23 +42,14 @@ logger = logging.getLogger(__name__)
# public DTO cannot grow a parallel naming contract that diverges from the
# runtime knowledge-search surface.
_KNOWLEDGE_BASE_TOOL_NAME = "knowledge_base_search"
-_KNOWLEDGE_BASE_TOOL_DESCRIPTION = "Search configured knowledge bases for information relevant to the query."
+_KNOWLEDGE_BASE_TOOL_DESCRIPTION = (
+ "Search a configured knowledge set. Pick one configured set_name and provide a focused search query."
+)
BLANK_QUERY_OBSERVATION = "knowledge base search requires a non-empty query"
NO_RESULTS_OBSERVATION = "No relevant knowledge base results were found."
TEMPORARY_UNAVAILABLE_OBSERVATION = (
"Knowledge base search is temporarily unavailable. Please continue without it if possible."
)
-QUERY_TOOL_SCHEMA = {
- "type": "object",
- "properties": {
- "query": {
- "type": "string",
- "description": "Search query for the configured knowledge bases.",
- }
- },
- "required": ["query"],
- "additionalProperties": False,
-}
class DifyKnowledgeBaseDeps(LayerDeps):
@@ -61,8 +59,10 @@ class DifyKnowledgeBaseDeps(LayerDeps):
@dataclass(slots=True)
-class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBaseLayerConfig]):
- """Layer that resolves one config-scoped knowledge search tool."""
+class DifyKnowledgeBaseLayer(
+ PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBaseLayerConfig, DifyKnowledgeRuntimeState]
+):
+ """Layer that resolves set-scoped knowledge tools and eager user prompts."""
type_id: ClassVar[str | None] = DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID
@@ -95,7 +95,7 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
)
async def get_tools(self, *, http_client: httpx.AsyncClient) -> list[Tool[object]]:
- """Build one Pydantic AI tool that exposes only ``query`` to the model.
+ """Build the unified generated-query Pydantic AI tool, when needed.
Knowledge tools depend on execution-context identity that is optional for
other run types but mandatory here: ``tenant_id``, ``user_id``,
@@ -103,11 +103,15 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
any HTTP request is attempted. Tool execution then follows a strict
observation policy:
+ - unknown ``set_name`` returns a local validation observation;
- blank ``query`` returns a local validation observation;
- retryable client failures (timeouts, connection failures, HTTP
``429``/``502``) become a temporary-unavailable observation;
- non-retryable client failures are raised so the run fails fast.
"""
+ generated_sets = self._generated_query_sets()
+ if not generated_sets:
+ return []
if http_client.is_closed:
raise RuntimeError("DifyKnowledgeBaseLayer.get_tools() requires an open shared HTTP client.")
@@ -118,54 +122,28 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
api_key=self.inner_api_key,
http_client=http_client,
)
+ set_by_name = {knowledge_set.name: knowledge_set for knowledge_set in generated_sets}
- async def knowledge_base_search(_ctx: RunContext[object], query: str) -> str:
+ async def knowledge_base_search(_ctx: RunContext[object], set_name: str, query: str) -> str:
+ knowledge_set = set_by_name.get(set_name)
+ if knowledge_set is None:
+ return f"unknown knowledge set: {set_name}"
normalized_query = query.strip()
if not normalized_query:
return BLANK_QUERY_OBSERVATION
- try:
- response = await client.retrieve(
- tenant_id=caller["tenant_id"],
- user_id=caller["user_id"],
- app_id=caller["app_id"],
- user_from=caller["user_from"],
- invoke_from=caller["invoke_from"],
- dataset_ids=list(self.config.dataset_ids),
- query=normalized_query,
- retrieval=self.config.retrieval,
- metadata_filtering=self.config.metadata_filtering,
- )
- except DifyKnowledgeBaseClientError as exc:
- if exc.retryable:
- logger.warning(
- "knowledge base search temporarily unavailable",
- extra={
- "tenant_id": caller["tenant_id"],
- "app_id": caller["app_id"],
- "invoke_from": caller["invoke_from"],
- "error_code": exc.error_code,
- "status_code": exc.status_code,
- },
- )
- return TEMPORARY_UNAVAILABLE_OBSERVATION
- logger.error(
- "knowledge base search failed",
- extra={
- "tenant_id": caller["tenant_id"],
- "app_id": caller["app_id"],
- "invoke_from": caller["invoke_from"],
- "error_code": exc.error_code,
- "status_code": exc.status_code,
- },
- )
- raise
- return _format_observation(response, self.config)
+ return await self._retrieve_for_set(
+ client=client,
+ caller=caller,
+ knowledge_set=knowledge_set,
+ query=normalized_query,
+ retryable_observation=True,
+ )
async def prepare_tool_definition(_ctx: RunContext[object], tool_def: ToolDefinition) -> ToolDefinition:
return ToolDefinition(
name=tool_def.name,
description=tool_def.description,
- parameters_json_schema=QUERY_TOOL_SCHEMA,
+ parameters_json_schema=_tool_schema(generated_sets),
strict=tool_def.strict,
sequential=tool_def.sequential,
metadata=tool_def.metadata,
@@ -181,11 +159,177 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
knowledge_base_search,
takes_ctx=True,
name=_KNOWLEDGE_BASE_TOOL_NAME,
- description=_KNOWLEDGE_BASE_TOOL_DESCRIPTION,
+ description=_tool_description(generated_sets),
prepare=prepare_tool_definition,
)
]
+    @property
+    @override
+    def user_prompts(self) -> list[str]:
+        """Render stored eager results as a single extra user prompt.
+
+        Returns an empty list when no eager results are stored; otherwise one
+        prompt string containing a section per result (set name, query, and
+        the stored observation).
+        """
+        eager_results = self.runtime_state.eager_results
+        if not eager_results:
+            return []
+
+        rendered_sections = [
+            "\n".join(
+                (
+                    f"Set: {entry.set_name}",
+                    f"Query: {entry.query}",
+                    "Results:",
+                    entry.observation,
+                )
+            )
+            for entry in eager_results
+        ]
+        body = "\n\n".join(rendered_sections)
+        return ["Knowledge retrieval results:\n\n" + body]
+
+ @override
+ async def on_context_create(self) -> None:
+ """Run the eager user-query refresh; it is a no-op when stored results are still current."""
+ await self._refresh_eager_results_if_needed()
+
+ @override
+ async def on_context_resume(self) -> None:
+ """Run the same eager refresh as ``on_context_create`` so resumed state is checked against the current config."""
+ await self._refresh_eager_results_if_needed()
+
+ def _generated_query_sets(self) -> list[DifyKnowledgeSetConfig]:
+ """Return the configured sets whose query mode is ``generated_query``, in config order."""
+ return [knowledge_set for knowledge_set in self.config.sets if knowledge_set.query.mode == "generated_query"]
+
+ def _user_query_sets(self) -> list[DifyKnowledgeSetConfig]:
+ """Return the configured sets whose query mode is ``user_query``, in config order."""
+ return [knowledge_set for knowledge_set in self.config.sets if knowledge_set.query.mode == "user_query"]
+
+    async def _refresh_eager_results_if_needed(self) -> None:
+        """Retrieve observations for ``user_query`` sets unless stored ones are reusable.
+
+        A stored result is reused only when the eager-config fingerprint is
+        unchanged and that result is not ``temporarily_unavailable``. Sets whose
+        previous retrieval hit a retryable failure are retried on the next
+        call; otherwise a transient outage would stay pinned in the session
+        snapshot until the configuration changed.
+
+        When no ``user_query`` sets are configured the stored fingerprint and
+        results are cleared.
+
+        Raises:
+            DifyKnowledgeBaseClientError: a retrieval failed with a
+                non-retryable error.
+        """
+        user_query_sets = self._user_query_sets()
+        if not user_query_sets:
+            self.runtime_state.eager_config_fingerprint = None
+            self.runtime_state.eager_results = []
+            return
+
+        fingerprint = _eager_config_fingerprint(user_query_sets)
+        reusable_results: dict[str, DifyKnowledgeEagerResult] = {}
+        if self.runtime_state.eager_config_fingerprint == fingerprint:
+            # Same retrieval inputs as last time: keep every usable result and
+            # only redo the sets that were temporarily unavailable (or missing).
+            reusable_results = {
+                result.set_id: result
+                for result in self.runtime_state.eager_results
+                if result.status != "temporarily_unavailable"
+            }
+            if all(knowledge_set.id in reusable_results for knowledge_set in user_query_sets):
+                return
+
+        caller = _build_caller_context(self.deps.execution_context.config)
+        eager_results: list[DifyKnowledgeEagerResult] = []
+        async with httpx.AsyncClient() as http_client:
+            client = DifyKnowledgeBaseClient(
+                base_url=self.inner_api_url,
+                api_key=self.inner_api_key,
+                http_client=http_client,
+            )
+            for knowledge_set in user_query_sets:
+                cached_result = reusable_results.get(knowledge_set.id)
+                if cached_result is not None:
+                    eager_results.append(cached_result)
+                    continue
+
+                query = (knowledge_set.query.value or "").strip()
+                try:
+                    response = await client.retrieve(
+                        tenant_id=caller["tenant_id"],
+                        user_id=caller["user_id"],
+                        app_id=caller["app_id"],
+                        user_from=caller["user_from"],
+                        invoke_from=caller["invoke_from"],
+                        dataset_ids=knowledge_set.dataset_ids,
+                        query=query,
+                        retrieval=knowledge_set.retrieval,
+                        metadata_filtering=knowledge_set.metadata_filtering,
+                    )
+                except DifyKnowledgeBaseClientError as exc:
+                    log_extra = {
+                        "tenant_id": caller["tenant_id"],
+                        "app_id": caller["app_id"],
+                        "invoke_from": caller["invoke_from"],
+                        "knowledge_set_id": knowledge_set.id,
+                        "error_code": exc.error_code,
+                        "status_code": exc.status_code,
+                    }
+                    if exc.retryable:
+                        # Degrade to a placeholder observation; it is retried on the next refresh.
+                        logger.warning("eager knowledge retrieval temporarily unavailable", extra=log_extra)
+                        eager_results.append(
+                            DifyKnowledgeEagerResult(
+                                set_id=knowledge_set.id,
+                                set_name=knowledge_set.name,
+                                query=query,
+                                observation=TEMPORARY_UNAVAILABLE_OBSERVATION,
+                                status="temporarily_unavailable",
+                            )
+                        )
+                        continue
+                    logger.error("eager knowledge retrieval failed", extra=log_extra)
+                    raise
+
+                eager_results.append(
+                    DifyKnowledgeEagerResult(
+                        set_id=knowledge_set.id,
+                        set_name=knowledge_set.name,
+                        query=query,
+                        observation=_format_observation(response, self.config, include_heading=False),
+                        status="success" if response.results else "empty",
+                    )
+                )
+
+        self.runtime_state.eager_results = eager_results
+        self.runtime_state.eager_config_fingerprint = fingerprint
+
+    async def _retrieve_for_set(
+        self,
+        *,
+        client: DifyKnowledgeBaseClient,
+        caller: dict[str, str],
+        knowledge_set: DifyKnowledgeSetConfig,
+        query: str,
+        retryable_observation: bool,
+    ) -> str:
+        """Run one retrieval for ``knowledge_set`` and return the formatted observation.
+
+        Args:
+            client: Knowledge-base client used for the retrieval call.
+            caller: Caller identity; the ``tenant_id``, ``user_id``, ``app_id``,
+                ``user_from`` and ``invoke_from`` keys are read.
+            knowledge_set: Set supplying dataset ids, retrieval controls and
+                metadata filtering.
+            query: Query text sent to the retrieval call as-is.
+            retryable_observation: When true, a retryable client error is
+                logged as a warning and turned into the temporary-unavailable
+                observation instead of being raised.
+
+        Raises:
+            DifyKnowledgeBaseClientError: the error is non-retryable, or
+                ``retryable_observation`` is false.
+        """
+        try:
+            response = await client.retrieve(
+                tenant_id=caller["tenant_id"],
+                user_id=caller["user_id"],
+                app_id=caller["app_id"],
+                user_from=caller["user_from"],
+                invoke_from=caller["invoke_from"],
+                dataset_ids=knowledge_set.dataset_ids,
+                query=query,
+                retrieval=knowledge_set.retrieval,
+                metadata_filtering=knowledge_set.metadata_filtering,
+            )
+        except DifyKnowledgeBaseClientError as exc:
+            # Both log records carry the same structured context.
+            log_context = {
+                "tenant_id": caller["tenant_id"],
+                "app_id": caller["app_id"],
+                "invoke_from": caller["invoke_from"],
+                "knowledge_set_id": knowledge_set.id,
+                "error_code": exc.error_code,
+                "status_code": exc.status_code,
+            }
+            if not (exc.retryable and retryable_observation):
+                logger.error("knowledge base search failed", extra=log_context)
+                raise
+            logger.warning("knowledge base search temporarily unavailable", extra=log_context)
+            return TEMPORARY_UNAVAILABLE_OBSERVATION
+        return _format_observation(response, self.config)
+
def _build_caller_context(execution_context: object) -> dict[str, str]:
"""Extract the inner-API caller identity from execution-context config.
@@ -232,7 +376,56 @@ def _build_caller_context(execution_context: object) -> dict[str, str]:
}
-def _format_observation(response: DifyKnowledgeRetrieveResponse, config: DifyKnowledgeBaseLayerConfig) -> str:
+def _tool_schema(generated_sets: list[DifyKnowledgeSetConfig]) -> dict[str, object]:
+    """Build the JSON schema for the ``knowledge_base_search`` tool arguments.
+
+    ``set_name`` is restricted to an enum of the given sets' names (in the
+    order supplied); ``query`` is a free-form string. Both are required and no
+    other properties are allowed.
+    """
+    selectable_names = [knowledge_set.name for knowledge_set in generated_sets]
+    set_name_property = {
+        "type": "string",
+        "enum": selectable_names,
+        "description": "Knowledge set to search.",
+    }
+    query_property = {
+        "type": "string",
+        "description": "Search query for the selected knowledge set.",
+    }
+    return {
+        "type": "object",
+        "properties": {"set_name": set_name_property, "query": query_property},
+        "required": ["set_name", "query"],
+        "additionalProperties": False,
+    }
+
+
+def _tool_description(generated_sets: list[DifyKnowledgeSetConfig]) -> str:
+    """Compose the tool description: the fixed text plus the configured set labels.
+
+    A set is listed as ``name: description`` when it has a non-empty
+    description and as the bare ``name`` otherwise; labels are comma-separated.
+    """
+    labels = [
+        f"{knowledge_set.name}: {knowledge_set.description}" if knowledge_set.description else knowledge_set.name
+        for knowledge_set in generated_sets
+    ]
+    return f"{_KNOWLEDGE_BASE_TOOL_DESCRIPTION} Configured sets: {', '.join(labels)}."
+
+
+def _eager_config_fingerprint(user_query_sets: list[DifyKnowledgeSetConfig]) -> str:
+    """Return a SHA-256 hex digest of the inputs that drive eager retrieval.
+
+    Each set contributes its id, query config, dataset ids, retrieval config
+    and metadata filtering (dumped by alias). The entries are serialized as
+    compact JSON with sorted keys, so the digest depends on set order but not
+    on key order inside each entry.
+    """
+
+    def _retrieval_inputs(knowledge_set: DifyKnowledgeSetConfig) -> dict[str, object]:
+        return {
+            "id": knowledge_set.id,
+            "query": knowledge_set.query.model_dump(mode="json"),
+            "dataset_ids": knowledge_set.dataset_ids,
+            "retrieval": knowledge_set.retrieval.model_dump(mode="json"),
+            "metadata_filtering": knowledge_set.metadata_filtering.model_dump(mode="json", by_alias=True),
+        }
+
+    canonical = json.dumps(
+        [_retrieval_inputs(knowledge_set) for knowledge_set in user_query_sets],
+        sort_keys=True,
+        separators=(",", ":"),
+    )
+    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()
+
+
+def _format_observation(
+ response: DifyKnowledgeRetrieveResponse,
+ config: DifyKnowledgeBaseLayerConfig,
+ *,
+ include_heading: bool = True,
+) -> str:
"""Render inner-API retrieval results into the model-visible tool response.
The formatting contract is intentionally simple and stable for the model:
@@ -248,7 +441,7 @@ def _format_observation(response: DifyKnowledgeRetrieveResponse, config: DifyKno
if not response.results:
return NO_RESULTS_OBSERVATION
- lines = ["Knowledge base search results:"]
+ lines = ["Knowledge base search results:"] if include_heading else []
for index, result in enumerate(response.results, start=1):
metadata = result.metadata
title = result.title or metadata.document_name or "Untitled"
@@ -280,6 +473,5 @@ __all__ = [
"DifyKnowledgeBaseDeps",
"DifyKnowledgeBaseLayer",
"NO_RESULTS_OBSERVATION",
- "QUERY_TOOL_SCHEMA",
"TEMPORARY_UNAVAILABLE_OBSERVATION",
]
diff --git a/dify-agent/tests/local/dify_agent/layers/knowledge/test_configs.py b/dify-agent/tests/local/dify_agent/layers/knowledge/test_configs.py
index f28939e329b..dbe8fddcbec 100644
--- a/dify-agent/tests/local/dify_agent/layers/knowledge/test_configs.py
+++ b/dify-agent/tests/local/dify_agent/layers/knowledge/test_configs.py
@@ -6,46 +6,142 @@ from dify_agent.layers.knowledge import DifyKnowledgeBaseLayerConfig
def _valid_config() -> dict[str, object]:
return {
- "dataset_ids": ["dataset-1"],
- "retrieval": {
- "mode": "multiple",
- "top_k": 4,
- },
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {
+ "mode": "multiple",
+ "top_k": 4,
+ },
+ }
+ ],
}
def test_knowledge_base_config_accepts_valid_multiple_mode() -> None:
config = DifyKnowledgeBaseLayerConfig.model_validate(_valid_config())
- assert config.dataset_ids == ["dataset-1"]
- assert config.retrieval.top_k == 4
- assert config.metadata_filtering.mode == "disabled"
+ assert config.sets[0].dataset_ids == ["dataset-1"]
+ assert config.sets[0].retrieval.top_k == 4
+ assert config.sets[0].metadata_filtering.mode == "disabled"
@pytest.mark.parametrize(
"payload, expected_message",
[
- ({"dataset_ids": [], "retrieval": {"mode": "multiple", "top_k": 4}}, "dataset_ids"),
+ ({"sets": []}, "sets"),
({"tool_name": "knowledge_base_search", **_valid_config()}, "Extra inputs are not permitted"),
({"tool_description": "Search knowledge", **_valid_config()}, "Extra inputs are not permitted"),
- ({"dataset_ids": ["dataset-1"], "retrieval": {"mode": "multiple"}}, "top_k"),
- ({"dataset_ids": ["dataset-1"], "retrieval": {"mode": "single"}}, "retrieval.model"),
(
{
- "dataset_ids": ["dataset-1"],
- "retrieval": {"mode": "multiple", "top_k": 4},
- "metadata_filtering": {"mode": "automatic"},
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": ""}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ }
+ ]
+ },
+ "dataset id",
+ ),
+ (
+ {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "user_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ }
+ ]
+ },
+ "query.value",
+ ),
+ (
+ {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple"},
+ }
+ ]
+ },
+ "top_k",
+ ),
+ (
+ {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "single"},
+ }
+ ]
+ },
+ "retrieval.model",
+ ),
+ (
+ {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ "metadata_filtering": {"mode": "automatic"},
+ }
+ ],
},
"metadata_filtering.model_config",
),
(
{
- "dataset_ids": ["dataset-1"],
- "retrieval": {"mode": "multiple", "top_k": 4},
- "metadata_filtering": {"mode": "manual"},
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ "metadata_filtering": {"mode": "manual"},
+ }
+ ],
},
"metadata_filtering.conditions",
),
+ (
+ {
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ },
+ {
+ "id": "docs",
+ "name": "support kb",
+ "datasets": [{"id": "dataset-2"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ },
+ ]
+ },
+ "names must be unique",
+ ),
],
)
def test_knowledge_base_config_rejects_invalid_inputs(payload: dict[str, object], expected_message: str) -> None:
@@ -57,8 +153,7 @@ def test_knowledge_base_config_rejects_observation_limit_smaller_than_result_lim
with pytest.raises(ValidationError, match="max_observation_chars"):
_ = DifyKnowledgeBaseLayerConfig.model_validate(
{
- "dataset_ids": ["dataset-1"],
- "retrieval": {"mode": "multiple", "top_k": 4},
+ **_valid_config(),
"max_result_content_chars": 50,
"max_observation_chars": 20,
}
diff --git a/dify-agent/tests/local/dify_agent/layers/knowledge/test_layer.py b/dify-agent/tests/local/dify_agent/layers/knowledge/test_layer.py
index 28fadcb903b..ed6c798b409 100644
--- a/dify-agent/tests/local/dify_agent/layers/knowledge/test_layer.py
+++ b/dify-agent/tests/local/dify_agent/layers/knowledge/test_layer.py
@@ -8,7 +8,11 @@ from pydantic_ai import Tool
from agenton.compositor import Compositor, LayerNode, LayerProvider
from dify_agent.layers.execution_context import DifyExecutionContextLayerConfig
from dify_agent.layers.execution_context.layer import DifyExecutionContextLayer
-from dify_agent.layers.knowledge.client import DifyKnowledgeBaseClientError
+from dify_agent.layers.knowledge.client import (
+ DifyKnowledgeBaseClient,
+ DifyKnowledgeBaseClientError,
+ DifyKnowledgeRetrieveResponse,
+)
from dify_agent.layers.knowledge.configs import DifyKnowledgeBaseLayerConfig
from dify_agent.layers.knowledge.layer import (
BLANK_QUERY_OBSERVATION,
@@ -32,10 +36,23 @@ def _execution_context_config(**overrides: object) -> DifyExecutionContextLayerC
def _knowledge_config(**overrides: object) -> DifyKnowledgeBaseLayerConfig:
- payload: dict[str, object] = {
- "dataset_ids": ["dataset-1"],
+ set_payload: dict[str, object] = {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
}
+ for key in ("id", "name", "description", "datasets", "query", "retrieval", "metadata_filtering"):
+ if key in overrides:
+ set_payload[key] = overrides.pop(key)
+ if "dataset_ids" in overrides:
+ dataset_ids = overrides.pop("dataset_ids")
+ assert isinstance(dataset_ids, list)
+ set_payload["datasets"] = [{"id": dataset_id} for dataset_id in dataset_ids]
+ payload: dict[str, object] = {
+ "sets": [set_payload],
+ }
payload.update(overrides)
return DifyKnowledgeBaseLayerConfig.model_validate(payload)
@@ -62,7 +79,7 @@ def _knowledge_provider() -> LayerProvider[DifyKnowledgeBaseLayer]:
)
-def test_knowledge_layer_exposes_one_query_only_tool_definition() -> None:
+def test_knowledge_layer_exposes_one_set_scoped_tool_definition() -> None:
async def scenario() -> None:
compositor = Compositor(
[
@@ -82,20 +99,23 @@ def test_knowledge_layer_exposes_one_query_only_tool_definition() -> None:
tool_def = await tool.prepare_tool_def(None) # pyright: ignore[reportArgumentType]
assert isinstance(tool, Tool)
assert tool.name == "knowledge_base_search"
- assert tool.description == "Search configured knowledge bases for information relevant to the query."
+ assert "Pick one configured set_name" in tool.description
assert tool_def is not None
- assert (
- tool_def.description == "Search configured knowledge bases for information relevant to the query."
- )
+ assert "Pick one configured set_name" in tool_def.description
assert tool_def.parameters_json_schema == {
"type": "object",
"properties": {
+ "set_name": {
+ "type": "string",
+ "enum": ["Support KB"],
+ "description": "Knowledge set to search.",
+ },
"query": {
"type": "string",
- "description": "Search query for the configured knowledge bases.",
- }
+ "description": "Search query for the selected knowledge set.",
+ },
},
- "required": ["query"],
+ "required": ["set_name", "query"],
"additionalProperties": False,
}
@@ -119,12 +139,105 @@ def test_knowledge_layer_rejects_blank_query_locally() -> None:
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
- result = await tool.function_schema.call({"query": " "}, None) # pyright: ignore[reportArgumentType]
+ result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
+ {"set_name": "Support KB", "query": " "}, None
+ )
assert result == BLANK_QUERY_OBSERVATION
asyncio.run(scenario())
+def test_knowledge_layer_exposes_no_tool_when_all_sets_are_user_query(monkeypatch: pytest.MonkeyPatch) -> None:
+ async def fake_retrieve(self: DifyKnowledgeBaseClient, **_kwargs: object) -> DifyKnowledgeRetrieveResponse:
+ del self
+ return DifyKnowledgeRetrieveResponse.model_validate({"results": [], "usage": {}})
+
+ monkeypatch.setattr(DifyKnowledgeBaseClient, "retrieve", fake_retrieve)
+
+ async def scenario() -> None:
+ compositor = Compositor(
+ [
+ LayerNode("execution_context", _execution_context_provider()),
+ LayerNode("knowledge", _knowledge_provider(), deps={"execution_context": "execution_context"}),
+ ]
+ )
+ async with httpx.AsyncClient() as http_client:
+ async with compositor.enter(
+ configs={
+ "execution_context": _execution_context_config(),
+ "knowledge": _knowledge_config(query={"mode": "user_query", "value": "release notes"}),
+ }
+ ) as run:
+ knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
+ assert await knowledge_layer.get_tools(http_client=http_client) == []
+
+ asyncio.run(scenario())
+
+
+def test_knowledge_layer_fetches_user_query_sets_on_context_entry(monkeypatch: pytest.MonkeyPatch) -> None:
+ seen_requests: list[dict[str, object]] = []
+
+ async def fake_retrieve(self: DifyKnowledgeBaseClient, **kwargs: object) -> DifyKnowledgeRetrieveResponse:
+ del self
+ seen_requests.append(kwargs)
+ return DifyKnowledgeRetrieveResponse.model_validate(
+ {
+ "results": [
+ {
+ "metadata": {
+ "_source": "knowledge",
+ "dataset_name": "Docs",
+ "document_name": "Release.md",
+ "score": 0.8,
+ },
+ "title": "Release",
+ "files": [],
+ "content": "Version notes",
+ "summary": None,
+ }
+ ],
+ "usage": {},
+ }
+ )
+
+ monkeypatch.setattr(DifyKnowledgeBaseClient, "retrieve", fake_retrieve)
+
+ async def scenario() -> None:
+ compositor = Compositor(
+ [
+ LayerNode("execution_context", _execution_context_provider()),
+ LayerNode("knowledge", _knowledge_provider(), deps={"execution_context": "execution_context"}),
+ ]
+ )
+ async with compositor.enter(
+ configs={
+ "execution_context": _execution_context_config(),
+ "knowledge": _knowledge_config(query={"mode": "user_query", "value": "release notes"}),
+ }
+ ) as run:
+ knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
+ assert len(seen_requests) == 1
+ assert seen_requests[0]["query"] == "release notes"
+ assert seen_requests[0]["dataset_ids"] == ["dataset-1"]
+ assert knowledge_layer.runtime_state.eager_config_fingerprint
+ assert knowledge_layer.runtime_state.eager_results[0].status == "success"
+ assert knowledge_layer.user_prompts == [
+ "Knowledge retrieval results:\n\n"
+ "Set: Support KB\n"
+ "Query: release notes\n"
+ "Results:\n"
+ "1. Title: Release\n"
+ " Dataset: Docs\n"
+ " Document: Release.md\n"
+ " Score: 0.8\n"
+ " Content: Version notes"
+ ]
+ await knowledge_layer.on_context_resume()
+ assert len(seen_requests) == 1
+
+ asyncio.run(scenario())
+
+
@pytest.mark.parametrize(
("field_name", "field_value"),
[
@@ -199,7 +312,9 @@ def test_knowledge_layer_formats_results_and_truncates_observation() -> None:
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
- result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
+ result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
+ {"set_name": "Support KB", "query": "reset"}, None
+ )
assert result.startswith("Knowledge base search results:\n1. Title: Guide")
assert "Dataset: Docs" in result
assert "Document: Guide.md" in result
@@ -229,7 +344,9 @@ def test_knowledge_layer_returns_no_results_observation() -> None:
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
- result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
+ result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
+ {"set_name": "Support KB", "query": "reset"}, None
+ )
assert result == NO_RESULTS_OBSERVATION
asyncio.run(scenario())
@@ -256,7 +373,9 @@ def test_knowledge_layer_converts_retryable_failures_into_observation() -> None:
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
- result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
+ result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
+ {"set_name": "Support KB", "query": "reset"}, None
+ )
assert result == TEMPORARY_UNAVAILABLE_OBSERVATION
asyncio.run(scenario())
@@ -289,7 +408,9 @@ def test_knowledge_layer_converts_retryable_transport_failures_into_observation(
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
- result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
+ result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
+ {"set_name": "Support KB", "query": "reset"}, None
+ )
assert result == TEMPORARY_UNAVAILABLE_OBSERVATION
asyncio.run(scenario())
@@ -317,7 +438,9 @@ def test_knowledge_layer_raises_non_retryable_client_errors() -> None:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
with pytest.raises(DifyKnowledgeBaseClientError) as exc_info:
- await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
+ await tool.function_schema.call( # pyright: ignore[reportArgumentType]
+ {"set_name": "Support KB", "query": "reset"}, None
+ )
assert exc_info.value.status_code == 403
asyncio.run(scenario())
@@ -343,7 +466,9 @@ def test_knowledge_layer_raises_for_malformed_success_responses() -> None:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
with pytest.raises(DifyKnowledgeBaseClientError) as exc_info:
- await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
+ await tool.function_schema.call( # pyright: ignore[reportArgumentType]
+ {"set_name": "Support KB", "query": "reset"}, None
+ )
assert exc_info.value.error_code == "invalid_response"
assert exc_info.value.retryable is False
@@ -411,7 +536,9 @@ def test_knowledge_layer_sends_execution_context_and_static_config_to_inner_api(
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
- result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
+ result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
+ {"set_name": "Support KB", "query": "reset"}, None
+ )
assert result == NO_RESULTS_OBSERVATION
asyncio.run(scenario())
diff --git a/dify-agent/tests/local/dify_agent/runtime/test_runner.py b/dify-agent/tests/local/dify_agent/runtime/test_runner.py
index f5ddeb72367..4a64fe9090d 100644
--- a/dify-agent/tests/local/dify_agent/runtime/test_runner.py
+++ b/dify-agent/tests/local/dify_agent/runtime/test_runner.py
@@ -995,7 +995,7 @@ def test_runner_passes_dynamic_dify_knowledge_tools_to_agent(monkeypatch: pytest
return TestModel(custom_output_text="done") # pyright: ignore[reportReturnType]
async def fake_get_tools(self: DifyKnowledgeBaseLayer, *, http_client: httpx.AsyncClient) -> list[Tool[object]]:
- assert self.config.dataset_ids == ["dataset-1"]
+ assert self.config.sets[0].dataset_ids == ["dataset-1"]
assert http_client.headers.get("X-Test-Client") == "dify-api"
return [Tool(knowledge_tool, name="knowledge_base_search")]
@@ -1055,8 +1055,15 @@ def test_runner_passes_dynamic_dify_knowledge_tools_to_agent(monkeypatch: pytest
deps={"execution_context": "execution_context"},
config=DifyKnowledgeBaseLayerConfig.model_validate(
{
- "dataset_ids": ["dataset-1"],
- "retrieval": {"mode": "multiple", "top_k": 4},
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 4},
+ }
+ ],
}
),
),
diff --git a/dify-agent/tests/local/dify_agent/server/test_app.py b/dify-agent/tests/local/dify_agent/server/test_app.py
index 8e40bd683b9..ea0bc3b2977 100644
--- a/dify-agent/tests/local/dify_agent/server/test_app.py
+++ b/dify-agent/tests/local/dify_agent/server/test_app.py
@@ -231,8 +231,15 @@ def test_create_app_creates_scheduler_and_closes_after_shutdown(monkeypatch: pyt
knowledge_layer = knowledge_provider.create_layer(
DifyKnowledgeBaseLayerConfig.model_validate(
{
- "dataset_ids": ["dataset-1"],
- "retrieval": {"mode": "multiple", "top_k": 2},
+ "sets": [
+ {
+ "id": "support",
+ "name": "Support KB",
+ "datasets": [{"id": "dataset-1"}],
+ "query": {"mode": "generated_query"},
+ "retrieval": {"mode": "multiple", "top_k": 2},
+ }
+ ],
}
)
)
diff --git a/dify-agent/tests/local/dify_agent/test_import_boundaries.py b/dify-agent/tests/local/dify_agent/test_import_boundaries.py
index 104f12031f0..c24941fae7f 100644
--- a/dify-agent/tests/local/dify_agent/test_import_boundaries.py
+++ b/dify-agent/tests/local/dify_agent/test_import_boundaries.py
@@ -115,7 +115,7 @@ def test_protocol_and_dify_plugin_exports_do_not_import_server_only_modules() ->
"assert dify_agent_layers_execution_context.__all__ == ['DIFY_EXECUTION_CONTEXT_LAYER_TYPE_ID', 'DifyExecutionContextAgentMode', 'DifyExecutionContextInvokeFrom', 'DifyExecutionContextLayerConfig', 'DifyExecutionContextUserFrom']",
"assert dify_agent_layers_ask_human.__all__ == ['AskHumanAction', 'AskHumanActionStyle', 'AskHumanField', 'AskHumanFieldType', 'AskHumanFileField', 'AskHumanFileListField', 'AskHumanParagraphField', 'AskHumanResultStatus', 'AskHumanSelectField', 'AskHumanSelectOption', 'AskHumanSelectedAction', 'AskHumanToolArgs', 'AskHumanToolResult', 'AskHumanUrgency', 'DEFAULT_ASK_HUMAN_TOOL_DESCRIPTION', 'DIFY_ASK_HUMAN_LAYER_TYPE_ID', 'DifyAskHumanLayerConfig']",
"assert dify_agent_layers_dify_plugin.__all__ == ['DIFY_PLUGIN_LLM_LAYER_TYPE_ID', 'DIFY_PLUGIN_TOOLS_LAYER_TYPE_ID', 'DifyPluginCredentialValue', 'DifyPluginLLMLayerConfig', 'DifyPluginToolCredentialType', 'DifyPluginToolConfig', 'DifyPluginToolOption', 'DifyPluginToolParameter', 'DifyPluginToolParameterForm', 'DifyPluginToolParameterType', 'DifyPluginToolsLayerConfig', 'DifyPluginToolValue']",
- "assert dify_agent_layers_knowledge.__all__ == ['DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID', 'DifyKnowledgeBaseLayerConfig', 'DifyKnowledgeMetadataCondition', 'DifyKnowledgeMetadataConditions', 'DifyKnowledgeMetadataFilteringConfig', 'DifyKnowledgeModelConfig', 'DifyKnowledgeRerankingModelConfig', 'DifyKnowledgeRetrievalConfig']",
+ "assert dify_agent_layers_knowledge.__all__ == ['DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID', 'DifyKnowledgeBaseLayerConfig', 'DifyKnowledgeDatasetConfig', 'DifyKnowledgeEagerResult', 'DifyKnowledgeMetadataCondition', 'DifyKnowledgeMetadataConditions', 'DifyKnowledgeMetadataFilteringConfig', 'DifyKnowledgeModelConfig', 'DifyKnowledgeQueryConfig', 'DifyKnowledgeRerankingModelConfig', 'DifyKnowledgeRetrievalConfig', 'DifyKnowledgeRuntimeState', 'DifyKnowledgeSetConfig']",
"assert dify_agent_layers_output.__all__ == ['DIFY_OUTPUT_LAYER_TYPE_ID', 'DifyOutputLayerConfig']",
"assert dify_agent_layers_shell.__all__ == ['DIFY_SHELL_LAYER_TYPE_ID', 'DifyShellCliToolConfig', 'DifyShellEnvVarConfig', 'DifyShellLayerConfig', 'DifyShellSandboxConfig', 'DifyShellSecretRefConfig']",
],
diff --git a/packages/contracts/generated/api/console/agent/types.gen.ts b/packages/contracts/generated/api/console/agent/types.gen.ts
index aa21f2ce651..4d4c83e1a71 100644
--- a/packages/contracts/generated/api/console/agent/types.gen.ts
+++ b/packages/contracts/generated/api/console/agent/types.gen.ts
@@ -563,7 +563,7 @@ export type AgentComposerSoulCandidatesResponse = {
cli_tools?: Array
dify_tools?: Array
human_contacts?: Array
- knowledge_datasets?: Array<AgentKnowledgeDatasetConfig>
+ knowledge_sets?: Array<AgentComposerKnowledgeSetCandidateResponse>
}
export type ComposerCandidateCapabilities = {
@@ -926,9 +926,7 @@ export type AgentSoulHumanConfig = {
}
export type AgentSoulKnowledgeConfig = {
- datasets?: Array<AgentKnowledgeDatasetConfig>
- query_config?: AgentKnowledgeQueryConfig
- query_mode?: AgentKnowledgeQueryMode | null
+ sets?: Array<AgentKnowledgeSetConfig>
}
export type AgentSoulMemoryConfig = {
@@ -1069,11 +1067,12 @@ export type AgentComposerDifyToolCandidateResponse = {
tools_count?: number | null
}
-export type AgentKnowledgeDatasetConfig = {
+export type AgentComposerKnowledgeSetCandidateResponse = {
+ datasets?: Array<AgentComposerKnowledgeDatasetCandidateResponse>
description?: string | null
- id?: string | null
- name?: string | null
- [key: string]: unknown
+ id: string
+ missing_dataset_ids?: Array<string>
+ name: string
}
export type AgentModerationProviderConfig = {
@@ -1228,16 +1227,16 @@ export type AgentHumanToolConfig = {
[key: string]: unknown
}
-export type AgentKnowledgeQueryConfig = {
- query?: string | null
- score_threshold?: number | null
- score_threshold_enabled?: boolean | null
- top_k?: number | null
- [key: string]: unknown
+export type AgentKnowledgeSetConfig = {
+ datasets: Array<AgentKnowledgeDatasetConfig>
+ description?: string | null
+ id: string
+ metadata_filtering?: AgentKnowledgeMetadataFilteringConfig
+ name: string
+ query: AgentKnowledgeQueryConfig
+ retrieval: AgentKnowledgeRetrievalConfig
}
-export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
-
export type AgentMemoryArtifactConfig = {
id?: string | null
name?: string | null
@@ -1376,6 +1375,13 @@ export type AgentPermissionConfig = {
export type AgentCliToolRiskLevel = 'dangerous' | 'safe' | 'unknown'
+export type AgentComposerKnowledgeDatasetCandidateResponse = {
+ description?: string | null
+ id?: string | null
+ missing?: boolean
+ name?: string | null
+}
+
export type AgentModerationIoConfig = {
enabled?: boolean
preset_response?: string | null
@@ -1404,6 +1410,34 @@ export type FormInputConfig
export type JsonValue2 = unknown
+export type AgentKnowledgeDatasetConfig = {
+ description?: string | null
+ id?: string | null
+ name?: string | null
+}
+
+export type AgentKnowledgeMetadataFilteringConfig = {
+ conditions?: AgentKnowledgeMetadataConditions | null
+ mode?: 'automatic' | 'disabled' | 'manual'
+ model_config?: AgentKnowledgeModelConfig | null
+}
+
+export type AgentKnowledgeQueryConfig = {
+ mode: AgentKnowledgeQueryMode
+ value?: string | null
+}
+
+export type AgentKnowledgeRetrievalConfig = {
+ mode: 'multiple' | 'single'
+ model?: AgentKnowledgeModelConfig | null
+ reranking_enable?: boolean
+ reranking_mode?: string
+ reranking_model?: AgentKnowledgeRerankingModelConfig | null
+ score_threshold?: number | null
+ top_k?: number | null
+ weights?: AgentKnowledgeWeightedScoreConfig | null
+}
+
export type AgentModelResponseFormatConfig = {
type?: string | null
[key: string]: unknown
@@ -1454,6 +1488,38 @@ export type FileListInputConfig = {
type?: 'file-list'
}
+export type AgentKnowledgeMetadataConditions = {
+ conditions?: Array<AgentKnowledgeMetadataCondition>
+ logical_operator?: 'and' | 'or'
+}
+
+export type AgentKnowledgeModelConfig = {
+ completion_params?: {
+ [key: string]: unknown
+ }
+ mode: string
+ name: string
+ provider: string
+}
+
+export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
+
+export type AgentKnowledgeRerankingModelConfig = {
+ model: string
+ provider: string
+}
+
+export type AgentKnowledgeWeightedScoreConfig = {
+ keyword_setting?: {
+ [key: string]: unknown
+ } | null
+ vector_setting?: {
+ [key: string]: unknown
+ } | null
+ weight_type?: string | null
+ [key: string]: unknown
+}
+
export type StringSource = {
selector?: Array
type: ValueSourceType
@@ -1470,6 +1536,30 @@ export type FileType = 'audio' | 'custom' | 'document' | 'image' | 'video'
export type FileTransferMethod = 'datasource_file' | 'local_file' | 'remote_url' | 'tool_file'
+export type AgentKnowledgeMetadataCondition = {
+ comparison_operator:
+ | '<'
+ | '='
+ | '>'
+ | 'after'
+ | 'before'
+ | 'contains'
+ | 'empty'
+ | 'end with'
+ | 'in'
+ | 'is'
+ | 'is not'
+ | 'not contains'
+ | 'not empty'
+ | 'not in'
+ | 'start with'
+ | '≠'
+ | '≤'
+ | '≥'
+ name: string
+ value?: string | Array<string> | number | null
+}
+
export type ValueSourceType = 'constant' | 'variable'
export type AgentAppPaginationWritable = {
diff --git a/packages/contracts/generated/api/console/agent/zod.gen.ts b/packages/contracts/generated/api/console/agent/zod.gen.ts
index cb4107f2d53..43e4f61fc4e 100644
--- a/packages/contracts/generated/api/console/agent/zod.gen.ts
+++ b/packages/contracts/generated/api/console/agent/zod.gen.ts
@@ -1022,15 +1022,6 @@ export const zAgentComposerDifyToolCandidateResponse = z.object({
tools_count: z.int().nullish(),
})
-/**
- * AgentKnowledgeDatasetConfig
- */
-export const zAgentKnowledgeDatasetConfig = z.object({
- description: z.string().nullish(),
- id: z.string().max(255).nullish(),
- name: z.string().max(255).nullish(),
-})
-
/**
* SimpleAccount
*/
@@ -1279,30 +1270,6 @@ export const zAgentSoulHumanConfig = z.object({
tools: z.array(zAgentHumanToolConfig).optional(),
})
-/**
- * AgentKnowledgeQueryConfig
- */
-export const zAgentKnowledgeQueryConfig = z.object({
- query: z.string().nullish(),
- score_threshold: z.number().gte(0).lte(1).nullish(),
- score_threshold_enabled: z.boolean().nullish(),
- top_k: z.int().gte(1).nullish(),
-})
-
-/**
- * AgentKnowledgeQueryMode
- */
-export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
-
-/**
- * AgentSoulKnowledgeConfig
- */
-export const zAgentSoulKnowledgeConfig = z.object({
- datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
- query_config: zAgentKnowledgeQueryConfig.optional(),
- query_mode: zAgentKnowledgeQueryMode.nullish(),
-})
-
/**
* AgentMemoryArtifactConfig
*/
@@ -1521,6 +1488,27 @@ export const zAgentCliToolConfig = z.object({
tool_name: z.string().max(255).nullish(),
})
+/**
+ * AgentComposerKnowledgeDatasetCandidateResponse
+ */
+export const zAgentComposerKnowledgeDatasetCandidateResponse = z.object({
+ description: z.string().nullish(),
+ id: z.string().max(255).nullish(),
+ missing: z.boolean().optional().default(false),
+ name: z.string().max(255).nullish(),
+})
+
+/**
+ * AgentComposerKnowledgeSetCandidateResponse
+ */
+export const zAgentComposerKnowledgeSetCandidateResponse = z.object({
+ datasets: z.array(zAgentComposerKnowledgeDatasetCandidateResponse).optional(),
+ description: z.string().nullish(),
+ id: z.string(),
+ missing_dataset_ids: z.array(z.string()).optional(),
+ name: z.string(),
+})
+
/**
* AgentComposerSoulCandidatesResponse
*/
@@ -1528,7 +1516,7 @@ export const zAgentComposerSoulCandidatesResponse = z.object({
cli_tools: z.array(zAgentCliToolConfig).optional(),
dify_tools: z.array(zAgentComposerDifyToolCandidateResponse).optional(),
human_contacts: z.array(zAgentHumanContactConfig).optional(),
- knowledge_datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
+ knowledge_sets: z.array(zAgentComposerKnowledgeSetCandidateResponse).optional(),
})
/**
@@ -1583,6 +1571,15 @@ export const zHumanInputFormSubmissionData = z.object({
submitted_data: z.record(z.string(), zJsonValue2).nullish(),
})
+/**
+ * AgentKnowledgeDatasetConfig
+ */
+export const zAgentKnowledgeDatasetConfig = z.object({
+ description: z.string().nullish(),
+ id: z.string().max(255).nullish(),
+ name: z.string().max(255).nullish(),
+})
+
/**
* AgentModelResponseFormatConfig
*/
@@ -1733,53 +1730,6 @@ export const zAgentSoulToolsConfig = z.object({
dify_tools: z.array(zAgentSoulDifyToolConfig).optional(),
})
-/**
- * AgentSoulConfig
- */
-export const zAgentSoulConfig = z.object({
- app_features: zAgentSoulAppFeaturesConfig.optional(),
- app_variables: z.array(zAppVariableConfig).optional(),
- env: zAgentSoulEnvConfig.optional(),
- human: zAgentSoulHumanConfig.optional(),
- knowledge: zAgentSoulKnowledgeConfig.optional(),
- memory: zAgentSoulMemoryConfig.optional(),
- misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
- model: zAgentSoulModelConfig.nullish(),
- prompt: zAgentSoulPromptConfig.optional(),
- sandbox: zAgentSoulSandboxConfig.optional(),
- schema_version: z.int().optional().default(1),
- tools: zAgentSoulToolsConfig.optional(),
-})
-
-/**
- * AgentAppComposerResponse
- */
-export const zAgentAppComposerResponse = z.object({
- active_config_snapshot: zAgentConfigSnapshotSummaryResponse,
- agent: zAgentComposerAgentResponse,
- agent_soul: zAgentSoulConfig,
- save_options: z.array(zComposerSaveStrategy),
- validation: zComposerValidationFindingsResponse.nullish(),
- variant: z.literal('agent_app'),
-})
-
-/**
- * AgentConfigSnapshotDetailResponse
- */
-export const zAgentConfigSnapshotDetailResponse = z.object({
- agent_id: z.string().nullish(),
- config_snapshot: zAgentSoulConfig,
- created_at: z.int().nullish(),
- created_by: z.string().nullish(),
- display_version: z.int().nullish(),
- id: z.string(),
- revisions: z.array(zAgentConfigRevisionResponse).optional(),
- snapshot_version: z.int().nullish(),
- summary: z.string().nullish(),
- version: z.int(),
- version_note: z.string().nullish(),
-})
-
/**
* OutputErrorStrategy
*
@@ -1869,22 +1819,6 @@ export const zWorkflowNodeJobConfig = z.object({
workflow_prompt: z.string().optional().default(''),
})
-/**
- * ComposerSavePayload
- */
-export const zComposerSavePayload = z.object({
- agent_soul: zAgentSoulConfig.nullish(),
- binding: zComposerBindingPayload.nullish(),
- client_revision_id: z.string().nullish(),
- idempotency_key: z.string().nullish(),
- new_agent_name: z.string().min(1).max(255).nullish(),
- node_job: zWorkflowNodeJobConfig.nullish(),
- save_strategy: zComposerSaveStrategy,
- soul_lock: zComposerSoulLockPayload.optional(),
- variant: zComposerVariant,
- version_note: z.string().nullish(),
-})
-
/**
* ButtonStyle
*
@@ -1903,6 +1837,60 @@ export const zUserActionConfig = z.object({
title: z.string().max(100),
})
+/**
+ * AgentKnowledgeModelConfig
+ */
+export const zAgentKnowledgeModelConfig = z.object({
+ completion_params: z.record(z.string(), z.unknown()).optional(),
+ mode: z.string().min(1).max(64),
+ name: z.string().min(1).max(255),
+ provider: z.string().min(1).max(255),
+})
+
+/**
+ * AgentKnowledgeQueryMode
+ */
+export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
+
+/**
+ * AgentKnowledgeQueryConfig
+ */
+export const zAgentKnowledgeQueryConfig = z.object({
+ mode: zAgentKnowledgeQueryMode,
+ value: z.string().nullish(),
+})
+
+/**
+ * AgentKnowledgeRerankingModelConfig
+ */
+export const zAgentKnowledgeRerankingModelConfig = z.object({
+ model: z.string().min(1).max(255),
+ provider: z.string().min(1).max(255),
+})
+
+/**
+ * AgentKnowledgeWeightedScoreConfig
+ */
+export const zAgentKnowledgeWeightedScoreConfig = z.object({
+ keyword_setting: z.record(z.string(), z.unknown()).nullish(),
+ vector_setting: z.record(z.string(), z.unknown()).nullish(),
+ weight_type: z.string().max(64).nullish(),
+})
+
+/**
+ * AgentKnowledgeRetrievalConfig
+ */
+export const zAgentKnowledgeRetrievalConfig = z.object({
+ mode: z.enum(['multiple', 'single']),
+ model: zAgentKnowledgeModelConfig.nullish(),
+ reranking_enable: z.boolean().optional().default(true),
+ reranking_mode: z.string().optional().default('reranking_model'),
+ reranking_model: zAgentKnowledgeRerankingModelConfig.nullish(),
+ score_threshold: z.number().gte(0).lte(1).nullish(),
+ top_k: z.int().gte(1).nullish(),
+ weights: zAgentKnowledgeWeightedScoreConfig.nullish(),
+})
+
/**
* FileType
*/
@@ -1941,6 +1929,134 @@ export const zFileListInputConfig = z.object({
type: z.literal('file-list').optional().default('file-list'),
})
+/**
+ * AgentKnowledgeMetadataCondition
+ */
+export const zAgentKnowledgeMetadataCondition = z.object({
+ comparison_operator: z.enum([
+ '<',
+ '=',
+ '>',
+ 'after',
+ 'before',
+ 'contains',
+ 'empty',
+ 'end with',
+ 'in',
+ 'is',
+ 'is not',
+ 'not contains',
+ 'not empty',
+ 'not in',
+ 'start with',
+ '≠',
+ '≤',
+ '≥',
+ ]),
+ name: z.string().min(1).max(255),
+ value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(),
+})
+
+/**
+ * AgentKnowledgeMetadataConditions
+ */
+export const zAgentKnowledgeMetadataConditions = z.object({
+ conditions: z.array(zAgentKnowledgeMetadataCondition).optional(),
+ logical_operator: z.enum(['and', 'or']).optional().default('and'),
+})
+
+/**
+ * AgentKnowledgeMetadataFilteringConfig
+ */
+export const zAgentKnowledgeMetadataFilteringConfig = z.object({
+ conditions: zAgentKnowledgeMetadataConditions.nullish(),
+ mode: z.enum(['automatic', 'disabled', 'manual']).optional().default('disabled'),
+ model_config: zAgentKnowledgeModelConfig.nullish(),
+})
+
+/**
+ * AgentKnowledgeSetConfig
+ */
+export const zAgentKnowledgeSetConfig = z.object({
+ datasets: z.array(zAgentKnowledgeDatasetConfig),
+ description: z.string().nullish(),
+ id: z.string().min(1).max(255),
+ metadata_filtering: zAgentKnowledgeMetadataFilteringConfig.optional(),
+ name: z.string().min(1).max(255),
+ query: zAgentKnowledgeQueryConfig,
+ retrieval: zAgentKnowledgeRetrievalConfig,
+})
+
+/**
+ * AgentSoulKnowledgeConfig
+ */
+export const zAgentSoulKnowledgeConfig = z.object({
+ sets: z.array(zAgentKnowledgeSetConfig).optional(),
+})
+
+/**
+ * AgentSoulConfig
+ */
+export const zAgentSoulConfig = z.object({
+ app_features: zAgentSoulAppFeaturesConfig.optional(),
+ app_variables: z.array(zAppVariableConfig).optional(),
+ env: zAgentSoulEnvConfig.optional(),
+ human: zAgentSoulHumanConfig.optional(),
+ knowledge: zAgentSoulKnowledgeConfig.optional(),
+ memory: zAgentSoulMemoryConfig.optional(),
+ misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
+ model: zAgentSoulModelConfig.nullish(),
+ prompt: zAgentSoulPromptConfig.optional(),
+ sandbox: zAgentSoulSandboxConfig.optional(),
+ schema_version: z.int().optional().default(1),
+ tools: zAgentSoulToolsConfig.optional(),
+})
+
+/**
+ * AgentAppComposerResponse
+ */
+export const zAgentAppComposerResponse = z.object({
+ active_config_snapshot: zAgentConfigSnapshotSummaryResponse,
+ agent: zAgentComposerAgentResponse,
+ agent_soul: zAgentSoulConfig,
+ save_options: z.array(zComposerSaveStrategy),
+ validation: zComposerValidationFindingsResponse.nullish(),
+ variant: z.literal('agent_app'),
+})
+
+/**
+ * ComposerSavePayload
+ */
+export const zComposerSavePayload = z.object({
+ agent_soul: zAgentSoulConfig.nullish(),
+ binding: zComposerBindingPayload.nullish(),
+ client_revision_id: z.string().nullish(),
+ idempotency_key: z.string().nullish(),
+ new_agent_name: z.string().min(1).max(255).nullish(),
+ node_job: zWorkflowNodeJobConfig.nullish(),
+ save_strategy: zComposerSaveStrategy,
+ soul_lock: zComposerSoulLockPayload.optional(),
+ variant: zComposerVariant,
+ version_note: z.string().nullish(),
+})
+
+/**
+ * AgentConfigSnapshotDetailResponse
+ */
+export const zAgentConfigSnapshotDetailResponse = z.object({
+ agent_id: z.string().nullish(),
+ config_snapshot: zAgentSoulConfig,
+ created_at: z.int().nullish(),
+ created_by: z.string().nullish(),
+ display_version: z.int().nullish(),
+ id: z.string(),
+ revisions: z.array(zAgentConfigRevisionResponse).optional(),
+ snapshot_version: z.int().nullish(),
+ summary: z.string().nullish(),
+ version: z.int(),
+ version_note: z.string().nullish(),
+})
+
/**
* ValueSourceType
*
diff --git a/packages/contracts/generated/api/console/apps/types.gen.ts b/packages/contracts/generated/api/console/apps/types.gen.ts
index fa56590f0a4..9b31296275b 100644
--- a/packages/contracts/generated/api/console/apps/types.gen.ts
+++ b/packages/contracts/generated/api/console/apps/types.gen.ts
@@ -1890,7 +1890,7 @@ export type AgentComposerSoulCandidatesResponse = {
cli_tools?: Array<AgentCliToolConfig>
dify_tools?: Array<AgentComposerDifyToolCandidateResponse>
human_contacts?: Array<AgentHumanContactConfig>
- knowledge_datasets?: Array<AgentKnowledgeDatasetConfig>
+ knowledge_sets?: Array<AgentComposerKnowledgeSetCandidateResponse>
}
export type ComposerCandidateCapabilities = {
@@ -2124,9 +2124,7 @@ export type AgentSoulHumanConfig = {
}
export type AgentSoulKnowledgeConfig = {
- datasets?: Array<AgentKnowledgeDatasetConfig>
- query_config?: AgentKnowledgeQueryConfig
- query_mode?: AgentKnowledgeQueryMode | null
+ sets?: Array<AgentKnowledgeSetConfig>
}
export type AgentSoulMemoryConfig = {
@@ -2278,11 +2276,12 @@ export type AgentComposerDifyToolCandidateResponse = {
tools_count?: number | null
}
-export type AgentKnowledgeDatasetConfig = {
+export type AgentComposerKnowledgeSetCandidateResponse = { // id and name are required; the other fields are optional
+ datasets?: Array<AgentComposerKnowledgeDatasetCandidateResponse>
description?: string | null
- id?: string | null
- name?: string | null
- [key: string]: unknown
+ id: string
+ missing_dataset_ids?: Array<string>
+ name: string
}
export type CheckResultView = {
@@ -2393,16 +2392,16 @@ export type AgentHumanToolConfig = {
[key: string]: unknown
}
-export type AgentKnowledgeQueryConfig = {
- query?: string | null
- score_threshold?: number | null
- score_threshold_enabled?: boolean | null
- top_k?: number | null
- [key: string]: unknown
+export type AgentKnowledgeSetConfig = { // description and metadata_filtering are the only optional fields
+ datasets: Array<AgentKnowledgeDatasetConfig>
+ description?: string | null
+ id: string
+ metadata_filtering?: AgentKnowledgeMetadataFilteringConfig
+ name: string
+ query: AgentKnowledgeQueryConfig
+ retrieval: AgentKnowledgeRetrievalConfig
}
-export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
-
export type AgentMemoryArtifactConfig = {
id?: string | null
name?: string | null
@@ -2506,6 +2505,13 @@ export type AgentPermissionConfig = {
export type AgentCliToolRiskLevel = 'dangerous' | 'safe' | 'unknown'
+export type AgentComposerKnowledgeDatasetCandidateResponse = { // every field is optional
+ description?: string | null
+ id?: string | null
+ missing?: boolean
+ name?: string | null
+}
+
export type ButtonStyle = 'accent' | 'default' | 'ghost' | 'primary'
export type ParagraphInputConfig = {
@@ -2545,6 +2551,34 @@ export type AgentModerationProviderConfig = {
[key: string]: unknown
}
+export type AgentKnowledgeDatasetConfig = { // every field is optional and may be null
+ description?: string | null
+ id?: string | null
+ name?: string | null
+}
+
+export type AgentKnowledgeMetadataFilteringConfig = { // every field is optional; mode is one of three string literals
+ conditions?: AgentKnowledgeMetadataConditions | null
+ mode?: 'automatic' | 'disabled' | 'manual'
+ model_config?: AgentKnowledgeModelConfig | null
+}
+
+export type AgentKnowledgeQueryConfig = { // mode is required; value is optional and may be null
+ mode: AgentKnowledgeQueryMode
+ value?: string | null
+}
+
+export type AgentKnowledgeRetrievalConfig = { // only mode is required
+ mode: 'multiple' | 'single'
+ model?: AgentKnowledgeModelConfig | null
+ reranking_enable?: boolean
+ reranking_mode?: string
+ reranking_model?: AgentKnowledgeRerankingModelConfig | null
+ score_threshold?: number | null
+ top_k?: number | null
+ weights?: AgentKnowledgeWeightedScoreConfig | null
+}
+
export type AgentModelResponseFormatConfig = {
type?: string | null
[key: string]: unknown
@@ -2578,8 +2612,64 @@ export type AgentModerationIoConfig = {
[key: string]: unknown
}
+export type AgentKnowledgeMetadataConditions = { // both fields are optional
+ conditions?: Array<AgentKnowledgeMetadataCondition>
+ logical_operator?: 'and' | 'or'
+}
+
+export type AgentKnowledgeModelConfig = { // mode, name and provider are required; completion_params is optional
+ completion_params?: {
+ [key: string]: unknown
+ }
+ mode: string
+ name: string
+ provider: string
+}
+
+export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query' // the only two accepted mode strings
+
+export type AgentKnowledgeRerankingModelConfig = { // both fields are required
+ model: string
+ provider: string
+}
+
+export type AgentKnowledgeWeightedScoreConfig = { // all named fields are optional; the index signature admits additional keys
+ keyword_setting?: {
+ [key: string]: unknown
+ } | null
+ vector_setting?: {
+ [key: string]: unknown
+ } | null
+ weight_type?: string | null
+ [key: string]: unknown
+}
+
export type ValueSourceType = 'constant' | 'variable'
+export type AgentKnowledgeMetadataCondition = { // comparison_operator and name are required; value is optional
+ comparison_operator:
+ | '<'
+ | '='
+ | '>'
+ | 'after'
+ | 'before'
+ | 'contains'
+ | 'empty'
+ | 'end with'
+ | 'in'
+ | 'is'
+ | 'is not'
+ | 'not contains'
+ | 'not empty'
+ | 'not in'
+ | 'start with'
+ | '≠'
+ | '≤'
+ | '≥'
+ name: string
+ value?: string | Array<string> | number | null
+}
+
export type AppPaginationWritable = {
data: Array
has_more: boolean
diff --git a/packages/contracts/generated/api/console/apps/zod.gen.ts b/packages/contracts/generated/api/console/apps/zod.gen.ts
index 043fc11261f..b3c0f05bf6b 100644
--- a/packages/contracts/generated/api/console/apps/zod.gen.ts
+++ b/packages/contracts/generated/api/console/apps/zod.gen.ts
@@ -2629,15 +2629,6 @@ export const zAgentComposerDifyToolCandidateResponse = z.object({
tools_count: z.int().nullish(),
})
-/**
- * AgentKnowledgeDatasetConfig
- */
-export const zAgentKnowledgeDatasetConfig = z.object({
- description: z.string().nullish(),
- id: z.string().max(255).nullish(),
- name: z.string().max(255).nullish(),
-})
-
/**
* CheckResultView
*
@@ -2767,30 +2758,6 @@ export const zAgentSoulHumanConfig = z.object({
tools: z.array(zAgentHumanToolConfig).optional(),
})
-/**
- * AgentKnowledgeQueryConfig
- */
-export const zAgentKnowledgeQueryConfig = z.object({
- query: z.string().nullish(),
- score_threshold: z.number().gte(0).lte(1).nullish(),
- score_threshold_enabled: z.boolean().nullish(),
- top_k: z.int().gte(1).nullish(),
-})
-
-/**
- * AgentKnowledgeQueryMode
- */
-export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
-
-/**
- * AgentSoulKnowledgeConfig
- */
-export const zAgentSoulKnowledgeConfig = z.object({
- datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
- query_config: zAgentKnowledgeQueryConfig.optional(),
- query_mode: zAgentKnowledgeQueryMode.nullish(),
-})
-
/**
* AgentMemoryArtifactConfig
*/
@@ -3002,6 +2969,27 @@ export const zAgentCliToolConfig = z.object({
tool_name: z.string().max(255).nullish(),
})
+/**
+ * AgentComposerKnowledgeDatasetCandidateResponse — every field may be omitted; id and name are capped at 255 characters and missing defaults to false.
+ */
+export const zAgentComposerKnowledgeDatasetCandidateResponse = z.object({
+ description: z.string().nullish(),
+ id: z.string().max(255).nullish(),
+ missing: z.boolean().optional().default(false),
+ name: z.string().max(255).nullish(),
+})
+
+/**
+ * AgentComposerKnowledgeSetCandidateResponse — id and name are required; datasets, description and missing_dataset_ids may be omitted.
+ */
+export const zAgentComposerKnowledgeSetCandidateResponse = z.object({
+ datasets: z.array(zAgentComposerKnowledgeDatasetCandidateResponse).optional(),
+ description: z.string().nullish(),
+ id: z.string(),
+ missing_dataset_ids: z.array(z.string()).optional(),
+ name: z.string(),
+})
+
/**
* AgentComposerSoulCandidatesResponse
*/
@@ -3009,7 +2997,7 @@ export const zAgentComposerSoulCandidatesResponse = z.object({
cli_tools: z.array(zAgentCliToolConfig).optional(),
dify_tools: z.array(zAgentComposerDifyToolCandidateResponse).optional(),
human_contacts: z.array(zAgentHumanContactConfig).optional(),
- knowledge_datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
+ knowledge_sets: z.array(zAgentComposerKnowledgeSetCandidateResponse).optional(),
})
/**
@@ -3041,6 +3029,15 @@ export const zUserActionConfig = z.object({
title: z.string().max(100),
})
+/**
+ * AgentKnowledgeDatasetConfig — all fields may be null or omitted; id and name are capped at 255 characters.
+ */
+export const zAgentKnowledgeDatasetConfig = z.object({
+ description: z.string().nullish(),
+ id: z.string().max(255).nullish(),
+ name: z.string().max(255).nullish(),
+})
+
/**
* AgentModelResponseFormatConfig
*/
@@ -3292,57 +3289,57 @@ export const zAgentSoulAppFeaturesConfig = z.object({
})
/**
- * AgentSoulConfig
+ * AgentKnowledgeModelConfig — mode (1–64 chars), name and provider (1–255 chars each) are required; completion_params is an optional string-keyed record.
*/
-export const zAgentSoulConfig = z.object({
- app_features: zAgentSoulAppFeaturesConfig.optional(),
- app_variables: z.array(zAppVariableConfig).optional(),
- env: zAgentSoulEnvConfig.optional(),
- human: zAgentSoulHumanConfig.optional(),
- knowledge: zAgentSoulKnowledgeConfig.optional(),
- memory: zAgentSoulMemoryConfig.optional(),
- misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
- model: zAgentSoulModelConfig.nullish(),
- prompt: zAgentSoulPromptConfig.optional(),
- sandbox: zAgentSoulSandboxConfig.optional(),
- schema_version: z.int().optional().default(1),
- tools: zAgentSoulToolsConfig.optional(),
+export const zAgentKnowledgeModelConfig = z.object({
+ completion_params: z.record(z.string(), z.unknown()).optional(),
+ mode: z.string().min(1).max(64),
+ name: z.string().min(1).max(255),
+ provider: z.string().min(1).max(255),
})
/**
- * WorkflowAgentComposerResponse
+ * AgentKnowledgeQueryMode — enum accepting only 'generated_query' or 'user_query'.
*/
-export const zWorkflowAgentComposerResponse = z.object({
- active_config_snapshot: zAgentConfigSnapshotSummaryResponse.nullish(),
- agent: zAgentComposerAgentResponse.nullish(),
- agent_soul: zAgentSoulConfig,
- app_id: z.string().nullish(),
- binding: zAgentComposerBindingResponse.nullish(),
- effective_declared_outputs: z.array(zDeclaredOutputConfig).optional(),
- impact_summary: zAgentComposerImpactResponse.nullish(),
- node_id: z.string().nullish(),
- node_job: zWorkflowNodeJobConfig,
- save_options: z.array(zComposerSaveStrategy),
- soul_lock: zAgentComposerSoulLockResponse,
- validation: zComposerValidationFindingsResponse.nullish(),
- variant: z.literal('workflow'),
- workflow_id: z.string().nullish(),
+export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
+
+/**
+ * AgentKnowledgeQueryConfig — mode is required; value is a string that may be null or omitted.
+ */
+export const zAgentKnowledgeQueryConfig = z.object({
+ mode: zAgentKnowledgeQueryMode,
+ value: z.string().nullish(),
})
/**
- * ComposerSavePayload
+ * AgentKnowledgeRerankingModelConfig — model and provider are both required, 1–255 characters each.
*/
-export const zComposerSavePayload = z.object({
- agent_soul: zAgentSoulConfig.nullish(),
- binding: zComposerBindingPayload.nullish(),
- client_revision_id: z.string().nullish(),
- idempotency_key: z.string().nullish(),
- new_agent_name: z.string().min(1).max(255).nullish(),
- node_job: zWorkflowNodeJobConfig.nullish(),
- save_strategy: zComposerSaveStrategy,
- soul_lock: zComposerSoulLockPayload.optional(),
- variant: zComposerVariant,
- version_note: z.string().nullish(),
+export const zAgentKnowledgeRerankingModelConfig = z.object({
+ model: z.string().min(1).max(255),
+ provider: z.string().min(1).max(255),
+})
+
+/**
+ * AgentKnowledgeWeightedScoreConfig — all fields may be null or omitted; weight_type is capped at 64 characters.
+ */
+export const zAgentKnowledgeWeightedScoreConfig = z.object({
+ keyword_setting: z.record(z.string(), z.unknown()).nullish(),
+ vector_setting: z.record(z.string(), z.unknown()).nullish(),
+ weight_type: z.string().max(64).nullish(),
+})
+
+/**
+ * AgentKnowledgeRetrievalConfig — only mode ('multiple' | 'single') is required; reranking_enable defaults to true and reranking_mode to 'reranking_model'; score_threshold must lie in 0–1 and top_k must be an integer >= 1.
+ */
+export const zAgentKnowledgeRetrievalConfig = z.object({
+ mode: z.enum(['multiple', 'single']),
+ model: zAgentKnowledgeModelConfig.nullish(),
+ reranking_enable: z.boolean().optional().default(true),
+ reranking_mode: z.string().optional().default('reranking_model'),
+ reranking_model: zAgentKnowledgeRerankingModelConfig.nullish(),
+ score_threshold: z.number().gte(0).lte(1).nullish(),
+ top_k: z.int().gte(1).nullish(),
+ weights: zAgentKnowledgeWeightedScoreConfig.nullish(),
})
/**
@@ -3466,6 +3463,125 @@ export const zMessageInfiniteScrollPaginationResponse = z.object({
limit: z.int(),
})
+/**
+ * AgentKnowledgeMetadataCondition — comparison_operator (one of the listed operators) and name (1–255 chars) are required; value may be a string, a string array, a number, null or omitted.
+ */
+export const zAgentKnowledgeMetadataCondition = z.object({
+ comparison_operator: z.enum([
+ '<',
+ '=',
+ '>',
+ 'after',
+ 'before',
+ 'contains',
+ 'empty',
+ 'end with',
+ 'in',
+ 'is',
+ 'is not',
+ 'not contains',
+ 'not empty',
+ 'not in',
+ 'start with',
+ '≠',
+ '≤',
+ '≥',
+ ]),
+ name: z.string().min(1).max(255),
+ value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(),
+})
+
+/**
+ * AgentKnowledgeMetadataConditions — conditions is an optional array; logical_operator is 'and' or 'or' and defaults to 'and'.
+ */
+export const zAgentKnowledgeMetadataConditions = z.object({
+ conditions: z.array(zAgentKnowledgeMetadataCondition).optional(),
+ logical_operator: z.enum(['and', 'or']).optional().default('and'),
+})
+
+/**
+ * AgentKnowledgeMetadataFilteringConfig — mode is 'automatic', 'disabled' or 'manual' and defaults to 'disabled'; conditions and model_config may be null or omitted.
+ */
+export const zAgentKnowledgeMetadataFilteringConfig = z.object({
+ conditions: zAgentKnowledgeMetadataConditions.nullish(),
+ mode: z.enum(['automatic', 'disabled', 'manual']).optional().default('disabled'),
+ model_config: zAgentKnowledgeModelConfig.nullish(),
+})
+
+/**
+ * AgentKnowledgeSetConfig — datasets, id, name, query and retrieval are required; description and metadata_filtering may be omitted; id and name must be 1–255 characters.
+ */
+export const zAgentKnowledgeSetConfig = z.object({
+ datasets: z.array(zAgentKnowledgeDatasetConfig),
+ description: z.string().nullish(),
+ id: z.string().min(1).max(255),
+ metadata_filtering: zAgentKnowledgeMetadataFilteringConfig.optional(),
+ name: z.string().min(1).max(255),
+ query: zAgentKnowledgeQueryConfig,
+ retrieval: zAgentKnowledgeRetrievalConfig,
+})
+
+/**
+ * AgentSoulKnowledgeConfig — holds a single optional field, sets, an array of AgentKnowledgeSetConfig entries.
+ */
+export const zAgentSoulKnowledgeConfig = z.object({
+ sets: z.array(zAgentKnowledgeSetConfig).optional(),
+})
+
+/**
+ * AgentSoulConfig — every section may be omitted (model may also be null); schema_version is an integer that defaults to 1.
+ */
+export const zAgentSoulConfig = z.object({
+ app_features: zAgentSoulAppFeaturesConfig.optional(),
+ app_variables: z.array(zAppVariableConfig).optional(),
+ env: zAgentSoulEnvConfig.optional(),
+ human: zAgentSoulHumanConfig.optional(),
+ knowledge: zAgentSoulKnowledgeConfig.optional(),
+ memory: zAgentSoulMemoryConfig.optional(),
+ misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
+ model: zAgentSoulModelConfig.nullish(),
+ prompt: zAgentSoulPromptConfig.optional(),
+ sandbox: zAgentSoulSandboxConfig.optional(),
+ schema_version: z.int().optional().default(1),
+ tools: zAgentSoulToolsConfig.optional(),
+})
+
+/**
+ * WorkflowAgentComposerResponse — variant is fixed to the literal 'workflow'; agent_soul, node_job, save_options and soul_lock are also required, and the remaining fields may be omitted.
+ */
+export const zWorkflowAgentComposerResponse = z.object({
+ active_config_snapshot: zAgentConfigSnapshotSummaryResponse.nullish(),
+ agent: zAgentComposerAgentResponse.nullish(),
+ agent_soul: zAgentSoulConfig,
+ app_id: z.string().nullish(),
+ binding: zAgentComposerBindingResponse.nullish(),
+ effective_declared_outputs: z.array(zDeclaredOutputConfig).optional(),
+ impact_summary: zAgentComposerImpactResponse.nullish(),
+ node_id: z.string().nullish(),
+ node_job: zWorkflowNodeJobConfig,
+ save_options: z.array(zComposerSaveStrategy),
+ soul_lock: zAgentComposerSoulLockResponse,
+ validation: zComposerValidationFindingsResponse.nullish(),
+ variant: z.literal('workflow'),
+ workflow_id: z.string().nullish(),
+})
+
+/**
+ * ComposerSavePayload — only save_strategy and variant are required; new_agent_name, when given as a string, must be 1–255 characters.
+ */
+export const zComposerSavePayload = z.object({
+ agent_soul: zAgentSoulConfig.nullish(),
+ binding: zComposerBindingPayload.nullish(),
+ client_revision_id: z.string().nullish(),
+ idempotency_key: z.string().nullish(),
+ new_agent_name: z.string().min(1).max(255).nullish(),
+ node_job: zWorkflowNodeJobConfig.nullish(),
+ save_strategy: zComposerSaveStrategy,
+ soul_lock: zComposerSoulLockPayload.optional(),
+ variant: zComposerVariant,
+ version_note: z.string().nullish(),
+})
+
/**
* GeneratedAppResponse
*/