feat: wire agent v2 knowledge sets backend

This commit is contained in:
Yanli 盐粒 2026-06-23 16:43:10 +08:00
parent c3cb134e73
commit c06d924094
35 changed files with 2424 additions and 530 deletions

View File

@ -312,7 +312,7 @@ class AgentBackendRunRequestBuilder:
)
)
if run_input.knowledge is not None and run_input.knowledge.dataset_ids:
if run_input.knowledge is not None and run_input.knowledge.sets:
layers.append(
RunLayerSpec(
name=DIFY_KNOWLEDGE_BASE_LAYER_ID,
@ -513,7 +513,7 @@ class AgentBackendRunRequestBuilder:
)
)
if run_input.knowledge is not None and run_input.knowledge.dataset_ids:
if run_input.knowledge is not None and run_input.knowledge.sets:
layers.append(
RunLayerSpec(
name=DIFY_KNOWLEDGE_BASE_LAYER_ID,

View File

@ -105,6 +105,7 @@ class WorkflowAgentComposerValidateApi(Resource):
def post(self, tenant_id: str, app_model: App, node_id: str):
payload = ComposerSavePayload.model_validate(console_ns.payload or {})
ComposerConfigValidator.validate_save_payload(payload)
AgentComposerService.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
findings = AgentComposerService.collect_validation_findings(
tenant_id=tenant_id,
payload=payload,
@ -239,6 +240,7 @@ class AgentComposerValidateApi(Resource):
_resolve_agent_app_id(tenant_id=tenant_id, agent_id=agent_id)
payload = ComposerSavePayload.model_validate(console_ns.payload or {})
ComposerConfigValidator.validate_save_payload(payload)
AgentComposerService.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
findings = AgentComposerService.collect_validation_findings(
tenant_id=tenant_id,
payload=payload,

View File

@ -3,6 +3,7 @@ from __future__ import annotations
from typing import Any
from models.agent_config_entities import AgentSoulConfig
from services.agent.knowledge_datasets import list_agent_soul_knowledge_dataset_ids
SUPPORTED_AGENT_BACKEND_FEATURES = frozenset(
{
@ -48,9 +49,7 @@ def build_runtime_feature_manifest(agent_soul: AgentSoulConfig) -> dict[str, Any
)
reserved_status = dict.fromkeys(sorted(RESERVED_AGENT_BACKEND_FEATURES), "reserved_not_executed")
reserved_status["knowledge"] = (
"supported_by_knowledge_layer" if list_configured_knowledge_dataset_ids(agent_soul) else "not_configured"
)
reserved_status["knowledge"] = "supported_by_knowledge_layer" if agent_soul.knowledge.sets else "not_configured"
reserved_status["tools.dify_tools"] = "supported_when_config_valid"
reserved_status["tools.cli_tools"] = "supported_by_shell_bootstrap"
reserved_status["env"] = "supported_by_shell_bootstrap"
@ -66,14 +65,14 @@ def build_runtime_feature_manifest(agent_soul: AgentSoulConfig) -> dict[str, Any
def list_configured_knowledge_dataset_ids(agent_soul: AgentSoulConfig) -> list[str]:
"""Return the normalized knowledge dataset ids that can produce a runtime layer.
"""Return normalized dataset ids selected by Agent v2 knowledge sets.
``build_runtime_feature_manifest()`` and ``build_knowledge_layer_config()``
must stay aligned: both decide knowledge support from this effective,
non-blank dataset-id set rather than from raw
``agent_soul.knowledge.datasets`` entries.
stay aligned on the set-based contract: DTO validation rejects blank dataset
ids before runtime, so this helper only flattens configured set datasets for
metadata/diagnostic surfaces that still need a dataset-id summary.
"""
return [dataset_id for dataset in agent_soul.knowledge.datasets if (dataset_id := (dataset.id or "").strip())]
return list_agent_soul_knowledge_dataset_ids(agent_soul)
def _get_nested(value: dict[str, Any], path: str) -> Any:

View File

@ -15,7 +15,16 @@ from dify_agent.layers.execution_context import (
DifyExecutionContextLayerConfig,
DifyExecutionContextUserFrom,
)
from dify_agent.layers.knowledge import DifyKnowledgeBaseLayerConfig, DifyKnowledgeRetrievalConfig
from dify_agent.layers.knowledge import (
DifyKnowledgeBaseLayerConfig,
DifyKnowledgeDatasetConfig,
DifyKnowledgeMetadataFilteringConfig,
DifyKnowledgeModelConfig,
DifyKnowledgeQueryConfig,
DifyKnowledgeRerankingModelConfig,
DifyKnowledgeRetrievalConfig,
DifyKnowledgeSetConfig,
)
from dify_agent.layers.shell import (
DifyShellCliToolConfig,
DifyShellEnvVarConfig,
@ -40,7 +49,9 @@ from graphon.file import FileTransferMethod
from graphon.variables.segments import Segment
from models.agent import Agent, AgentConfigSnapshot, WorkflowAgentNodeBinding
from models.agent_config_entities import (
AgentKnowledgeQueryConfig,
AgentKnowledgeMetadataFilteringConfig,
AgentKnowledgeModelConfig,
AgentKnowledgeRetrievalConfig,
AgentSoulConfig,
DeclaredArrayItem,
DeclaredOutputChildConfig,
@ -547,42 +558,84 @@ def build_shell_layer_config(agent_soul: AgentSoulConfig) -> DifyShellLayerConfi
def build_knowledge_layer_config(agent_soul: AgentSoulConfig) -> DifyKnowledgeBaseLayerConfig | None:
"""Map Agent Soul knowledge config into the fixed Dify knowledge-base layer.
"""Map Agent Soul knowledge sets into one Dify knowledge-base layer.
Normalization intentionally matches the current dify-agent runtime contract:
- blank or missing dataset ids are ignored;
- if no valid dataset ids remain, no knowledge layer is injected;
- retrieval mode is always forced to ``multiple`` in this first wiring pass;
- ``top_k`` falls back to a stable runtime default when the soul omits it;
- ``score_threshold`` is only forwarded when the product config explicitly
enables it, otherwise the layer keeps the disabled/default ``0.0`` value;
- metadata filtering stays at the layer DTO default (disabled).
Agent Soul DTO validation owns malformed set rejection. Runtime mapping is
intentionally lossless: every configured set is forwarded with its query
policy, dataset refs, retrieval controls, and metadata-filtering controls.
``score_threshold=None`` means disabled threshold filtering and maps to the
inner retrieval request's ``0.0`` default through the Agent backend DTO.
"""
dataset_ids = list_configured_knowledge_dataset_ids(agent_soul)
if not dataset_ids:
if not agent_soul.knowledge.sets:
return None
query_config = agent_soul.knowledge.query_config
return DifyKnowledgeBaseLayerConfig(
dataset_ids=dataset_ids,
retrieval=DifyKnowledgeRetrievalConfig(
mode="multiple",
top_k=_knowledge_top_k(query_config),
score_threshold=_knowledge_score_threshold(query_config),
),
sets=[
DifyKnowledgeSetConfig(
id=knowledge_set.id,
name=knowledge_set.name,
description=knowledge_set.description,
datasets=[
DifyKnowledgeDatasetConfig(
id=dataset.id or "",
name=dataset.name,
description=dataset.description,
)
for dataset in knowledge_set.datasets
],
query=DifyKnowledgeQueryConfig(
mode=cast(Literal["user_query", "generated_query"], knowledge_set.query.mode.value),
value=knowledge_set.query.value,
),
retrieval=_knowledge_retrieval_config(knowledge_set.retrieval),
metadata_filtering=_knowledge_metadata_filtering_config(knowledge_set.metadata_filtering),
)
for knowledge_set in agent_soul.knowledge.sets
],
)
def _knowledge_top_k(query_config: AgentKnowledgeQueryConfig) -> int:
    """Return the configured ``top_k`` when it is an integer >= 1, otherwise ``4``."""
    fallback = 4
    candidate = query_config.top_k
    # Anything that is not an int (``None``, strings, floats) falls back.
    if not isinstance(candidate, int):
        return fallback
    # Zero and negative values are not usable result counts.
    if candidate < 1:
        return fallback
    return candidate
def _knowledge_retrieval_config(retrieval: AgentKnowledgeRetrievalConfig) -> DifyKnowledgeRetrievalConfig:
    """Map one knowledge set's retrieval policy onto the Agent backend retrieval DTO.

    Args:
        retrieval: Validated per-set retrieval policy from the Agent Soul.

    Returns:
        The backend retrieval config carrying the same mode, ``top_k``,
        reranking settings, weights, and model reference.

    Score-threshold handling: the threshold is forwarded only while filtering
    has not been explicitly switched off. ``score_threshold_enabled=False``
    maps to ``0.0`` even when a threshold value is still stored; an omitted
    toggle (``None``) keeps honoring the stored value. A missing threshold
    also maps to ``0.0``, the value this mapper already used for "no
    threshold".
    """
    # The DTO keeps the toggle and the value separately, so a disabled toggle
    # can coexist with a stale value that must not reach the runtime.
    if retrieval.score_threshold_enabled is False:
        score_threshold = 0.0
    else:
        score_threshold = retrieval.score_threshold or 0.0

    reranking_model = (
        DifyKnowledgeRerankingModelConfig(
            provider=retrieval.reranking_model.provider,
            model=retrieval.reranking_model.model,
        )
        if retrieval.reranking_model is not None
        else None
    )
    # Weights are serialized to plain JSON-compatible data; unset keys are dropped.
    weights = (
        cast(dict[str, Any], retrieval.weights.model_dump(mode="json", exclude_none=True))
        if retrieval.weights is not None
        else None
    )
    return DifyKnowledgeRetrievalConfig(
        mode=retrieval.mode,
        top_k=retrieval.top_k,
        score_threshold=score_threshold,
        reranking_mode=retrieval.reranking_mode,
        reranking_enable=retrieval.reranking_enable,
        reranking_model=reranking_model,
        weights=weights,
        model=_knowledge_model_config(retrieval.model),
    )
def _knowledge_score_threshold(query_config: AgentKnowledgeQueryConfig) -> float:
    """Return the score threshold to forward, or ``0.0`` when it is off or unset."""
    # A falsy toggle (False or None) disables threshold filtering outright.
    if not query_config.score_threshold_enabled:
        return 0.0
    threshold = query_config.score_threshold
    return 0.0 if threshold is None else threshold
def _knowledge_metadata_filtering_config(
    metadata_filtering: AgentKnowledgeMetadataFilteringConfig,
) -> DifyKnowledgeMetadataFilteringConfig:
    """Convert a set's metadata-filtering policy into the backend DTO.

    The filtering mode is passed through unchanged, the optional model
    reference is mapped by ``_knowledge_model_config``, and the conditions
    (when present) are dumped to JSON-compatible data.
    """
    filter_mode = metadata_filtering.mode
    filter_model = _knowledge_model_config(metadata_filtering.metadata_model_config)

    raw_conditions = metadata_filtering.conditions
    if raw_conditions is None:
        dumped_conditions = None
    else:
        dumped_conditions = cast(Any, raw_conditions.model_dump(mode="json"))

    return DifyKnowledgeMetadataFilteringConfig(
        mode=filter_mode,
        model_config=filter_model,
        conditions=dumped_conditions,
    )
def _knowledge_model_config(model: AgentKnowledgeModelConfig | None) -> DifyKnowledgeModelConfig | None:
    """Copy an optional Agent Soul model reference into the backend DTO.

    Returns ``None`` when no model is configured; otherwise a
    ``DifyKnowledgeModelConfig`` with the same provider, name, mode, and
    completion params.
    """
    if model is not None:
        return DifyKnowledgeModelConfig(
            provider=model.provider,
            name=model.name,
            mode=model.mode,
            completion_params=model.completion_params,
        )
    return None
def build_ask_human_layer_config(agent_soul: AgentSoulConfig) -> DifyAskHumanLayerConfig | None:

View File

@ -18,6 +18,7 @@ from models.agent_config_entities import (
)
from models.model import UploadFile
from models.workflow import Workflow
from services.agent.knowledge_datasets import list_missing_tenant_knowledge_dataset_ids
from .entities import DifyAgentNodeData
@ -146,6 +147,7 @@ class WorkflowAgentNodeValidator:
)
cls._validate_agent_soul_env(binding=binding, agent_soul=agent_soul)
cls._validate_agent_soul_tools(binding=binding, agent_soul=agent_soul)
cls._validate_agent_soul_knowledge(binding=binding, agent_soul=agent_soul)
node_job = WorkflowNodeJobConfig.model_validate(binding.node_job_config_dict)
cls.validate_node_job(session=session, binding=binding, node_job=node_job, topology=topology)
@ -364,6 +366,24 @@ class WorkflowAgentNodeValidator:
)
cli_tool_names.add(normalized_name)
@classmethod
def _validate_agent_soul_knowledge(
    cls,
    *,
    binding: WorkflowAgentNodeBinding,
    agent_soul: AgentSoulConfig,
) -> None:
    """Reject knowledge datasets that cannot be resolved for the binding's tenant.

    Raises:
        WorkflowAgentNodeValidationError: when the lookup reports any dataset
            id as missing or out of scope; the message lists those ids.
    """
    unresolved_ids = list_missing_tenant_knowledge_dataset_ids(
        tenant_id=binding.tenant_id,
        agent_soul=agent_soul,
    )
    if not unresolved_ids:
        return

    joined_ids = ", ".join(unresolved_ids)
    raise WorkflowAgentNodeValidationError(
        f"Workflow Agent node {binding.node_id} references missing or out-of-scope knowledge datasets: "
        f"{joined_ids}."
    )
@classmethod
def _validate_agent_soul_env(
cls,

View File

@ -400,10 +400,22 @@ class AgentComposerNodeJobCandidatesResponse(ResponseModel):
human_contacts: list[AgentHumanContactConfig] = Field(default_factory=list)
# Dataset row inside a composer knowledge-set candidate.
# Inherits every field of AgentKnowledgeDatasetConfig and adds a ``missing`` flag.
class AgentComposerKnowledgeDatasetCandidateResponse(AgentKnowledgeDatasetConfig):
    # Defaults to False. NOTE(review): presumably True when the tenant-scoped
    # dataset lookup returned no row for this id — confirm against soul_candidates.
    missing: bool = False
# One knowledge set entry in the composer soul-candidates response.
class AgentComposerKnowledgeSetCandidateResponse(ResponseModel):
    # Identity and display text of the knowledge set; ``id`` and ``name`` are required.
    id: str
    name: str
    description: str | None = None
    # Dataset rows of this set, each carrying its own ``missing`` flag.
    datasets: list[AgentComposerKnowledgeDatasetCandidateResponse] = Field(default_factory=list)
    # Ids of datasets in this set that could not be resolved.
    # NOTE(review): presumably mirrors the rows whose ``missing`` is True — confirm with the producer.
    missing_dataset_ids: list[str] = Field(default_factory=list)
class AgentComposerSoulCandidatesResponse(ResponseModel):
dify_tools: list[AgentComposerDifyToolCandidateResponse] = Field(default_factory=list)
cli_tools: list[AgentCliToolConfig] = Field(default_factory=list)
knowledge_datasets: list[AgentKnowledgeDatasetConfig] = Field(default_factory=list)
knowledge_sets: list[AgentComposerKnowledgeSetCandidateResponse] = Field(default_factory=list)
human_contacts: list[AgentHumanContactConfig] = Field(default_factory=list)

View File

@ -2,10 +2,11 @@ from __future__ import annotations
import re
from enum import StrEnum
from typing import Annotated, Any, Final, Literal
from typing import Annotated, Any, Final, Literal, Self
from pydantic import BaseModel, ConfigDict, Field, WithJsonSchema, field_validator, model_validator
from core.rag.entities.metadata_entities import ConditionValue, SupportedComparisonOperator
from core.workflow.file_reference import is_canonical_file_reference
from graphon.file import FileTransferMethod
@ -236,17 +237,161 @@ class AgentCliToolConfig(AgentFlexibleConfig):
inferred_from: str | None = Field(default=None, max_length=255)
class AgentKnowledgeDatasetConfig(AgentFlexibleConfig):
class AgentKnowledgeDatasetConfig(BaseModel):
model_config = ConfigDict(extra="forbid")
id: str | None = Field(default=None, max_length=255)
name: str | None = Field(default=None, max_length=255)
description: str | None = None
class AgentKnowledgeQueryConfig(AgentFlexibleConfig):
query: str | None = None
class AgentKnowledgeQueryConfig(BaseModel):
"""Per-set query policy for Agent v2 knowledge retrieval.
Agent v2 stores knowledge as explicit ``knowledge.sets`` rather than the
legacy flat ``datasets`` / ``query_mode`` / ``query_config`` shape. Each
set owns its own query policy, so ``user_query`` must carry an explicit
``value`` while ``generated_query`` leaves that value empty.
"""
model_config = ConfigDict(extra="forbid")
mode: AgentKnowledgeQueryMode
value: str | None = None
@model_validator(mode="after")
def validate_query(self) -> Self:
    """Require a non-blank ``value`` whenever the mode is ``user_query``.

    Other modes are accepted as-is, whatever ``value`` holds.
    """
    if self.mode != AgentKnowledgeQueryMode.USER_QUERY:
        return self
    query_text = self.value or ""
    if query_text.strip():
        return self
    raise ValueError("knowledge query.value is required for user_query mode")
# Model reference used by knowledge retrieval (``retrieval.model``) and by
# metadata filtering (``metadata_model_config``).
class AgentKnowledgeModelConfig(BaseModel):
    # Unknown keys are rejected rather than silently stored.
    model_config = ConfigDict(extra="forbid")

    # provider, name and mode must all be non-empty strings within the length limits.
    provider: str = Field(min_length=1, max_length=255)
    name: str = Field(min_length=1, max_length=255)
    mode: str = Field(min_length=1, max_length=64)
    # Free-form parameters; each instance gets its own empty dict by default.
    completion_params: dict[str, Any] = Field(default_factory=dict)
# Provider/model pair identifying the reranking model of a retrieval policy.
class AgentKnowledgeRerankingModelConfig(BaseModel):
    # Unknown keys are rejected rather than silently stored.
    model_config = ConfigDict(extra="forbid")

    # Both values are required, non-empty, and at most 255 characters long.
    provider: str = Field(min_length=1, max_length=255)
    model: str = Field(min_length=1, max_length=255)
# Optional weighted-score settings attached to a retrieval policy (``retrieval.weights``).
# NOTE(review): inherits AgentFlexibleConfig, whose extra-key policy is not visible here — confirm.
class AgentKnowledgeWeightedScoreConfig(AgentFlexibleConfig):
    # All three fields are optional and default to None.
    weight_type: str | None = Field(default=None, max_length=64)
    # Opaque setting dicts; their inner schema is not validated by this model.
    vector_setting: dict[str, Any] | None = None
    keyword_setting: dict[str, Any] | None = None
class AgentKnowledgeRetrievalConfig(BaseModel):
"""Per-set retrieval policy for Agent v2 knowledge retrieval.
Retrieval settings now live on each knowledge set instead of one shared
flat config. A set may use either ``multiple`` retrieval with ``top_k`` or
``single`` retrieval with a required model config.
"""
model_config = ConfigDict(extra="forbid")
mode: Literal["single", "multiple"]
top_k: int | None = Field(default=None, ge=1)
score_threshold: float | None = Field(default=None, ge=0, le=1)
score_threshold_enabled: bool | None = None
reranking_mode: str = "reranking_model"
reranking_enable: bool = True
reranking_model: AgentKnowledgeRerankingModelConfig | None = None
weights: AgentKnowledgeWeightedScoreConfig | None = None
model: AgentKnowledgeModelConfig | None = None
@model_validator(mode="after")
def validate_mode_fields(self) -> Self:
if self.mode == "multiple" and self.top_k is None:
raise ValueError("knowledge retrieval.top_k is required for multiple mode")
if self.mode == "single" and self.model is None:
raise ValueError("knowledge retrieval.model is required for single mode")
return self
# One metadata filter condition: a named field, a comparison operator, and an optional value.
class AgentKnowledgeMetadataCondition(BaseModel):
    # Unknown keys are rejected rather than silently stored.
    model_config = ConfigDict(extra="forbid")

    # Metadata field name; required, non-empty, at most 255 characters.
    name: str = Field(min_length=1, max_length=255)
    # Must be one of the operators allowed by SupportedComparisonOperator.
    comparison_operator: SupportedComparisonOperator
    # Optional comparison value; defaults to None.
    value: ConditionValue = None
# Group of metadata conditions combined with a single logical operator.
class AgentKnowledgeMetadataConditions(BaseModel):
    # Unknown keys are rejected rather than silently stored.
    model_config = ConfigDict(extra="forbid")

    # How the conditions are combined; defaults to "and".
    logical_operator: Literal["and", "or"] = "and"
    # May be empty at this level; AgentKnowledgeMetadataFilteringConfig rejects
    # an empty list when its mode is "manual".
    conditions: list[AgentKnowledgeMetadataCondition] = Field(default_factory=list)
class AgentKnowledgeMetadataFilteringConfig(BaseModel):
"""Per-set metadata filtering policy.
The Python attribute uses ``metadata_model_config`` for clarity because the
model belongs to metadata filtering specifically, while the external API and
generated schema keep the historical ``model_config`` field name via alias.
"""
model_config = ConfigDict(extra="forbid", populate_by_name=True)
mode: Literal["disabled", "automatic", "manual"] = "disabled"
# Internal name is explicit; wire format remains ``model_config``.
metadata_model_config: AgentKnowledgeModelConfig | None = Field(default=None, alias="model_config")
conditions: AgentKnowledgeMetadataConditions | None = None
@model_validator(mode="after")
def validate_mode_fields(self) -> Self:
if self.mode == "automatic" and self.metadata_model_config is None:
raise ValueError("metadata_filtering.model_config is required for automatic mode")
if self.mode == "manual" and (self.conditions is None or not self.conditions.conditions):
raise ValueError("metadata_filtering.conditions is required for manual mode")
return self
class AgentKnowledgeSetConfig(BaseModel):
    """One explicit knowledge set in Agent v2.

    ``knowledge.sets`` replaces the old flat knowledge config. Each set owns
    its datasets plus query, retrieval, and metadata policies. An individual
    set must contain at least one dataset id even though the overall knowledge
    section may be empty, which is how callers express "no knowledge layer".
    """

    # Unknown keys are rejected rather than silently stored.
    model_config = ConfigDict(extra="forbid")

    id: str = Field(min_length=1, max_length=255)
    name: str = Field(min_length=1, max_length=255)
    description: str | None = None
    datasets: list[AgentKnowledgeDatasetConfig]
    query: AgentKnowledgeQueryConfig
    retrieval: AgentKnowledgeRetrievalConfig
    metadata_filtering: AgentKnowledgeMetadataFilteringConfig = Field(
        default_factory=AgentKnowledgeMetadataFilteringConfig
    )

    @field_validator("id", "name")
    @classmethod
    def validate_non_blank_identity(cls, value: str) -> str:
        """Strip surrounding whitespace and reject values that end up empty."""
        normalized = value.strip()
        if not normalized:
            raise ValueError("knowledge set id and name must not be blank")
        return normalized

    @model_validator(mode="after")
    def validate_datasets(self) -> Self:
        """Require unique, non-blank dataset ids and store them stripped.

        Ids are compared after stripping whitespace. The stripped form is also
        what gets stored, so consumers that read ``dataset.id`` verbatim see the
        same value that was validated here, matching how the set ``id`` and
        ``name`` are normalized.

        Raises:
            ValueError: when the set has no datasets, any dataset id is blank,
                or two datasets share the same stripped id.
        """
        normalized_ids = [(dataset.id or "").strip() for dataset in self.datasets]
        if not normalized_ids or not all(normalized_ids):
            raise ValueError("knowledge set requires at least one dataset id")
        if len(set(normalized_ids)) != len(normalized_ids):
            raise ValueError("knowledge set dataset ids must be unique")
        # Replace only the rows whose id changed, and copy instead of mutating
        # so model instances handed in by the caller are left untouched.
        self.datasets = [
            dataset if dataset.id == normalized_id else dataset.model_copy(update={"id": normalized_id})
            for dataset, normalized_id in zip(self.datasets, normalized_ids)
        ]
        return self
class AgentHumanContactConfig(AgentFlexibleConfig):
@ -453,9 +598,28 @@ class AgentSoulToolsConfig(BaseModel):
class AgentSoulKnowledgeConfig(BaseModel):
datasets: list[AgentKnowledgeDatasetConfig] = Field(default_factory=list)
query_mode: AgentKnowledgeQueryMode | None = None
query_config: AgentKnowledgeQueryConfig = Field(default_factory=AgentKnowledgeQueryConfig)
"""Top-level Agent v2 knowledge config.
Agent v2 models knowledge as explicit sets instead of one flat
``datasets`` / ``query_mode`` / ``query_config`` block. An empty ``sets``
list means no knowledge layer should be emitted at runtime, while set-name
uniqueness stays case-insensitive because runtime selection addresses sets
by name.
"""
model_config = ConfigDict(extra="forbid")
sets: list[AgentKnowledgeSetConfig] = Field(default_factory=list)
@model_validator(mode="after")
def validate_unique_sets(self) -> Self:
    """Reject duplicate set ids and duplicate set names.

    Ids are compared after stripping whitespace; names are additionally
    lower-cased, so names differing only in case count as duplicates.

    Raises:
        ValueError: when two sets share an id, or share a name ignoring case.
    """
    total_sets = len(self.sets)
    distinct_ids = {item.id.strip() for item in self.sets}
    if len(distinct_ids) != total_sets:
        raise ValueError("knowledge set ids must be unique")
    distinct_names = {item.name.strip().lower() for item in self.sets}
    if len(distinct_names) != total_sets:
        raise ValueError("knowledge set names must be unique")
    return self
class AgentSoulHumanConfig(BaseModel):

View File

@ -12433,6 +12433,25 @@ Risk marker for CLI tool bootstrap commands.
| current_snapshot_id | string | | No |
| workflow_node_count | integer | | Yes |
#### AgentComposerKnowledgeDatasetCandidateResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| description | string | | No |
| id | string | | No |
| missing | boolean | | No |
| name | string | | No |
#### AgentComposerKnowledgeSetCandidateResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| datasets | [ [AgentComposerKnowledgeDatasetCandidateResponse](#agentcomposerknowledgedatasetcandidateresponse) ] | | No |
| description | string | | No |
| id | string | | Yes |
| missing_dataset_ids | [ string ] | | No |
| name | string | | Yes |
#### AgentComposerNodeJobCandidatesResponse
| Name | Type | Description | Required |
@ -12448,7 +12467,7 @@ Risk marker for CLI tool bootstrap commands.
| cli_tools | [ [AgentCliToolConfig](#agentclitoolconfig) ] | | No |
| dify_tools | [ [AgentComposerDifyToolCandidateResponse](#agentcomposerdifytoolcandidateresponse) ] | | No |
| human_contacts | [ [AgentHumanContactConfig](#agenthumancontactconfig) ] | | No |
| knowledge_datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] | | No |
| knowledge_sets | [ [AgentComposerKnowledgeSetCandidateResponse](#agentcomposerknowledgesetcandidateresponse) ] | | No |
#### AgentComposerSoulLockResponse
@ -12842,14 +12861,44 @@ the current roster/workflow APIs scoped to Dify Agent.
| id | string | | No |
| name | string | | No |
#### AgentKnowledgeMetadataCondition
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| comparison_operator | string, <br>**Available values:** "<", "=", ">", "after", "before", "contains", "empty", "end with", "in", "is", "is not", "not contains", "not empty", "not in", "start with", "≠", "≤", "≥" | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
| name | string | | Yes |
| value | string<br>[ string ]<br>number | | No |
#### AgentKnowledgeMetadataConditions
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| conditions | [ [AgentKnowledgeMetadataCondition](#agentknowledgemetadatacondition) ] | | No |
| logical_operator | string, <br>**Available values:** "and", "or", <br>**Default:** and | *Enum:* `"and"`, `"or"` | No |
#### AgentKnowledgeMetadataFilteringConfig
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| conditions | [AgentKnowledgeMetadataConditions](#agentknowledgemetadataconditions) | | No |
| mode | string, <br>**Available values:** "automatic", "disabled", "manual", <br>**Default:** disabled | *Enum:* `"automatic"`, `"disabled"`, `"manual"` | No |
| model_config | [AgentKnowledgeModelConfig](#agentknowledgemodelconfig) | | No |
#### AgentKnowledgeModelConfig
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| completion_params | object | | No |
| mode | string | | Yes |
| name | string | | Yes |
| provider | string | | Yes |
#### AgentKnowledgeQueryConfig
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| query | string | | No |
| score_threshold | number | | No |
| score_threshold_enabled | boolean | | No |
| top_k | integer | | No |
| mode | [AgentKnowledgeQueryMode](#agentknowledgequerymode) | | Yes |
| value | string | | No |
#### AgentKnowledgeQueryMode
@ -12857,6 +12906,46 @@ the current roster/workflow APIs scoped to Dify Agent.
| ---- | ---- | ----------- | -------- |
| AgentKnowledgeQueryMode | string | | |
#### AgentKnowledgeRerankingModelConfig
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| model | string | | Yes |
| provider | string | | Yes |
#### AgentKnowledgeRetrievalConfig
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| mode | string, <br>**Available values:** "multiple", "single" | *Enum:* `"multiple"`, `"single"` | Yes |
| model | [AgentKnowledgeModelConfig](#agentknowledgemodelconfig) | | No |
| reranking_enable | boolean, <br>**Default:** true | | No |
| reranking_mode | string, <br>**Default:** reranking_model | | No |
| reranking_model | [AgentKnowledgeRerankingModelConfig](#agentknowledgererankingmodelconfig) | | No |
| score_threshold | number | | No |
| top_k | integer | | No |
| weights | [AgentKnowledgeWeightedScoreConfig](#agentknowledgeweightedscoreconfig) | | No |
#### AgentKnowledgeSetConfig
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] | | Yes |
| description | string | | No |
| id | string | | Yes |
| metadata_filtering | [AgentKnowledgeMetadataFilteringConfig](#agentknowledgemetadatafilteringconfig) | | No |
| name | string | | Yes |
| query | [AgentKnowledgeQueryConfig](#agentknowledgequeryconfig) | | Yes |
| retrieval | [AgentKnowledgeRetrievalConfig](#agentknowledgeretrievalconfig) | | Yes |
#### AgentKnowledgeWeightedScoreConfig
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| keyword_setting | object | | No |
| vector_setting | object | | No |
| weight_type | string | | No |
#### AgentLogConversationItemResponse
| Name | Type | Description | Required |
@ -13258,9 +13347,7 @@ old Agent tool payloads can be read while new payloads stay explicit.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] | | No |
| query_config | [AgentKnowledgeQueryConfig](#agentknowledgequeryconfig) | | No |
| query_mode | [AgentKnowledgeQueryMode](#agentknowledgequerymode) | | No |
| sets | [ [AgentKnowledgeSetConfig](#agentknowledgesetconfig) ] | | No |
#### AgentSoulMemoryConfig

View File

@ -25,6 +25,7 @@ from models.agent_config_entities import (
AgentSoulConfig,
DeclaredOutputConfig,
)
from services.agent.knowledge_datasets import list_agent_soul_knowledge_dataset_ids
MAX_CANDIDATES_PER_LIST = 200
@ -139,19 +140,34 @@ def soul_candidates(
cli_tools = [tool.model_dump(exclude_none=True) for tool in soul.tools.cli_tools if tool.enabled]
dataset_ids = [dataset.id for dataset in soul.knowledge.datasets if dataset.id]
dataset_ids = list_agent_soul_knowledge_dataset_ids(soul)
dataset_rows = dataset_lookup(dataset_ids) if dataset_ids else {}
knowledge_datasets: list[dict[str, Any]] = []
for dataset in soul.knowledge.datasets:
if not dataset.id:
continue
row = dataset_rows.get(dataset.id)
knowledge_datasets.append(
knowledge_sets: list[dict[str, Any]] = []
for knowledge_set in soul.knowledge.sets:
missing_dataset_ids: list[str] = []
datasets: list[dict[str, Any]] = []
for dataset in knowledge_set.datasets:
dataset_id = (dataset.id or "").strip()
if not dataset_id:
continue
row = dataset_rows.get(dataset_id)
if row is None:
missing_dataset_ids.append(dataset_id)
datasets.append(
{
"id": dataset_id,
"name": (getattr(row, "name", None) or dataset.name or dataset_id),
"description": getattr(row, "description", None) or dataset.description,
"missing": row is None,
}
)
knowledge_sets.append(
{
"id": dataset.id,
"name": (getattr(row, "name", None) or dataset.name or dataset.id),
"description": getattr(row, "description", None) or dataset.description,
"missing": row is None,
"id": knowledge_set.id,
"name": knowledge_set.name,
"description": knowledge_set.description,
"datasets": datasets,
"missing_dataset_ids": missing_dataset_ids,
}
)
@ -161,7 +177,7 @@ def soul_candidates(
lists = {
"dify_tools": dify_tools,
"cli_tools": cli_tools,
"knowledge_datasets": knowledge_datasets,
"knowledge_sets": knowledge_sets,
"human_contacts": human_contacts,
}
capped: dict[str, list[dict[str, Any]]] = {}
@ -192,7 +208,6 @@ def _ref_entry(
"inferred": inferred,
}
def _capped(values: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], bool]:
if len(values) > MAX_CANDIDATES_PER_LIST:
return values[:MAX_CANDIDATES_PER_LIST], True

View File

@ -33,6 +33,11 @@ from services.agent.errors import (
AgentNameConflictError,
AgentNotFoundError,
AgentVersionNotFoundError,
InvalidComposerConfigError,
)
from services.agent.knowledge_datasets import (
get_tenant_knowledge_dataset_rows,
list_missing_tenant_knowledge_dataset_ids,
)
from services.entities.agent_entities import (
AgentSoulConfig,
@ -101,6 +106,7 @@ class AgentComposerService:
_backfill_cli_tool_ids(payload.agent_soul)
ComposerConfigValidator.validate_save_payload(payload)
cls.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
workflow = cls._get_draft_workflow(tenant_id=tenant_id, app_id=app_id)
binding = cls._get_workflow_binding(tenant_id=tenant_id, workflow_id=workflow.id, node_id=node_id)
@ -195,6 +201,7 @@ class AgentComposerService:
raise ValueError("Agent App composer endpoint only accepts agent_app variant")
_backfill_cli_tool_ids(payload.agent_soul)
ComposerConfigValidator.validate_save_payload(payload)
cls.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
if payload.agent_soul is None:
raise ValueError("agent_soul is required")
@ -273,19 +280,15 @@ class AgentComposerService:
agent_id: str | None = None,
) -> dict[str, Any]:
"""ENG-617 soft findings, with DB-backed dataset and drive mention checks."""
from services.agent.prompt_mentions import MentionKind, parse_prompt_mentions
mentioned_ids: set[str] = set()
if payload.agent_soul is not None:
mentioned_ids |= {
mention.ref_id
for mention in parse_prompt_mentions(payload.agent_soul.prompt.system_prompt)
if mention.kind == MentionKind.KNOWLEDGE
}
existing_dataset_ids: set[str] | None = None
if mentioned_ids:
existing_dataset_ids = set(cls._dataset_rows(tenant_id=tenant_id, dataset_ids=sorted(mentioned_ids)))
findings = ComposerConfigValidator.collect_soft_findings(payload, existing_dataset_ids=existing_dataset_ids)
existing_knowledge_set_ids = (
{knowledge_set.id for knowledge_set in payload.agent_soul.knowledge.sets}
if payload.agent_soul is not None
else None
)
findings = ComposerConfigValidator.collect_soft_findings(
payload,
existing_knowledge_set_ids=existing_knowledge_set_ids,
)
if agent_id and payload.agent_soul is not None:
findings["warnings"].extend(
cls._drive_mention_findings(
@ -296,6 +299,24 @@ class AgentComposerService:
)
return findings
@classmethod
def validate_knowledge_datasets(cls, *, tenant_id: str, agent_soul: AgentSoulConfig | None) -> None:
    """Fail the save when knowledge sets reference datasets the tenant cannot use.

    Set shape and duplicate checks are left to the DTO validators. This check
    asks the tenant-scoped lookup which dataset ids are missing and rejects
    the payload if any are reported. A ``None`` soul is accepted without any
    lookup.

    Raises:
        InvalidComposerConfigError: when at least one referenced dataset id is
            reported as missing or out of scope; the message lists those ids.
    """
    if agent_soul is not None:
        unresolved_ids = list_missing_tenant_knowledge_dataset_ids(tenant_id=tenant_id, agent_soul=agent_soul)
        if unresolved_ids:
            joined_ids = ", ".join(unresolved_ids)
            raise InvalidComposerConfigError(
                "knowledge_dataset_not_found: knowledge sets reference missing or out-of-scope datasets: "
                + joined_ids
            )
@classmethod
def resolve_bound_agent_id(cls, *, tenant_id: str, app_id: str) -> str | None:
"""The Agent App's bound roster agent id, if any (validate-endpoint context)."""
@ -410,7 +431,7 @@ class AgentComposerService:
soul_lists, soul_truncated = soul_candidates(
agent_soul=agent_soul,
dataset_lookup=lambda ids: cls._dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
dataset_lookup=lambda ids: get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
workspace_tools_loader=lambda: cls._workspace_dify_tools(tenant_id=tenant_id, user_id=user_id),
)
truncated = truncated or soul_truncated
@ -437,7 +458,7 @@ class AgentComposerService:
agent_soul = cls._load_agent_app_soul(tenant_id=tenant_id, app_id=app_id)
soul_lists, truncated = soul_candidates(
agent_soul=agent_soul,
dataset_lookup=lambda ids: cls._dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
dataset_lookup=lambda ids: get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
workspace_tools_loader=lambda: cls._workspace_dify_tools(tenant_id=tenant_id, user_id=user_id),
)
response = ComposerCandidatesResponse(
@ -530,30 +551,6 @@ class AgentComposerService:
variables = WorkflowDraftVariableService(session=session).list_system_variables(app_id, user_id)
return [(variable.name, variable.value_type.value) for variable in variables.variables]
@staticmethod
def _dataset_rows(*, tenant_id: str, dataset_ids: list[str]) -> dict[str, Any]:
    """Look up datasets for a tenant, skipping ids that are not valid UUIDs.

    Ids that fail UUID parsing are dropped before querying, so they are simply
    absent from the result instead of breaking the lookup. Returns a mapping of
    stringified dataset id to row; it is empty when no id parses.
    """
    from uuid import UUID

    from services.dataset_service import DatasetService

    def _parses_as_uuid(candidate: str) -> bool:
        try:
            UUID(candidate)
        except (ValueError, TypeError):
            return False
        return True

    parseable_ids = [dataset_id for dataset_id in dataset_ids if _parses_as_uuid(dataset_id)]
    if not parseable_ids:
        return {}

    rows, _ = DatasetService.get_datasets_by_ids(parseable_ids, tenant_id)
    return {str(row.id): row for row in rows}
@staticmethod
def _workspace_dify_tools(*, tenant_id: str, user_id: str) -> list[dict[str, Any]]:
"""Workspace Dify Plugin tools, same source as the tool selector.

View File

@ -141,15 +141,15 @@ class ComposerConfigValidator:
cls,
payload: ComposerSavePayload,
*,
existing_dataset_ids: set[str] | None = None,
existing_knowledge_set_ids: set[str] | None = None,
) -> dict[str, Any]:
"""ENG-617 §5.3/§5.4 soft findings — never block save.
``warnings`` carries ``mention_target_missing`` / ``mention_malformed``
entries; ``knowledge_retrieval_placeholder`` keeps dangling knowledge
entries; ``knowledge_retrieval_placeholder`` keeps dangling knowledge-set
mentions with a placeholder name (0522 consensus) instead of dropping or
rejecting them. With ``existing_dataset_ids`` provided, configured-but-
deleted datasets surface as placeholders too.
rejecting them. With ``existing_knowledge_set_ids`` provided, mentions
that no longer exist in the current Agent Soul surface as placeholders too.
"""
warnings: list[dict[str, Any]] = []
placeholders: list[dict[str, str]] = []
@ -181,7 +181,7 @@ class ComposerConfigValidator:
resolved = resolver(mention)
if mention.kind == MentionKind.KNOWLEDGE:
dangling = resolved is None or (
existing_dataset_ids is not None and mention.ref_id not in existing_dataset_ids
existing_knowledge_set_ids is not None and mention.ref_id not in existing_knowledge_set_ids
)
if dangling:
placeholders.append(

View File

@ -0,0 +1,63 @@
from __future__ import annotations
from typing import Any
from uuid import UUID
from models.agent_config_entities import AgentSoulConfig
def list_agent_soul_knowledge_dataset_ids(agent_soul: AgentSoulConfig) -> list[str]:
    """Collect the distinct dataset ids referenced by ``knowledge.sets``.

    Ids are read set by set, dataset by dataset. Each id is stripped of
    surrounding whitespace; ``None`` and blank ids are dropped; duplicates are
    collapsed onto their first occurrence so the result keeps config order.

    Returns:
        Normalized, unique dataset ids in first-seen order.
    """
    stripped_ids = (
        (dataset.id or "").strip()
        for knowledge_set in agent_soul.knowledge.sets
        for dataset in knowledge_set.datasets
    )
    # dict keys keep insertion order, which gives an order-preserving dedupe.
    return list(dict.fromkeys(candidate for candidate in stripped_ids if candidate))
def _is_canonical_dataset_uuid(value: object) -> bool:
    """Return True only for hyphenated 8-4-4-4-12 UUID strings (any letter case)."""
    if not isinstance(value, str):
        return False
    try:
        # ``UUID()`` also accepts ``urn:uuid:`` prefixes, braces and hyphen-less
        # hex, so compare against the canonical rendering to reject those.
        return str(UUID(value)) == value.lower()
    except ValueError:
        return False


def get_tenant_knowledge_dataset_rows(*, tenant_id: str, dataset_ids: list[str]) -> dict[str, Any]:
    """Return tenant-scoped dataset rows for normalized knowledge dataset ids.

    Knowledge ids come from user-editable config. Ids that are not hyphenated
    UUID strings are treated as missing instead of being forwarded to the
    dataset lookup. This covers non-strings and the alternative spellings that
    ``uuid.UUID`` tolerates (``urn:uuid:`` prefix, braces, hyphen-less hex).

    Args:
        tenant_id: Tenant whose datasets may be returned.
        dataset_ids: Candidate dataset ids, possibly malformed.

    Returns:
        Mapping of ``str(row.id)`` to the dataset row for every id that was
        found. Empty when no candidate id is well formed; in that case the
        dataset service is neither imported nor called.
    """
    valid_ids = [dataset_id for dataset_id in dataset_ids if _is_canonical_dataset_uuid(dataset_id)]
    if not valid_ids:
        return {}

    # Imported lazily, and only once a lookup is actually required.
    from services.dataset_service import DatasetService

    rows, _ = DatasetService.get_datasets_by_ids(valid_ids, tenant_id)
    return {str(row.id): row for row in rows}
def list_missing_tenant_knowledge_dataset_ids(*, tenant_id: str, agent_soul: AgentSoulConfig | None) -> list[str]:
    """List configured knowledge dataset ids that have no row in the tenant scope.

    Returns an empty list when there is no Agent Soul or it references no
    datasets; in both cases no dataset lookup is performed. Otherwise the
    result keeps the normalized config order of the ids that were not found.
    """
    configured_ids = [] if agent_soul is None else list_agent_soul_knowledge_dataset_ids(agent_soul)
    if not configured_ids:
        return []

    found_rows = get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=configured_ids)
    return [dataset_id for dataset_id in configured_ids if dataset_id not in found_rows]

View File

@ -6,7 +6,7 @@ Slash-menu insertions are stored inline in the plain-string prompt as tokens:
``kind`` is a fixed lowercase word; ``id`` points at an item in the Agent
runtime context. For prompt-owned entities that means Agent Soul lists such as
``tools`` / ``knowledge.datasets`` / ``human.contacts`` and workflow job lists
``tools`` / ``knowledge.sets`` / ``human.contacts`` and workflow job lists
such as ``previous_node_output_refs`` / ``declared_outputs``. For drive-backed
``skill`` / ``file`` mentions the field stores a URL-encoded drive key and is
resolved against ``agent_drive_files`` at runtime. ``label`` is an optional
@ -211,9 +211,9 @@ def build_soul_mention_resolver(agent_soul: AgentSoulConfig) -> MentionResolver:
if mention.ref_id in (cli_tool.id, cli_tool.name):
return cli_tool.name or cli_tool.id
case MentionKind.KNOWLEDGE:
for dataset in agent_soul.knowledge.datasets:
if mention.ref_id == dataset.id:
return dataset.name or dataset.id
for knowledge_set in agent_soul.knowledge.sets:
if mention.ref_id == knowledge_set.id:
return knowledge_set.name or knowledge_set.id
case MentionKind.HUMAN:
return _resolve_human_contact(agent_soul.human.contacts, mention.ref_id)
case _:

View File

@ -162,8 +162,15 @@ def test_request_builder_adds_knowledge_layer_when_configured():
run_input = _run_input()
run_input.knowledge = DifyKnowledgeBaseLayerConfig.model_validate(
{
"dataset_ids": ["dataset-1"],
"retrieval": {"mode": "multiple", "top_k": 4},
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
}
],
}
)
@ -174,7 +181,7 @@ def test_request_builder_adds_knowledge_layer_when_configured():
assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].type == DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID
assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].deps == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID}
knowledge_config = cast(DifyKnowledgeBaseLayerConfig, layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].config)
assert knowledge_config.dataset_ids == ["dataset-1"]
assert knowledge_config.sets[0].dataset_ids == ["dataset-1"]
def test_request_builder_can_delete_on_exit_for_cleanup_paths():
@ -386,8 +393,15 @@ def test_agent_app_request_builder_adds_knowledge_layer_when_configured():
run_input = _agent_app_input()
run_input.knowledge = DifyKnowledgeBaseLayerConfig.model_validate(
{
"dataset_ids": ["dataset-1", "dataset-2"],
"retrieval": {"mode": "multiple", "top_k": 2},
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 2},
}
],
}
)
@ -398,7 +412,7 @@ def test_agent_app_request_builder_adds_knowledge_layer_when_configured():
assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].type == DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID
assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].deps == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID}
knowledge_config = cast(DifyKnowledgeBaseLayerConfig, layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].config)
assert knowledge_config.dataset_ids == ["dataset-1", "dataset-2"]
assert knowledge_config.sets[0].dataset_ids == ["dataset-1", "dataset-2"]
# ── ENG-635 / ENG-638: ask_human layer injection + deferred_tool_results ─────

View File

@ -149,3 +149,55 @@ def test_generate_specs_is_idempotent(tmp_path):
assert [path.name for path in first_paths] == [path.name for path in second_paths]
for first_path, second_path in zip(first_paths, second_paths):
assert first_path.read_text(encoding="utf-8") == second_path.read_text(encoding="utf-8")
def test_generate_specs_include_agent_v2_knowledge_set_schema_and_query_enums(tmp_path):
    """The generated console spec exposes the knowledge-set schema and query-mode enum."""
    generator = _load_generate_swagger_specs_module()

    console_spec_path = next(
        spec_path for spec_path in generator.generate_specs(tmp_path) if spec_path.name == "console-openapi.json"
    )
    components = json.loads(console_spec_path.read_text(encoding="utf-8"))["components"]["schemas"]

    assert "AgentKnowledgeSetConfig" in components
    # ``knowledge.sets`` items must reference the set schema rather than inline it.
    sets_items = components["AgentSoulKnowledgeConfig"]["properties"]["sets"]["items"]
    assert sets_items["$ref"] == "#/components/schemas/AgentKnowledgeSetConfig"
    assert components["AgentKnowledgeQueryMode"]["enum"] == ["generated_query", "user_query"]
def test_checked_in_agent_v2_knowledge_openapi_and_generated_contracts_are_in_sync():
    """Checked-in markdown, TS types and zod schemas all mention the knowledge-set contract."""
    api_dir = Path(__file__).resolve().parents[3]
    console_contracts_dir = api_dir.parent / "packages" / "contracts" / "generated" / "api" / "console"

    def read(path: Path) -> str:
        return path.read_text(encoding="utf-8")

    markdown = read(api_dir / "openapi" / "markdown" / "console-openapi.md")
    agent_types = read(console_contracts_dir / "agent" / "types.gen.ts")
    apps_types = read(console_contracts_dir / "apps" / "types.gen.ts")
    agent_zod = read(console_contracts_dir / "agent" / "zod.gen.ts")
    apps_zod = read(console_contracts_dir / "apps" / "zod.gen.ts")

    for heading in (
        "#### AgentKnowledgeSetConfig",
        "#### AgentSoulKnowledgeConfig",
        "#### AgentKnowledgeQueryMode",
    ):
        assert heading in markdown

    expected_type_snippets = (
        "export type AgentKnowledgeSetConfig = {",
        "export type AgentSoulKnowledgeConfig = {",
        "AgentKnowledgeQueryMode",
        "generated_query",
        "user_query",
    )
    for content in (agent_types, apps_types):
        for snippet in expected_type_snippets:
            assert snippet in content

    expected_zod_snippets = (
        "export const zAgentKnowledgeSetConfig = z.object({",
        "export const zAgentSoulKnowledgeConfig = z.object({",
        "zAgentKnowledgeQueryMode = z.enum([",
        "generated_query",
        "user_query",
    )
    for content in (agent_zod, apps_zod):
        for snippet in expected_zod_snippets:
            assert snippet in content

View File

@ -153,12 +153,19 @@ class TestAgentAppRuntimeRequestBuilder:
"model": "gpt-4o-mini",
},
"knowledge": {
"datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
"query_config": {
"top_k": 3,
"score_threshold": 0.5,
"score_threshold_enabled": False,
},
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
"query": {"mode": "generated_query"},
"retrieval": {
"mode": "multiple",
"top_k": 3,
"score_threshold": None,
},
}
],
},
}
)
@ -173,10 +180,12 @@ class TestAgentAppRuntimeRequestBuilder:
assert knowledge.type == "dify.knowledge_base"
assert knowledge.deps == {"execution_context": "execution_context"}
dumped_config = knowledge.config.model_dump(mode="json", by_alias=True)
assert dumped_config["dataset_ids"] == ["dataset-1", "dataset-2"]
assert dumped_config["retrieval"]["mode"] == "multiple"
assert dumped_config["retrieval"]["top_k"] == 3
assert dumped_config["retrieval"]["score_threshold"] == 0.0
knowledge_set = dumped_config["sets"][0]
assert [dataset["id"] for dataset in knowledge_set["datasets"]] == ["dataset-1", "dataset-2"]
assert knowledge_set["query"] == {"mode": "generated_query", "value": None}
assert knowledge_set["retrieval"]["mode"] == "multiple"
assert knowledge_set["retrieval"]["top_k"] == 3
assert knowledge_set["retrieval"]["score_threshold"] == 0.0
def test_build_raises_when_model_missing(self):
builder = AgentAppRuntimeRequestBuilder(

View File

@ -512,12 +512,55 @@ def test_build_maps_agent_soul_knowledge_to_knowledge_layer_config():
"model": "gpt-test",
},
"knowledge": {
"datasets": [{"id": "dataset-1"}, {"id": " "}, {"id": "dataset-2"}],
"query_config": {
"top_k": 6,
"score_threshold": 0.4,
"score_threshold_enabled": True,
},
"sets": [
{
"id": "support",
"name": "Support KB",
"description": "Support content",
"datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
"query": {"mode": "generated_query"},
"retrieval": {
"mode": "multiple",
"top_k": 6,
"score_threshold": 0.4,
"reranking_model": {"provider": "cohere", "model": "rerank-v3"},
"weights": {"weight_type": "weighted_score", "vector_setting": {"vector_weight": 0.7}},
},
"metadata_filtering": {
"mode": "manual",
"conditions": {
"logical_operator": "and",
"conditions": [
{"name": "category", "comparison_operator": "contains", "value": "auth"}
],
},
},
},
{
"id": "release",
"name": "Release Notes",
"datasets": [{"id": "dataset-3"}],
"query": {"mode": "user_query", "value": "release notes"},
"retrieval": {
"mode": "single",
"model": {
"provider": "openai",
"name": "gpt-4o-mini",
"mode": "chat",
"completion_params": {"temperature": 0.2},
},
},
"metadata_filtering": {
"mode": "automatic",
"model_config": {
"provider": "openai",
"name": "gpt-4o-mini",
"mode": "chat",
"completion_params": {},
},
},
},
],
},
}
),
@ -531,25 +574,75 @@ def test_build_maps_agent_soul_knowledge_to_knowledge_layer_config():
knowledge_layer = layers["knowledge"]
assert knowledge_layer["type"] == "dify.knowledge_base"
assert knowledge_layer["deps"] == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID}
assert knowledge_layer["config"] == {
"dataset_ids": ["dataset-1", "dataset-2"],
"retrieval": {
"mode": "multiple",
"top_k": 6,
"score_threshold": 0.4,
"reranking_mode": "reranking_model",
"reranking_enable": True,
"reranking_model": None,
"weights": None,
"model": None,
assert knowledge_layer["config"]["sets"] == [
{
"id": "support",
"name": "Support KB",
"description": "Support content",
"datasets": [
{"id": "dataset-1", "name": None, "description": None},
{"id": "dataset-2", "name": None, "description": None},
],
"query": {"mode": "generated_query", "value": None},
"retrieval": {
"mode": "multiple",
"top_k": 6,
"score_threshold": 0.4,
"reranking_mode": "reranking_model",
"reranking_enable": True,
"reranking_model": {"provider": "cohere", "model": "rerank-v3"},
"weights": {"weight_type": "weighted_score", "vector_setting": {"vector_weight": 0.7}},
"model": None,
},
"metadata_filtering": {
"mode": "manual",
"metadata_model_config": None,
"conditions": {
"logical_operator": "and",
"conditions": [
{"name": "category", "comparison_operator": "contains", "value": "auth"}
],
},
},
},
"metadata_filtering": {"mode": "disabled", "metadata_model_config": None, "conditions": None},
"max_result_content_chars": 2000,
"max_observation_chars": 12000,
}
{
"id": "release",
"name": "Release Notes",
"description": None,
"datasets": [{"id": "dataset-3", "name": None, "description": None}],
"query": {"mode": "user_query", "value": "release notes"},
"retrieval": {
"mode": "single",
"top_k": None,
"score_threshold": 0.0,
"reranking_mode": "reranking_model",
"reranking_enable": True,
"reranking_model": None,
"weights": None,
"model": {
"provider": "openai",
"name": "gpt-4o-mini",
"mode": "chat",
"completion_params": {"temperature": 0.2},
},
},
"metadata_filtering": {
"mode": "automatic",
"metadata_model_config": {
"provider": "openai",
"name": "gpt-4o-mini",
"mode": "chat",
"completion_params": {},
},
"conditions": None,
},
},
]
assert knowledge_layer["config"]["max_result_content_chars"] == 2000
assert knowledge_layer["config"]["max_observation_chars"] == 12000
def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits_it():
def test_build_knowledge_layer_maps_disabled_score_threshold_to_zero():
context = _context()
snapshot = AgentConfigSnapshot(
id="snapshot-1",
@ -565,8 +658,19 @@ def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits
"model": "gpt-test",
},
"knowledge": {
"datasets": [{"id": "dataset-1"}],
"query_config": {},
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}],
"query": {"mode": "generated_query"},
"retrieval": {
"mode": "multiple",
"top_k": 4,
"score_threshold": None,
},
}
],
},
}
),
@ -577,10 +681,10 @@ def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits
dumped = result.request.model_dump(mode="json")
knowledge_layer = next(layer for layer in dumped["composition"]["layers"] if layer["name"] == "knowledge")
assert knowledge_layer["config"]["retrieval"]["top_k"] == 4
assert knowledge_layer["config"]["sets"][0]["retrieval"]["score_threshold"] == 0.0
def test_build_skips_knowledge_layer_when_agent_soul_has_no_valid_dataset_ids():
def test_build_skips_knowledge_layer_when_agent_soul_has_no_sets():
context = _context()
snapshot = AgentConfigSnapshot(
id="snapshot-1",
@ -595,9 +699,7 @@ def test_build_skips_knowledge_layer_when_agent_soul_has_no_valid_dataset_ids():
"model_provider": "openai",
"model": "gpt-test",
},
"knowledge": {
"datasets": [{"id": " "}, {}],
},
"knowledge": {"sets": []},
}
),
)
@ -1094,7 +1196,15 @@ def test_feature_manifest_marks_knowledge_supported_without_warning_when_configu
soul = AgentSoulConfig.model_validate(
{
"knowledge": {
"datasets": [{"id": "dataset-1", "name": "Product Docs"}],
"sets": [
{
"id": "product",
"name": "Product Docs",
"datasets": [{"id": "dataset-1", "name": "Product Docs"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
}
],
}
}
)
@ -1106,13 +1216,13 @@ def test_feature_manifest_marks_knowledge_supported_without_warning_when_configu
assert all("knowledge" not in w["section"] for w in manifest["unsupported_runtime_warnings"])
def test_feature_manifest_treats_blank_knowledge_dataset_ids_as_not_configured():
def test_feature_manifest_treats_empty_knowledge_sets_as_not_configured():
from core.workflow.nodes.agent_v2.runtime_feature_manifest import build_runtime_feature_manifest
soul = AgentSoulConfig.model_validate(
{
"knowledge": {
"datasets": [{"id": " "}, {}],
"sets": [],
}
}
)

View File

@ -55,6 +55,33 @@ def _snapshot() -> AgentConfigSnapshot:
)
def _snapshot_with_knowledge_dataset(dataset_id: str) -> AgentConfigSnapshot:
    """Build a snapshot whose Agent Soul has one knowledge set pointing at ``dataset_id``."""
    support_set = {
        "id": "support",
        "name": "Support KB",
        "datasets": [{"id": dataset_id}],
        "query": {"mode": "generated_query"},
        "retrieval": {"mode": "multiple", "top_k": 4},
    }
    soul = AgentSoulConfig(
        model=AgentSoulModelConfig(
            plugin_id="langgenius/openai",
            model_provider="openai",
            model="gpt-test",
        ),
        knowledge={"sets": [support_set]},
    )
    return AgentConfigSnapshot(
        id="snapshot-1",
        tenant_id="tenant-1",
        agent_id="agent-1",
        version=1,
        config_snapshot=soul,
    )
def _graph(edges: list[dict]) -> dict:
return {
"nodes": [
@ -515,6 +542,35 @@ def test_publish_validation_rejects_missing_file_ref():
)
def test_publish_validation_rejects_missing_or_out_of_scope_knowledge_datasets(
    monkeypatch: pytest.MonkeyPatch,
):
    """Publishing fails, naming the dataset id, when the tenant lookup returns no row for it."""
    from services.dataset_service import DatasetService

    dataset_id = "550e8400-e29b-41d4-a716-446655440000"
    lookup_call = {}

    def fake_get_datasets_by_ids(ids, tenant_id):
        # Record the arguments and report that nothing was found.
        lookup_call.update(ids=ids, tenant_id=tenant_id)
        return [], 0

    monkeypatch.setattr(DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids)

    session = Mock()
    session.scalar.side_effect = [
        _binding(WorkflowNodeJobConfig.model_validate({})),
        _agent(),
        _snapshot_with_knowledge_dataset(dataset_id),
    ]
    workflow = _workflow(_graph([{"source": "start", "target": "agent-node"}]))

    with pytest.raises(WorkflowAgentNodeValidationError, match=dataset_id):
        WorkflowAgentNodeValidator.validate_published_workflow(session=session, workflow=workflow)

    assert lookup_call == {"ids": [dataset_id], "tenant_id": "tenant-1"}
def test_publish_validation_accepts_tool_node_agentic_manual_mode():
session = Mock()

View File

@ -1,4 +1,5 @@
import pytest
from pydantic import ValidationError
from models.agent_config_entities import AgentKnowledgeQueryMode, AgentSoulModelConfig, DeclaredOutputType
from services.agent.composer_service import AgentComposerService
@ -91,14 +92,144 @@ def test_knowledge_query_mode_uses_stable_backend_enums():
config = AgentSoulConfig.model_validate(
{
"knowledge": {
"datasets": [{"dataset_id": "dataset-1"}],
"query_mode": "generated_query",
"query_config": {"generation_prompt": "Create a retrieval query."},
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
}
],
}
}
)
assert config.knowledge.query_mode == AgentKnowledgeQueryMode.GENERATED_QUERY
assert config.knowledge.sets[0].query.mode == AgentKnowledgeQueryMode.GENERATED_QUERY
def _contract_knowledge_set(**overrides):
    """Build one well-formed knowledge set payload, replacing or adding the given top-level keys."""
    knowledge_set = {
        "id": "support",
        "name": "Support KB",
        "datasets": [{"id": "dataset-1"}],
        "query": {"mode": "generated_query"},
        "retrieval": {"mode": "multiple", "top_k": 4},
    }
    knowledge_set.update(overrides)
    return knowledge_set


@pytest.mark.parametrize(
    ("knowledge_payload", "match"),
    [
        # Two sets sharing the same id.
        (
            {
                "sets": [
                    _contract_knowledge_set(),
                    _contract_knowledge_set(name="Billing KB", datasets=[{"id": "dataset-2"}]),
                ]
            },
            "knowledge set ids must be unique",
        ),
        # Two sets sharing the same name.
        (
            {
                "sets": [
                    _contract_knowledge_set(name="Shared KB"),
                    _contract_knowledge_set(id="billing", name="Shared KB", datasets=[{"id": "dataset-2"}]),
                ]
            },
            "knowledge set names must be unique",
        ),
        # The same dataset id twice within one set, differing only by whitespace.
        (
            {"sets": [_contract_knowledge_set(datasets=[{"id": "dataset-1"}, {"id": " dataset-1 "}])]},
            "knowledge set dataset ids must be unique",
        ),
        # user_query mode without a query value.
        (
            {"sets": [_contract_knowledge_set(query={"mode": "user_query"})]},
            "knowledge query.value is required for user_query mode",
        ),
        # single retrieval mode without a model.
        (
            {"sets": [_contract_knowledge_set(retrieval={"mode": "single"})]},
            "knowledge retrieval.model is required for single mode",
        ),
        # automatic metadata filtering without a model config.
        (
            {"sets": [_contract_knowledge_set(metadata_filtering={"mode": "automatic"})]},
            "metadata_filtering.model_config is required for automatic mode",
        ),
        # manual metadata filtering without conditions.
        (
            {"sets": [_contract_knowledge_set(metadata_filtering={"mode": "manual"})]},
            "metadata_filtering.conditions is required for manual mode",
        ),
    ],
)
def test_knowledge_sets_contract_rejects_invalid_configs(knowledge_payload, match: str):
    """Each invalid ``knowledge`` payload is rejected with its specific validation message."""
    soul_payload = {"knowledge": knowledge_payload}
    with pytest.raises(ValidationError, match=match):
        AgentSoulConfig.model_validate(soul_payload)
def test_agent_soul_model_config_is_first_class_without_credentials():

View File

@ -2594,20 +2594,151 @@ def test_dataset_rows_filters_malformed_ids(monkeypatch: pytest.MonkeyPatch):
return [], 0
import services.dataset_service as dataset_service_module
from services.agent.knowledge_datasets import get_tenant_knowledge_dataset_rows
monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids)
valid = "550e8400-e29b-41d4-a716-446655440000"
rows = AgentComposerService._dataset_rows(tenant_id="tenant-1", dataset_ids=["9999dead-beef", valid])
rows = get_tenant_knowledge_dataset_rows(tenant_id="tenant-1", dataset_ids=["9999dead-beef", valid])
assert rows == {}
assert captured["ids"] == [valid]
# all-malformed input never touches the DB
captured.clear()
assert AgentComposerService._dataset_rows(tenant_id="tenant-1", dataset_ids=["nope"]) == {}
assert get_tenant_knowledge_dataset_rows(tenant_id="tenant-1", dataset_ids=["nope"]) == {}
assert captured == {}
@pytest.mark.parametrize(
    ("variant", "save_call"),
    [
        (
            ComposerVariant.AGENT_APP,
            lambda payload: AgentComposerService.save_agent_app_composer(
                tenant_id="tenant-1", app_id="app-1", account_id="account-1", payload=payload
            ),
        ),
        (
            ComposerVariant.WORKFLOW,
            lambda payload: AgentComposerService.save_workflow_composer(
                tenant_id="tenant-1", app_id="app-1", node_id="node-1", account_id="account-1", payload=payload
            ),
        ),
    ],
)
def test_composer_save_rejects_malformed_knowledge_dataset_ids(monkeypatch: pytest.MonkeyPatch, variant, save_call):
    """Saving with a non-UUID dataset id is rejected without calling the dataset lookup."""
    from services.dataset_service import DatasetService

    captured = {"calls": 0}

    def fake_get_datasets_by_ids(ids, tenant_id):
        captured["calls"] += 1
        captured.update(ids=ids, tenant_id=tenant_id)
        return [], 0

    monkeypatch.setattr(DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids)

    malformed_set = {
        "id": "support",
        "name": "Support KB",
        "datasets": [{"id": "not-a-uuid"}],
        "query": {"mode": "generated_query"},
        "retrieval": {"mode": "multiple", "top_k": 4},
    }
    payload = ComposerSavePayload.model_validate(
        {
            "variant": variant.value,
            "save_strategy": ComposerSaveStrategy.SAVE_TO_CURRENT_VERSION.value,
            "soul_lock": {"locked": False},
            "agent_soul": {"knowledge": {"sets": [malformed_set]}},
        }
    )

    with pytest.raises(InvalidComposerConfigError, match="not-a-uuid"):
        save_call(payload)

    # The counter is untouched and no arguments were recorded: the fake never ran.
    assert captured == {"calls": 0}
@pytest.mark.parametrize(
    ("variant", "save_call"),
    [
        (
            ComposerVariant.AGENT_APP,
            lambda payload: AgentComposerService.save_agent_app_composer(
                tenant_id="tenant-1", app_id="app-1", account_id="account-1", payload=payload
            ),
        ),
        (
            ComposerVariant.WORKFLOW,
            lambda payload: AgentComposerService.save_workflow_composer(
                tenant_id="tenant-1", app_id="app-1", node_id="node-1", account_id="account-1", payload=payload
            ),
        ),
    ],
)
def test_composer_save_rejects_missing_or_out_of_scope_knowledge_datasets(
    monkeypatch: pytest.MonkeyPatch, variant, save_call
):
    """Saving is rejected, naming the id, when the tenant lookup finds no row for a valid UUID."""
    from services.dataset_service import DatasetService

    missing_dataset_id = "550e8400-e29b-41d4-a716-446655440000"
    captured = {}

    def fake_get_datasets_by_ids(ids, tenant_id):
        # Record the arguments and report that nothing was found.
        captured.update(ids=ids, tenant_id=tenant_id)
        return [], 0

    monkeypatch.setattr(DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids)

    dangling_set = {
        "id": "support",
        "name": "Support KB",
        "datasets": [{"id": missing_dataset_id}],
        "query": {"mode": "generated_query"},
        "retrieval": {"mode": "multiple", "top_k": 4},
    }
    payload = ComposerSavePayload.model_validate(
        {
            "variant": variant.value,
            "save_strategy": ComposerSaveStrategy.SAVE_TO_CURRENT_VERSION.value,
            "soul_lock": {"locked": False},
            "agent_soul": {"knowledge": {"sets": [dangling_set]}},
        }
    )

    with pytest.raises(InvalidComposerConfigError, match=missing_dataset_id):
        save_call(payload)

    assert captured == {"ids": [missing_dataset_id], "tenant_id": "tenant-1"}
def test_workspace_dify_tools_returns_provider_and_tool_granularities(monkeypatch: pytest.MonkeyPatch):
"""The slash-menu Tools tab needs both selection granularities: a provider
hosts many tools (like an MCP server), so candidates return one

View File

@ -124,7 +124,18 @@ def _soul() -> AgentSoulConfig:
{"id": "ct-2", "name": "disabled-one", "enabled": False},
],
},
"knowledge": {"datasets": [{"id": "ds-1", "name": "旧名"}, {"id": "ds-gone", "name": "已删"}]},
"knowledge": {
"sets": [
{
"id": "kb-1",
"name": "产品知识",
"description": "knowledge set",
"datasets": [{"id": "ds-1", "name": "旧名"}, {"id": "ds-gone", "name": "已删"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
}
]
},
"human": {"contacts": [{"id": "c-1", "name": "David Hayes", "channel": "email"}]},
}
)
@ -143,12 +154,16 @@ def test_soul_candidates_lists_configured_items_only():
assert [item["name"] for item in lists["cli_tools"]] == ["ffmpeg"]
# the stable mention id flows through so the frontend can mint [§cli_tool:<id>§]
assert [item["id"] for item in lists["cli_tools"]] == ["ct-1"]
# enriched from DB; dangling dataset kept with missing flag (placeholder, 0522)
knowledge = {item["id"]: item for item in lists["knowledge_datasets"]}
assert knowledge["ds-1"]["name"] == "产品手册"
assert knowledge["ds-1"]["missing"] is False
assert knowledge["ds-gone"]["missing"] is True
assert knowledge["ds-gone"]["name"] == "已删"
# Knowledge mentions point at set ids; nested datasets are hydrated for context.
knowledge_set = lists["knowledge_sets"][0]
assert knowledge_set["id"] == "kb-1"
assert knowledge_set["name"] == "产品知识"
assert knowledge_set["missing_dataset_ids"] == ["ds-gone"]
datasets = {item["id"]: item for item in knowledge_set["datasets"]}
assert datasets["ds-1"]["name"] == "产品手册"
assert datasets["ds-1"]["missing"] is False
assert datasets["ds-gone"]["missing"] is True
assert datasets["ds-gone"]["name"] == "已删"
assert lists["human_contacts"][0]["id"] == "c-1"
assert lists["dify_tools"][0]["id"] == "tavily/tavily_search"

View File

@ -149,22 +149,32 @@ def test_dangling_knowledge_without_label_gets_fallback_name():
]
def test_configured_but_deleted_dataset_surfaces_as_placeholder():
def test_configured_but_deleted_knowledge_set_surfaces_as_placeholder():
payload = ComposerSavePayload.model_validate(
{
"variant": "agent_app",
"agent_soul": {
"prompt": {"system_prompt": "see [§knowledge:ds-1:产品手册§]"},
"knowledge": {"datasets": [{"id": "ds-1", "name": "产品手册"}]},
"prompt": {"system_prompt": "see [§knowledge:kb-1:产品手册§]"},
"knowledge": {
"sets": [
{
"id": "kb-1",
"name": "产品手册",
"datasets": [{"id": "ds-1", "name": "产品手册"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
}
]
},
},
"save_strategy": "save_to_current_version",
}
)
# configured + DB row exists -> clean
assert _findings(payload, existing_dataset_ids={"ds-1"})["knowledge_retrieval_placeholder"] == []
# configured but deleted in DB -> placeholder
assert _findings(payload, existing_dataset_ids=set())["knowledge_retrieval_placeholder"] == [
{"id": "ds-1", "placeholder_name": "产品手册"}
# configured + current Agent Soul row exists -> clean
assert _findings(payload, existing_knowledge_set_ids={"kb-1"})["knowledge_retrieval_placeholder"] == []
# configured but removed from the current Agent Soul surface -> placeholder
assert _findings(payload, existing_knowledge_set_ids=set())["knowledge_retrieval_placeholder"] == [
{"id": "kb-1", "placeholder_name": "产品手册"}
]

View File

@ -107,7 +107,17 @@ def soul() -> AgentSoulConfig:
],
"cli_tools": [{"id": "ct-1", "name": "ffmpeg"}],
},
"knowledge": {"datasets": [{"id": "ds-1", "name": "产品手册"}]},
"knowledge": {
"sets": [
{
"id": "kb-1",
"name": "产品手册",
"datasets": [{"id": "ds-1", "name": "产品手册"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
}
]
},
"human": {"contacts": [{"id": "c-1", "name": "David Hayes", "channel": "email"}]},
}
)
@ -117,7 +127,7 @@ def test_soul_resolver_resolves_each_kind(soul: AgentSoulConfig):
resolver = build_soul_mention_resolver(soul)
prompt = (
"Use [§tool:tavily/tavily_search:tavily§], run [§cli_tool:ct-1:ffmpeg§], "
"ground in [§knowledge:ds-1§], ask [§human:c-1§]."
"ground in [§knowledge:kb-1§], ask [§human:c-1§]."
)
expanded = expand_prompt_mentions(prompt, resolver)

View File

@ -7,21 +7,31 @@ root stays import-safe for callers that only need to construct run requests.
from dify_agent.layers.knowledge.configs import (
DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID,
DifyKnowledgeBaseLayerConfig,
DifyKnowledgeDatasetConfig,
DifyKnowledgeEagerResult,
DifyKnowledgeMetadataCondition,
DifyKnowledgeMetadataConditions,
DifyKnowledgeMetadataFilteringConfig,
DifyKnowledgeModelConfig,
DifyKnowledgeQueryConfig,
DifyKnowledgeRerankingModelConfig,
DifyKnowledgeRetrievalConfig,
DifyKnowledgeRuntimeState,
DifyKnowledgeSetConfig,
)
__all__ = [
"DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID",
"DifyKnowledgeBaseLayerConfig",
"DifyKnowledgeDatasetConfig",
"DifyKnowledgeEagerResult",
"DifyKnowledgeMetadataCondition",
"DifyKnowledgeMetadataConditions",
"DifyKnowledgeMetadataFilteringConfig",
"DifyKnowledgeModelConfig",
"DifyKnowledgeQueryConfig",
"DifyKnowledgeRerankingModelConfig",
"DifyKnowledgeRetrievalConfig",
"DifyKnowledgeRuntimeState",
"DifyKnowledgeSetConfig",
]

View File

@ -1,12 +1,11 @@
"""Client-safe DTOs for the Dify knowledge-base Agenton layer.
The public layer config exposes only static retrieval controls: dataset ids,
retrieval strategy, metadata filtering, and observation-size limits. The agent
model itself should only ever see a single ``query`` tool argument; tenant/
app/user context comes from the execution-context layer and the actual
retrieval is delegated to the Dify API inner endpoint. Tool naming is not
caller-configurable: the runtime always exposes the same stable knowledge-base
search tool.
The public layer config carries one or more named knowledge sets. Each set owns
its dataset ids plus query, retrieval, and metadata-filtering policy. Generated-
query sets are exposed through one stable model-visible search tool whose
schema lets the model pick ``set_name`` and ``query``; user-query sets are
retrieved eagerly when the layer enters a run and their formatted observations
are kept only in JSON-safe ``runtime_state`` for session snapshots.
"""
from __future__ import annotations
@ -61,6 +60,44 @@ class DifyKnowledgeRerankingModelConfig(BaseModel):
model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
class DifyKnowledgeDatasetConfig(BaseModel):
    """Reference to a single dataset that belongs to a knowledge set.

    Retrieval only consumes ``id``. ``name`` and ``description`` are optional
    pass-through labels kept for runtime/debug snapshots; they do not alter
    the inner retrieval request contract.
    """

    # Unknown keys are rejected instead of being silently dropped.
    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")

    id: str
    name: str | None = None
    description: str | None = None

    @field_validator("id")
    @classmethod
    def validate_id(cls, value: str) -> str:
        """Return ``value`` without surrounding whitespace; reject blank ids."""
        stripped = value.strip()
        if stripped == "":
            raise ValueError("dataset id must not be blank")
        return stripped
class DifyKnowledgeQueryConfig(BaseModel):
    """Describes how the search query of one knowledge set is obtained.

    ``mode`` selects between ``user_query`` and ``generated_query``. ``value``
    is mandatory (and must be non-blank) only for ``user_query``.
    """

    # Unknown keys are rejected instead of being silently dropped.
    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")

    mode: Literal["user_query", "generated_query"]
    value: str | None = None

    @model_validator(mode="after")
    def validate_mode_specific_fields(self) -> DifyKnowledgeQueryConfig:
        """Require a non-blank ``value`` when ``mode`` is ``user_query``."""
        if self.mode != "user_query":
            return self
        query_text = self.value if self.value else ""
        if query_text.strip() == "":
            raise ValueError("query.value is required for user_query mode")
        return self
class DifyKnowledgeRetrievalConfig(BaseModel):
"""Static retrieval controls mirrored into the inner API request."""
@ -151,38 +188,90 @@ class DifyKnowledgeMetadataFilteringConfig(BaseModel):
return payload
class DifyKnowledgeBaseLayerConfig(LayerConfig):
"""Public config for one model-visible knowledge search tool.
class DifyKnowledgeSetConfig(BaseModel):
"""One independently searchable or eagerly-preloaded knowledge set."""
The model only gets to choose whether to call the tool and what ``query``
to send. Dataset ids, retrieval settings, metadata filtering, and caller
context remain config/runtime concerns outside the model-visible tool
schema. The tool name and description are fixed by the layer runtime and do
not appear in the public config DTO.
"""
dataset_ids: list[str]
id: str
name: str
description: str | None = None
datasets: list[DifyKnowledgeDatasetConfig]
query: DifyKnowledgeQueryConfig
retrieval: DifyKnowledgeRetrievalConfig
metadata_filtering: DifyKnowledgeMetadataFilteringConfig = Field(
default_factory=DifyKnowledgeMetadataFilteringConfig
)
model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
@field_validator("id", "name")
@classmethod
def validate_non_blank_identity(cls, value: str) -> str:
    """Strip surrounding whitespace from ``id``/``name`` and reject blanks."""
    trimmed = value.strip()
    if trimmed == "":
        raise ValueError("knowledge set id and name must not be blank")
    return trimmed
@model_validator(mode="after")
def validate_dataset_ids(self) -> DifyKnowledgeSetConfig:
    """Require at least one dataset and forbid repeated dataset ids."""
    if len(self.datasets) == 0:
        raise ValueError("knowledge set requires at least one dataset")
    seen_ids = set()
    for dataset in self.datasets:
        # A second occurrence of an id means the selection is ambiguous.
        if dataset.id in seen_ids:
            raise ValueError("knowledge set dataset ids must be unique")
        seen_ids.add(dataset.id)
    return self
@property
def dataset_ids(self) -> list[str]:
    """Collect the ``id`` of every configured dataset, in declaration order."""
    collected: list[str] = []
    for entry in self.datasets:
        collected.append(entry.id)
    return collected
class DifyKnowledgeEagerResult(BaseModel):
    """Outcome of one eager user-query retrieval, kept in layer runtime state.

    All fields are plain strings so the record stays JSON-safe.
    """

    # Unknown keys are rejected instead of being silently dropped.
    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")

    # Identity of the knowledge set that produced this result.
    set_id: str
    set_name: str
    # Query text that was sent for the set.
    query: str
    # Formatted text that is later surfaced to the model.
    observation: str
    # Whether retrieval returned results, nothing, or a retryable failure.
    status: Literal["success", "empty", "temporarily_unavailable"]
class DifyKnowledgeRuntimeState(BaseModel):
    """Eager-retrieval state serialized into Agenton session snapshots."""

    # Assignments are re-validated because the layer mutates this state in place.
    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid", validate_assignment=True)

    # Fingerprint of the user-query set config that produced ``eager_results``.
    eager_config_fingerprint: str | None = None
    # One entry per user-query set from the most recent refresh.
    eager_results: list[DifyKnowledgeEagerResult] = Field(default_factory=list)
class DifyKnowledgeBaseLayerConfig(LayerConfig):
"""Public config for one knowledge-base layer.
The model-visible surface stays fixed to ``knowledge_base_search``. Set
names are the only model-visible selection labels; dataset ids, retrieval
controls, metadata filtering, and caller identity remain config/runtime
concerns outside the tool schema.
"""
sets: list[DifyKnowledgeSetConfig]
max_result_content_chars: int = Field(default=2000, ge=1)
max_observation_chars: int = Field(default=12000, ge=1)
model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
@field_validator("dataset_ids")
@classmethod
def validate_dataset_ids(cls, value: list[str]) -> list[str]:
if not value:
raise ValueError("dataset_ids must contain at least one item")
normalized_ids = [item.strip() for item in value]
if any(not item for item in normalized_ids):
raise ValueError("dataset_ids must not contain blank items")
return normalized_ids
@model_validator(mode="after")
def validate_observation_limits(self) -> DifyKnowledgeBaseLayerConfig:
def validate_sets_and_observation_limits(self) -> DifyKnowledgeBaseLayerConfig:
if not self.sets:
raise ValueError("sets must contain at least one knowledge set")
set_ids = [knowledge_set.id for knowledge_set in self.sets]
if len(set_ids) != len(set(set_ids)):
raise ValueError("knowledge set ids must be unique")
normalized_names = [knowledge_set.name.strip().lower() for knowledge_set in self.sets]
if len(normalized_names) != len(set(normalized_names)):
raise ValueError("knowledge set names must be unique")
if self.max_observation_chars < self.max_result_content_chars:
raise ValueError("max_observation_chars must be greater than or equal to max_result_content_chars")
return self
@ -191,10 +280,15 @@ class DifyKnowledgeBaseLayerConfig(LayerConfig):
__all__ = [
"DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID",
"DifyKnowledgeBaseLayerConfig",
"DifyKnowledgeDatasetConfig",
"DifyKnowledgeEagerResult",
"DifyKnowledgeMetadataCondition",
"DifyKnowledgeMetadataConditions",
"DifyKnowledgeMetadataFilteringConfig",
"DifyKnowledgeModelConfig",
"DifyKnowledgeQueryConfig",
"DifyKnowledgeRerankingModelConfig",
"DifyKnowledgeRetrievalConfig",
"DifyKnowledgeRuntimeState",
"DifyKnowledgeSetConfig",
]

View File

@ -1,17 +1,18 @@
"""Dify knowledge-base layer exposing one model-visible search tool.
"""Dify knowledge-base layer exposing set-aware retrieval.
The layer depends on ``DifyExecutionContextLayer`` for tenant/app/user/invoke
identity, keeps retrieval controls in config only, and borrows a lifespan-owned
HTTP client for each tool invocation. It never owns live clients or stores
retrieved source content in layer state. Tool identity is intentionally fixed at
runtime: callers cannot rename the knowledge tool or override its description
through public layer config because the model-visible surface must stay stable
across API-side Agent Soul mappings.
identity. Generated-query sets become one stable model-visible
``knowledge_base_search(set_name, query)`` tool, while user-query sets are
retrieved eagerly during context entry and exposed as additional user prompt
content. Eager observations are persisted only as JSON-safe runtime state so
Agenton session snapshots can resume without repeating unchanged retrievals.
"""
from __future__ import annotations
from dataclasses import dataclass
import hashlib
import json
import logging
from typing import ClassVar, cast
@ -27,7 +28,13 @@ from dify_agent.layers.knowledge.client import (
DifyKnowledgeBaseClientError,
DifyKnowledgeRetrieveResponse,
)
from dify_agent.layers.knowledge.configs import DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID, DifyKnowledgeBaseLayerConfig
from dify_agent.layers.knowledge.configs import (
DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID,
DifyKnowledgeBaseLayerConfig,
DifyKnowledgeEagerResult,
DifyKnowledgeRuntimeState,
DifyKnowledgeSetConfig,
)
logger = logging.getLogger(__name__)
@ -35,23 +42,14 @@ logger = logging.getLogger(__name__)
# public DTO cannot grow a parallel naming contract that diverges from the
# runtime knowledge-search surface.
_KNOWLEDGE_BASE_TOOL_NAME = "knowledge_base_search"
_KNOWLEDGE_BASE_TOOL_DESCRIPTION = "Search configured knowledge bases for information relevant to the query."
_KNOWLEDGE_BASE_TOOL_DESCRIPTION = (
"Search a configured knowledge set. Pick one configured set_name and provide a focused search query."
)
BLANK_QUERY_OBSERVATION = "knowledge base search requires a non-empty query"
NO_RESULTS_OBSERVATION = "No relevant knowledge base results were found."
TEMPORARY_UNAVAILABLE_OBSERVATION = (
"Knowledge base search is temporarily unavailable. Please continue without it if possible."
)
QUERY_TOOL_SCHEMA = {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query for the configured knowledge bases.",
}
},
"required": ["query"],
"additionalProperties": False,
}
class DifyKnowledgeBaseDeps(LayerDeps):
@ -61,8 +59,10 @@ class DifyKnowledgeBaseDeps(LayerDeps):
@dataclass(slots=True)
class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBaseLayerConfig]):
"""Layer that resolves one config-scoped knowledge search tool."""
class DifyKnowledgeBaseLayer(
PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBaseLayerConfig, DifyKnowledgeRuntimeState]
):
"""Layer that resolves set-scoped knowledge tools and eager user prompts."""
type_id: ClassVar[str | None] = DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID
@ -95,7 +95,7 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
)
async def get_tools(self, *, http_client: httpx.AsyncClient) -> list[Tool[object]]:
"""Build one Pydantic AI tool that exposes only ``query`` to the model.
"""Build the unified generated-query Pydantic AI tool, when needed.
Knowledge tools depend on execution-context identity that is optional for
other run types but mandatory here: ``tenant_id``, ``user_id``,
@ -103,11 +103,15 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
any HTTP request is attempted. Tool execution then follows a strict
observation policy:
- unknown ``set_name`` returns a local validation observation;
- blank ``query`` returns a local validation observation;
- retryable client failures (timeouts, connection failures, HTTP
``429``/``502``) become a temporary-unavailable observation;
- non-retryable client failures are raised so the run fails fast.
"""
generated_sets = self._generated_query_sets()
if not generated_sets:
return []
if http_client.is_closed:
raise RuntimeError("DifyKnowledgeBaseLayer.get_tools() requires an open shared HTTP client.")
@ -118,54 +122,28 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
api_key=self.inner_api_key,
http_client=http_client,
)
set_by_name = {knowledge_set.name: knowledge_set for knowledge_set in generated_sets}
async def knowledge_base_search(_ctx: RunContext[object], query: str) -> str:
async def knowledge_base_search(_ctx: RunContext[object], set_name: str, query: str) -> str:
knowledge_set = set_by_name.get(set_name)
if knowledge_set is None:
return f"unknown knowledge set: {set_name}"
normalized_query = query.strip()
if not normalized_query:
return BLANK_QUERY_OBSERVATION
try:
response = await client.retrieve(
tenant_id=caller["tenant_id"],
user_id=caller["user_id"],
app_id=caller["app_id"],
user_from=caller["user_from"],
invoke_from=caller["invoke_from"],
dataset_ids=list(self.config.dataset_ids),
query=normalized_query,
retrieval=self.config.retrieval,
metadata_filtering=self.config.metadata_filtering,
)
except DifyKnowledgeBaseClientError as exc:
if exc.retryable:
logger.warning(
"knowledge base search temporarily unavailable",
extra={
"tenant_id": caller["tenant_id"],
"app_id": caller["app_id"],
"invoke_from": caller["invoke_from"],
"error_code": exc.error_code,
"status_code": exc.status_code,
},
)
return TEMPORARY_UNAVAILABLE_OBSERVATION
logger.error(
"knowledge base search failed",
extra={
"tenant_id": caller["tenant_id"],
"app_id": caller["app_id"],
"invoke_from": caller["invoke_from"],
"error_code": exc.error_code,
"status_code": exc.status_code,
},
)
raise
return _format_observation(response, self.config)
return await self._retrieve_for_set(
client=client,
caller=caller,
knowledge_set=knowledge_set,
query=normalized_query,
retryable_observation=True,
)
async def prepare_tool_definition(_ctx: RunContext[object], tool_def: ToolDefinition) -> ToolDefinition:
return ToolDefinition(
name=tool_def.name,
description=tool_def.description,
parameters_json_schema=QUERY_TOOL_SCHEMA,
parameters_json_schema=_tool_schema(generated_sets),
strict=tool_def.strict,
sequential=tool_def.sequential,
metadata=tool_def.metadata,
@ -181,11 +159,177 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
knowledge_base_search,
takes_ctx=True,
name=_KNOWLEDGE_BASE_TOOL_NAME,
description=_KNOWLEDGE_BASE_TOOL_DESCRIPTION,
description=_tool_description(generated_sets),
prepare=prepare_tool_definition,
)
]
@property
@override
def user_prompts(self) -> list[str]:
"""Expose eager user-query results as an additional user prompt."""
if not self.runtime_state.eager_results:
return []
sections: list[str] = []
for result in self.runtime_state.eager_results:
sections.append(
"\n".join(
[
f"Set: {result.set_name}",
f"Query: {result.query}",
"Results:",
result.observation,
]
)
)
return ["Knowledge retrieval results:\n\n" + "\n\n".join(sections)]
@override
async def on_context_create(self) -> None:
await self._refresh_eager_results_if_needed()
@override
async def on_context_resume(self) -> None:
await self._refresh_eager_results_if_needed()
def _generated_query_sets(self) -> list[DifyKnowledgeSetConfig]:
    """Return the configured sets whose query mode is ``generated_query``."""
    selected: list[DifyKnowledgeSetConfig] = []
    for candidate in self.config.sets:
        if candidate.query.mode == "generated_query":
            selected.append(candidate)
    return selected
def _user_query_sets(self) -> list[DifyKnowledgeSetConfig]:
    """Return the configured sets whose query mode is ``user_query``."""
    selected: list[DifyKnowledgeSetConfig] = []
    for candidate in self.config.sets:
        if candidate.query.mode == "user_query":
            selected.append(candidate)
    return selected
async def _refresh_eager_results_if_needed(self) -> None:
    """Populate ``runtime_state`` with eager results for user-query sets.

    Behaviour:

    - With no user-query sets, the stored fingerprint and results are cleared.
    - When the stored fingerprint matches the current user-query config,
      cached results with status ``success`` or ``empty`` are reused. Sets
      whose cached status is ``temporarily_unavailable`` (or that have no
      cached result) are fetched again, so a transient outage recorded in a
      session snapshot is not replayed on every later create/resume.
    - When the fingerprint differs, every user-query set is fetched.

    Retryable client errors are recorded as a ``temporarily_unavailable``
    result for that set.

    Raises:
        DifyKnowledgeBaseClientError: re-raised for non-retryable failures.
    """
    user_query_sets = self._user_query_sets()
    if not user_query_sets:
        # No eager sets configured: drop state left over from an earlier config.
        self.runtime_state.eager_config_fingerprint = None
        self.runtime_state.eager_results = []
        return

    fingerprint = _eager_config_fingerprint(user_query_sets)
    reusable: dict[str, DifyKnowledgeEagerResult] = {}
    if self.runtime_state.eager_config_fingerprint == fingerprint:
        # Only settled outcomes are reusable; a transient failure must be retried.
        reusable = {
            result.set_id: result
            for result in self.runtime_state.eager_results
            if result.status != "temporarily_unavailable"
        }
        if all(knowledge_set.id in reusable for knowledge_set in user_query_sets):
            return

    caller = _build_caller_context(self.deps.execution_context.config)
    eager_results: list[DifyKnowledgeEagerResult] = []
    async with httpx.AsyncClient() as http_client:
        client = DifyKnowledgeBaseClient(
            base_url=self.inner_api_url,
            api_key=self.inner_api_key,
            http_client=http_client,
        )
        for knowledge_set in user_query_sets:
            cached = reusable.get(knowledge_set.id)
            if cached is not None:
                eager_results.append(cached)
                continue

            query = (knowledge_set.query.value or "").strip()
            try:
                response = await client.retrieve(
                    tenant_id=caller["tenant_id"],
                    user_id=caller["user_id"],
                    app_id=caller["app_id"],
                    user_from=caller["user_from"],
                    invoke_from=caller["invoke_from"],
                    dataset_ids=knowledge_set.dataset_ids,
                    query=query,
                    retrieval=knowledge_set.retrieval,
                    metadata_filtering=knowledge_set.metadata_filtering,
                )
            except DifyKnowledgeBaseClientError as exc:
                log_extra = {
                    "tenant_id": caller["tenant_id"],
                    "app_id": caller["app_id"],
                    "invoke_from": caller["invoke_from"],
                    "knowledge_set_id": knowledge_set.id,
                    "error_code": exc.error_code,
                    "status_code": exc.status_code,
                }
                if not exc.retryable:
                    logger.error("eager knowledge retrieval failed", extra=log_extra)
                    raise
                logger.warning("eager knowledge retrieval temporarily unavailable", extra=log_extra)
                eager_results.append(
                    DifyKnowledgeEagerResult(
                        set_id=knowledge_set.id,
                        set_name=knowledge_set.name,
                        query=query,
                        observation=TEMPORARY_UNAVAILABLE_OBSERVATION,
                        status="temporarily_unavailable",
                    )
                )
                continue

            eager_results.append(
                DifyKnowledgeEagerResult(
                    set_id=knowledge_set.id,
                    set_name=knowledge_set.name,
                    query=query,
                    observation=_format_observation(response, self.config, include_heading=False),
                    status="success" if response.results else "empty",
                )
            )

    self.runtime_state.eager_results = eager_results
    self.runtime_state.eager_config_fingerprint = fingerprint
async def _retrieve_for_set(
    self,
    *,
    client: DifyKnowledgeBaseClient,
    caller: dict[str, str],
    knowledge_set: DifyKnowledgeSetConfig,
    query: str,
    retryable_observation: bool,
) -> str:
    """Run one retrieval for ``knowledge_set`` and return the formatted observation.

    Args:
        client: Inner-API client used to perform the retrieval.
        caller: Caller identity; must provide ``tenant_id``, ``user_id``,
            ``app_id``, ``user_from`` and ``invoke_from``.
        knowledge_set: Set whose dataset ids, retrieval and metadata-filtering
            settings are sent with the request.
        query: Query text forwarded to the client.
        retryable_observation: When true, retryable client errors are turned
            into ``TEMPORARY_UNAVAILABLE_OBSERVATION`` instead of being raised.

    Raises:
        DifyKnowledgeBaseClientError: for non-retryable errors, or for any
            client error when ``retryable_observation`` is false.
    """
    try:
        response = await client.retrieve(
            tenant_id=caller["tenant_id"],
            user_id=caller["user_id"],
            app_id=caller["app_id"],
            user_from=caller["user_from"],
            invoke_from=caller["invoke_from"],
            dataset_ids=knowledge_set.dataset_ids,
            query=query,
            retrieval=knowledge_set.retrieval,
            metadata_filtering=knowledge_set.metadata_filtering,
        )
    except DifyKnowledgeBaseClientError as exc:
        log_fields = {
            "tenant_id": caller["tenant_id"],
            "app_id": caller["app_id"],
            "invoke_from": caller["invoke_from"],
            "knowledge_set_id": knowledge_set.id,
            "error_code": exc.error_code,
            "status_code": exc.status_code,
        }
        degrade_to_observation = exc.retryable and retryable_observation
        if not degrade_to_observation:
            logger.error("knowledge base search failed", extra=log_fields)
            raise
        logger.warning("knowledge base search temporarily unavailable", extra=log_fields)
        return TEMPORARY_UNAVAILABLE_OBSERVATION

    return _format_observation(response, self.config)
def _build_caller_context(execution_context: object) -> dict[str, str]:
"""Extract the inner-API caller identity from execution-context config.
@ -232,7 +376,56 @@ def _build_caller_context(execution_context: object) -> dict[str, str]:
}
def _format_observation(response: DifyKnowledgeRetrieveResponse, config: DifyKnowledgeBaseLayerConfig) -> str:
def _tool_schema(generated_sets: list[DifyKnowledgeSetConfig]) -> dict[str, object]:
    """Build the JSON schema of the search tool's arguments.

    ``set_name`` is restricted to the names of ``generated_sets``; ``query``
    is a free-form string. Both are required and no other keys are allowed.
    """
    selectable_names = []
    for knowledge_set in generated_sets:
        selectable_names.append(knowledge_set.name)

    set_name_property = {
        "type": "string",
        "enum": selectable_names,
        "description": "Knowledge set to search.",
    }
    query_property = {
        "type": "string",
        "description": "Search query for the selected knowledge set.",
    }
    return {
        "type": "object",
        "properties": {
            "set_name": set_name_property,
            "query": query_property,
        },
        "required": ["set_name", "query"],
        "additionalProperties": False,
    }
def _tool_description(generated_sets: list[DifyKnowledgeSetConfig]) -> str:
    """Compose the tool description, listing every selectable set.

    Each set is listed as ``name: description`` when it has a non-empty
    description and as the bare ``name`` otherwise.
    """
    labels = [
        f"{knowledge_set.name}: {knowledge_set.description}" if knowledge_set.description else knowledge_set.name
        for knowledge_set in generated_sets
    ]
    joined_labels = ", ".join(labels)
    return f"{_KNOWLEDGE_BASE_TOOL_DESCRIPTION} Configured sets: {joined_labels}."
def _eager_config_fingerprint(user_query_sets: list[DifyKnowledgeSetConfig]) -> str:
    """Return a SHA-256 hex digest over the retrieval-relevant set config.

    The digest covers each set's id, query, dataset ids, retrieval settings
    and metadata filtering, serialized as compact JSON with sorted keys.
    """
    entries = []
    for knowledge_set in user_query_sets:
        entry = {
            "id": knowledge_set.id,
            "query": knowledge_set.query.model_dump(mode="json"),
            "dataset_ids": knowledge_set.dataset_ids,
            "retrieval": knowledge_set.retrieval.model_dump(mode="json"),
            "metadata_filtering": knowledge_set.metadata_filtering.model_dump(mode="json", by_alias=True),
        }
        entries.append(entry)

    canonical_json = json.dumps(entries, sort_keys=True, separators=(",", ":"))
    digest = hashlib.sha256(canonical_json.encode("utf-8"))
    return digest.hexdigest()
def _format_observation(
response: DifyKnowledgeRetrieveResponse,
config: DifyKnowledgeBaseLayerConfig,
*,
include_heading: bool = True,
) -> str:
"""Render inner-API retrieval results into the model-visible tool response.
The formatting contract is intentionally simple and stable for the model:
@ -248,7 +441,7 @@ def _format_observation(response: DifyKnowledgeRetrieveResponse, config: DifyKno
if not response.results:
return NO_RESULTS_OBSERVATION
lines = ["Knowledge base search results:"]
lines = ["Knowledge base search results:"] if include_heading else []
for index, result in enumerate(response.results, start=1):
metadata = result.metadata
title = result.title or metadata.document_name or "Untitled"
@ -280,6 +473,5 @@ __all__ = [
"DifyKnowledgeBaseDeps",
"DifyKnowledgeBaseLayer",
"NO_RESULTS_OBSERVATION",
"QUERY_TOOL_SCHEMA",
"TEMPORARY_UNAVAILABLE_OBSERVATION",
]

View File

@ -6,46 +6,142 @@ from dify_agent.layers.knowledge import DifyKnowledgeBaseLayerConfig
def _valid_config() -> dict[str, object]:
return {
"dataset_ids": ["dataset-1"],
"retrieval": {
"mode": "multiple",
"top_k": 4,
},
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}],
"query": {"mode": "generated_query"},
"retrieval": {
"mode": "multiple",
"top_k": 4,
},
}
],
}
def test_knowledge_base_config_accepts_valid_multiple_mode() -> None:
config = DifyKnowledgeBaseLayerConfig.model_validate(_valid_config())
assert config.dataset_ids == ["dataset-1"]
assert config.retrieval.top_k == 4
assert config.metadata_filtering.mode == "disabled"
assert config.sets[0].dataset_ids == ["dataset-1"]
assert config.sets[0].retrieval.top_k == 4
assert config.sets[0].metadata_filtering.mode == "disabled"
@pytest.mark.parametrize(
"payload, expected_message",
[
({"dataset_ids": [], "retrieval": {"mode": "multiple", "top_k": 4}}, "dataset_ids"),
({"sets": []}, "sets"),
({"tool_name": "knowledge_base_search", **_valid_config()}, "Extra inputs are not permitted"),
({"tool_description": "Search knowledge", **_valid_config()}, "Extra inputs are not permitted"),
({"dataset_ids": ["dataset-1"], "retrieval": {"mode": "multiple"}}, "top_k"),
({"dataset_ids": ["dataset-1"], "retrieval": {"mode": "single"}}, "retrieval.model"),
(
{
"dataset_ids": ["dataset-1"],
"retrieval": {"mode": "multiple", "top_k": 4},
"metadata_filtering": {"mode": "automatic"},
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": ""}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
}
]
},
"dataset id",
),
(
{
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}],
"query": {"mode": "user_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
}
]
},
"query.value",
),
(
{
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple"},
}
]
},
"top_k",
),
(
{
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "single"},
}
]
},
"retrieval.model",
),
(
{
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
"metadata_filtering": {"mode": "automatic"},
}
],
},
"metadata_filtering.model_config",
),
(
{
"dataset_ids": ["dataset-1"],
"retrieval": {"mode": "multiple", "top_k": 4},
"metadata_filtering": {"mode": "manual"},
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
"metadata_filtering": {"mode": "manual"},
}
],
},
"metadata_filtering.conditions",
),
(
{
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
},
{
"id": "docs",
"name": "support kb",
"datasets": [{"id": "dataset-2"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
},
]
},
"names must be unique",
),
],
)
def test_knowledge_base_config_rejects_invalid_inputs(payload: dict[str, object], expected_message: str) -> None:
@ -57,8 +153,7 @@ def test_knowledge_base_config_rejects_observation_limit_smaller_than_result_lim
with pytest.raises(ValidationError, match="max_observation_chars"):
_ = DifyKnowledgeBaseLayerConfig.model_validate(
{
"dataset_ids": ["dataset-1"],
"retrieval": {"mode": "multiple", "top_k": 4},
**_valid_config(),
"max_result_content_chars": 50,
"max_observation_chars": 20,
}

View File

@ -8,7 +8,11 @@ from pydantic_ai import Tool
from agenton.compositor import Compositor, LayerNode, LayerProvider
from dify_agent.layers.execution_context import DifyExecutionContextLayerConfig
from dify_agent.layers.execution_context.layer import DifyExecutionContextLayer
from dify_agent.layers.knowledge.client import DifyKnowledgeBaseClientError
from dify_agent.layers.knowledge.client import (
DifyKnowledgeBaseClient,
DifyKnowledgeBaseClientError,
DifyKnowledgeRetrieveResponse,
)
from dify_agent.layers.knowledge.configs import DifyKnowledgeBaseLayerConfig
from dify_agent.layers.knowledge.layer import (
BLANK_QUERY_OBSERVATION,
@ -32,10 +36,23 @@ def _execution_context_config(**overrides: object) -> DifyExecutionContextLayerC
def _knowledge_config(**overrides: object) -> DifyKnowledgeBaseLayerConfig:
payload: dict[str, object] = {
"dataset_ids": ["dataset-1"],
set_payload: dict[str, object] = {
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
}
for key in ("id", "name", "description", "datasets", "query", "retrieval", "metadata_filtering"):
if key in overrides:
set_payload[key] = overrides.pop(key)
if "dataset_ids" in overrides:
dataset_ids = overrides.pop("dataset_ids")
assert isinstance(dataset_ids, list)
set_payload["datasets"] = [{"id": dataset_id} for dataset_id in dataset_ids]
payload: dict[str, object] = {
"sets": [set_payload],
}
payload.update(overrides)
return DifyKnowledgeBaseLayerConfig.model_validate(payload)
@ -62,7 +79,7 @@ def _knowledge_provider() -> LayerProvider[DifyKnowledgeBaseLayer]:
)
def test_knowledge_layer_exposes_one_query_only_tool_definition() -> None:
def test_knowledge_layer_exposes_one_set_scoped_tool_definition() -> None:
async def scenario() -> None:
compositor = Compositor(
[
@ -82,20 +99,23 @@ def test_knowledge_layer_exposes_one_query_only_tool_definition() -> None:
tool_def = await tool.prepare_tool_def(None) # pyright: ignore[reportArgumentType]
assert isinstance(tool, Tool)
assert tool.name == "knowledge_base_search"
assert tool.description == "Search configured knowledge bases for information relevant to the query."
assert "Pick one configured set_name" in tool.description
assert tool_def is not None
assert (
tool_def.description == "Search configured knowledge bases for information relevant to the query."
)
assert "Pick one configured set_name" in tool_def.description
assert tool_def.parameters_json_schema == {
"type": "object",
"properties": {
"set_name": {
"type": "string",
"enum": ["Support KB"],
"description": "Knowledge set to search.",
},
"query": {
"type": "string",
"description": "Search query for the configured knowledge bases.",
}
"description": "Search query for the selected knowledge set.",
},
},
"required": ["query"],
"required": ["set_name", "query"],
"additionalProperties": False,
}
@ -119,12 +139,105 @@ def test_knowledge_layer_rejects_blank_query_locally() -> None:
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
result = await tool.function_schema.call({"query": " "}, None) # pyright: ignore[reportArgumentType]
result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
{"set_name": "Support KB", "query": " "}, None
)
assert result == BLANK_QUERY_OBSERVATION
asyncio.run(scenario())
def test_knowledge_layer_exposes_no_tool_when_all_sets_are_user_query(monkeypatch: pytest.MonkeyPatch) -> None:
async def fake_retrieve(self: DifyKnowledgeBaseClient, **_kwargs: object) -> DifyKnowledgeRetrieveResponse:
del self
return DifyKnowledgeRetrieveResponse.model_validate({"results": [], "usage": {}})
monkeypatch.setattr(DifyKnowledgeBaseClient, "retrieve", fake_retrieve)
async def scenario() -> None:
compositor = Compositor(
[
LayerNode("execution_context", _execution_context_provider()),
LayerNode("knowledge", _knowledge_provider(), deps={"execution_context": "execution_context"}),
]
)
async with httpx.AsyncClient() as http_client:
async with compositor.enter(
configs={
"execution_context": _execution_context_config(),
"knowledge": _knowledge_config(query={"mode": "user_query", "value": "release notes"}),
}
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
assert await knowledge_layer.get_tools(http_client=http_client) == []
asyncio.run(scenario())
def test_knowledge_layer_fetches_user_query_sets_on_context_entry(monkeypatch: pytest.MonkeyPatch) -> None:
seen_requests: list[dict[str, object]] = []
async def fake_retrieve(self: DifyKnowledgeBaseClient, **kwargs: object) -> DifyKnowledgeRetrieveResponse:
del self
seen_requests.append(kwargs)
return DifyKnowledgeRetrieveResponse.model_validate(
{
"results": [
{
"metadata": {
"_source": "knowledge",
"dataset_name": "Docs",
"document_name": "Release.md",
"score": 0.8,
},
"title": "Release",
"files": [],
"content": "Version notes",
"summary": None,
}
],
"usage": {},
}
)
monkeypatch.setattr(DifyKnowledgeBaseClient, "retrieve", fake_retrieve)
async def scenario() -> None:
compositor = Compositor(
[
LayerNode("execution_context", _execution_context_provider()),
LayerNode("knowledge", _knowledge_provider(), deps={"execution_context": "execution_context"}),
]
)
async with compositor.enter(
configs={
"execution_context": _execution_context_config(),
"knowledge": _knowledge_config(query={"mode": "user_query", "value": "release notes"}),
}
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
assert len(seen_requests) == 1
assert seen_requests[0]["query"] == "release notes"
assert seen_requests[0]["dataset_ids"] == ["dataset-1"]
assert knowledge_layer.runtime_state.eager_config_fingerprint
assert knowledge_layer.runtime_state.eager_results[0].status == "success"
assert knowledge_layer.user_prompts == [
"Knowledge retrieval results:\n\n"
"Set: Support KB\n"
"Query: release notes\n"
"Results:\n"
"1. Title: Release\n"
" Dataset: Docs\n"
" Document: Release.md\n"
" Score: 0.8\n"
" Content: Version notes"
]
await knowledge_layer.on_context_resume()
assert len(seen_requests) == 1
asyncio.run(scenario())
@pytest.mark.parametrize(
("field_name", "field_value"),
[
@ -199,7 +312,9 @@ def test_knowledge_layer_formats_results_and_truncates_observation() -> None:
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
{"set_name": "Support KB", "query": "reset"}, None
)
assert result.startswith("Knowledge base search results:\n1. Title: Guide")
assert "Dataset: Docs" in result
assert "Document: Guide.md" in result
@ -229,7 +344,9 @@ def test_knowledge_layer_returns_no_results_observation() -> None:
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
{"set_name": "Support KB", "query": "reset"}, None
)
assert result == NO_RESULTS_OBSERVATION
asyncio.run(scenario())
@ -256,7 +373,9 @@ def test_knowledge_layer_converts_retryable_failures_into_observation() -> None:
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
{"set_name": "Support KB", "query": "reset"}, None
)
assert result == TEMPORARY_UNAVAILABLE_OBSERVATION
asyncio.run(scenario())
@ -289,7 +408,9 @@ def test_knowledge_layer_converts_retryable_transport_failures_into_observation(
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
{"set_name": "Support KB", "query": "reset"}, None
)
assert result == TEMPORARY_UNAVAILABLE_OBSERVATION
asyncio.run(scenario())
@ -317,7 +438,9 @@ def test_knowledge_layer_raises_non_retryable_client_errors() -> None:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
with pytest.raises(DifyKnowledgeBaseClientError) as exc_info:
await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
await tool.function_schema.call( # pyright: ignore[reportArgumentType]
{"set_name": "Support KB", "query": "reset"}, None
)
assert exc_info.value.status_code == 403
asyncio.run(scenario())
@ -343,7 +466,9 @@ def test_knowledge_layer_raises_for_malformed_success_responses() -> None:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
with pytest.raises(DifyKnowledgeBaseClientError) as exc_info:
await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
await tool.function_schema.call( # pyright: ignore[reportArgumentType]
{"set_name": "Support KB", "query": "reset"}, None
)
assert exc_info.value.error_code == "invalid_response"
assert exc_info.value.retryable is False
@ -411,7 +536,9 @@ def test_knowledge_layer_sends_execution_context_and_static_config_to_inner_api(
) as run:
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
{"set_name": "Support KB", "query": "reset"}, None
)
assert result == NO_RESULTS_OBSERVATION
asyncio.run(scenario())

View File

@ -995,7 +995,7 @@ def test_runner_passes_dynamic_dify_knowledge_tools_to_agent(monkeypatch: pytest
return TestModel(custom_output_text="done") # pyright: ignore[reportReturnType]
async def fake_get_tools(self: DifyKnowledgeBaseLayer, *, http_client: httpx.AsyncClient) -> list[Tool[object]]:
assert self.config.dataset_ids == ["dataset-1"]
assert self.config.sets[0].dataset_ids == ["dataset-1"]
assert http_client.headers.get("X-Test-Client") == "dify-api"
return [Tool(knowledge_tool, name="knowledge_base_search")]
@ -1055,8 +1055,15 @@ def test_runner_passes_dynamic_dify_knowledge_tools_to_agent(monkeypatch: pytest
deps={"execution_context": "execution_context"},
config=DifyKnowledgeBaseLayerConfig.model_validate(
{
"dataset_ids": ["dataset-1"],
"retrieval": {"mode": "multiple", "top_k": 4},
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 4},
}
],
}
),
),

View File

@ -231,8 +231,15 @@ def test_create_app_creates_scheduler_and_closes_after_shutdown(monkeypatch: pyt
knowledge_layer = knowledge_provider.create_layer(
DifyKnowledgeBaseLayerConfig.model_validate(
{
"dataset_ids": ["dataset-1"],
"retrieval": {"mode": "multiple", "top_k": 2},
"sets": [
{
"id": "support",
"name": "Support KB",
"datasets": [{"id": "dataset-1"}],
"query": {"mode": "generated_query"},
"retrieval": {"mode": "multiple", "top_k": 2},
}
],
}
)
)

View File

@ -115,7 +115,7 @@ def test_protocol_and_dify_plugin_exports_do_not_import_server_only_modules() ->
"assert dify_agent_layers_execution_context.__all__ == ['DIFY_EXECUTION_CONTEXT_LAYER_TYPE_ID', 'DifyExecutionContextAgentMode', 'DifyExecutionContextInvokeFrom', 'DifyExecutionContextLayerConfig', 'DifyExecutionContextUserFrom']",
"assert dify_agent_layers_ask_human.__all__ == ['AskHumanAction', 'AskHumanActionStyle', 'AskHumanField', 'AskHumanFieldType', 'AskHumanFileField', 'AskHumanFileListField', 'AskHumanParagraphField', 'AskHumanResultStatus', 'AskHumanSelectField', 'AskHumanSelectOption', 'AskHumanSelectedAction', 'AskHumanToolArgs', 'AskHumanToolResult', 'AskHumanUrgency', 'DEFAULT_ASK_HUMAN_TOOL_DESCRIPTION', 'DIFY_ASK_HUMAN_LAYER_TYPE_ID', 'DifyAskHumanLayerConfig']",
"assert dify_agent_layers_dify_plugin.__all__ == ['DIFY_PLUGIN_LLM_LAYER_TYPE_ID', 'DIFY_PLUGIN_TOOLS_LAYER_TYPE_ID', 'DifyPluginCredentialValue', 'DifyPluginLLMLayerConfig', 'DifyPluginToolCredentialType', 'DifyPluginToolConfig', 'DifyPluginToolOption', 'DifyPluginToolParameter', 'DifyPluginToolParameterForm', 'DifyPluginToolParameterType', 'DifyPluginToolsLayerConfig', 'DifyPluginToolValue']",
"assert dify_agent_layers_knowledge.__all__ == ['DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID', 'DifyKnowledgeBaseLayerConfig', 'DifyKnowledgeMetadataCondition', 'DifyKnowledgeMetadataConditions', 'DifyKnowledgeMetadataFilteringConfig', 'DifyKnowledgeModelConfig', 'DifyKnowledgeRerankingModelConfig', 'DifyKnowledgeRetrievalConfig']",
"assert dify_agent_layers_knowledge.__all__ == ['DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID', 'DifyKnowledgeBaseLayerConfig', 'DifyKnowledgeDatasetConfig', 'DifyKnowledgeEagerResult', 'DifyKnowledgeMetadataCondition', 'DifyKnowledgeMetadataConditions', 'DifyKnowledgeMetadataFilteringConfig', 'DifyKnowledgeModelConfig', 'DifyKnowledgeQueryConfig', 'DifyKnowledgeRerankingModelConfig', 'DifyKnowledgeRetrievalConfig', 'DifyKnowledgeRuntimeState', 'DifyKnowledgeSetConfig']",
"assert dify_agent_layers_output.__all__ == ['DIFY_OUTPUT_LAYER_TYPE_ID', 'DifyOutputLayerConfig']",
"assert dify_agent_layers_shell.__all__ == ['DIFY_SHELL_LAYER_TYPE_ID', 'DifyShellCliToolConfig', 'DifyShellEnvVarConfig', 'DifyShellLayerConfig', 'DifyShellSandboxConfig', 'DifyShellSecretRefConfig']",
],

View File

@ -563,7 +563,7 @@ export type AgentComposerSoulCandidatesResponse = {
cli_tools?: Array<AgentCliToolConfig>
dify_tools?: Array<AgentComposerDifyToolCandidateResponse>
human_contacts?: Array<AgentHumanContactConfig>
knowledge_datasets?: Array<AgentKnowledgeDatasetConfig>
knowledge_sets?: Array<AgentComposerKnowledgeSetCandidateResponse>
}
export type ComposerCandidateCapabilities = {
@ -926,9 +926,7 @@ export type AgentSoulHumanConfig = {
}
export type AgentSoulKnowledgeConfig = {
datasets?: Array<AgentKnowledgeDatasetConfig>
query_config?: AgentKnowledgeQueryConfig
query_mode?: AgentKnowledgeQueryMode | null
sets?: Array<AgentKnowledgeSetConfig>
}
export type AgentSoulMemoryConfig = {
@ -1069,11 +1067,12 @@ export type AgentComposerDifyToolCandidateResponse = {
tools_count?: number | null
}
export type AgentKnowledgeDatasetConfig = {
export type AgentComposerKnowledgeSetCandidateResponse = {
datasets?: Array<AgentComposerKnowledgeDatasetCandidateResponse>
description?: string | null
id?: string | null
name?: string | null
[key: string]: unknown
id: string
missing_dataset_ids?: Array<string>
name: string
}
export type AgentModerationProviderConfig = {
@ -1228,16 +1227,16 @@ export type AgentHumanToolConfig = {
[key: string]: unknown
}
export type AgentKnowledgeQueryConfig = {
query?: string | null
score_threshold?: number | null
score_threshold_enabled?: boolean | null
top_k?: number | null
[key: string]: unknown
export type AgentKnowledgeSetConfig = {
datasets: Array<AgentKnowledgeDatasetConfig>
description?: string | null
id: string
metadata_filtering?: AgentKnowledgeMetadataFilteringConfig
name: string
query: AgentKnowledgeQueryConfig
retrieval: AgentKnowledgeRetrievalConfig
}
export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
export type AgentMemoryArtifactConfig = {
id?: string | null
name?: string | null
@ -1376,6 +1375,13 @@ export type AgentPermissionConfig = {
export type AgentCliToolRiskLevel = 'dangerous' | 'safe' | 'unknown'
/**
* One dataset entry listed inside a knowledge-set candidate.
* Every field is optional; `description`, `id` and `name` may also be null.
* NOTE(review): `missing` presumably flags a referenced dataset that could not
* be resolved - confirm against the backend response model.
*/
export type AgentComposerKnowledgeDatasetCandidateResponse = {
description?: string | null
id?: string | null
missing?: boolean
name?: string | null
}
export type AgentModerationIoConfig = {
enabled?: boolean
preset_response?: string | null
@ -1404,6 +1410,34 @@ export type FormInputConfig
export type JsonValue2 = unknown
/**
* Dataset reference carried by a knowledge set.
* All three fields are optional and nullable; this shape has no index
* signature, so unknown extra keys are not part of the type.
*/
export type AgentKnowledgeDatasetConfig = {
description?: string | null
id?: string | null
name?: string | null
}
/**
* Metadata filtering settings for a knowledge set.
* `mode` is one of 'automatic', 'disabled' or 'manual'; `conditions` and
* `model_config` are optional and nullable.
* NOTE(review): which mode consumes `conditions` vs. `model_config` is not
* visible here - confirm against the backend.
*/
export type AgentKnowledgeMetadataFilteringConfig = {
conditions?: AgentKnowledgeMetadataConditions | null
mode?: 'automatic' | 'disabled' | 'manual'
model_config?: AgentKnowledgeModelConfig | null
}
/**
* Query settings for a knowledge set: a required `mode` plus an optional,
* nullable `value` string.
* NOTE(review): the meaning of `value` per mode is not visible here - confirm.
*/
export type AgentKnowledgeQueryConfig = {
mode: AgentKnowledgeQueryMode
value?: string | null
}
/**
* Retrieval settings for a knowledge set.
* Only `mode` ('multiple' or 'single') is required; the model, reranking,
* threshold, top-k and weight fields are all optional.
*/
export type AgentKnowledgeRetrievalConfig = {
mode: 'multiple' | 'single'
model?: AgentKnowledgeModelConfig | null
reranking_enable?: boolean
reranking_mode?: string
reranking_model?: AgentKnowledgeRerankingModelConfig | null
score_threshold?: number | null
top_k?: number | null
weights?: AgentKnowledgeWeightedScoreConfig | null
}
export type AgentModelResponseFormatConfig = {
type?: string | null
[key: string]: unknown
@ -1454,6 +1488,38 @@ export type FileListInputConfig = {
type?: 'file-list'
}
/**
* A group of metadata conditions combined with a logical operator
* ('and' or 'or'). Both fields are optional.
*/
export type AgentKnowledgeMetadataConditions = {
conditions?: Array<AgentKnowledgeMetadataCondition>
logical_operator?: 'and' | 'or'
}
/**
* Model reference used by knowledge retrieval and metadata filtering.
* `mode`, `name` and `provider` are required strings; `completion_params`
* is an optional free-form key/value map.
*/
export type AgentKnowledgeModelConfig = {
completion_params?: {
[key: string]: unknown
}
mode: string
name: string
provider: string
}
/** The two accepted query modes for a knowledge set. */
export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
/** Reranking model reference: required `model` and `provider` strings. */
export type AgentKnowledgeRerankingModelConfig = {
model: string
provider: string
}
/**
* Weighted-score settings for retrieval.
* `keyword_setting` and `vector_setting` are optional, nullable free-form
* maps; the trailing index signature also admits arbitrary extra keys.
*/
export type AgentKnowledgeWeightedScoreConfig = {
keyword_setting?: {
[key: string]: unknown
} | null
vector_setting?: {
[key: string]: unknown
} | null
weight_type?: string | null
[key: string]: unknown
}
export type StringSource = {
selector?: Array<string>
type: ValueSourceType
@ -1470,6 +1536,30 @@ export type FileType = 'audio' | 'custom' | 'document' | 'image' | 'video'
export type FileTransferMethod = 'datasource_file' | 'local_file' | 'remote_url' | 'tool_file'
/**
* A single metadata condition: the metadata field `name`, one of eighteen
* comparison operators, and an optional `value` that may be a string, a
* string array, a number or null.
*/
export type AgentKnowledgeMetadataCondition = {
comparison_operator:
| '<'
| '='
| '>'
| 'after'
| 'before'
| 'contains'
| 'empty'
| 'end with'
| 'in'
| 'is'
| 'is not'
| 'not contains'
| 'not empty'
| 'not in'
| 'start with'
| '≠'
| '≤'
| '≥'
name: string
value?: string | Array<string> | number | null
}
export type ValueSourceType = 'constant' | 'variable'
export type AgentAppPaginationWritable = {

View File

@ -1022,15 +1022,6 @@ export const zAgentComposerDifyToolCandidateResponse = z.object({
tools_count: z.int().nullish(),
})
/**
* AgentKnowledgeDatasetConfig
*/
export const zAgentKnowledgeDatasetConfig = z.object({
description: z.string().nullish(),
id: z.string().max(255).nullish(),
name: z.string().max(255).nullish(),
})
/**
* SimpleAccount
*/
@ -1279,30 +1270,6 @@ export const zAgentSoulHumanConfig = z.object({
tools: z.array(zAgentHumanToolConfig).optional(),
})
/**
* AgentKnowledgeQueryConfig
*/
export const zAgentKnowledgeQueryConfig = z.object({
query: z.string().nullish(),
score_threshold: z.number().gte(0).lte(1).nullish(),
score_threshold_enabled: z.boolean().nullish(),
top_k: z.int().gte(1).nullish(),
})
/**
* AgentKnowledgeQueryMode
*/
export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
/**
* AgentSoulKnowledgeConfig
*/
export const zAgentSoulKnowledgeConfig = z.object({
datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
query_config: zAgentKnowledgeQueryConfig.optional(),
query_mode: zAgentKnowledgeQueryMode.nullish(),
})
/**
* AgentMemoryArtifactConfig
*/
@ -1521,6 +1488,27 @@ export const zAgentCliToolConfig = z.object({
tool_name: z.string().max(255).nullish(),
})
/**
* AgentComposerKnowledgeDatasetCandidateResponse
*
* Schema for a dataset entry in a knowledge-set candidate. `description`,
* `id` and `name` are nullish (`id` and `name` capped at 255 characters);
* `missing` defaults to `false` when omitted.
*/
export const zAgentComposerKnowledgeDatasetCandidateResponse = z.object({
description: z.string().nullish(),
id: z.string().max(255).nullish(),
missing: z.boolean().optional().default(false),
name: z.string().max(255).nullish(),
})
/**
* AgentComposerKnowledgeSetCandidateResponse
*
* Schema for a knowledge-set candidate. `id` and `name` are required strings;
* `datasets` and `missing_dataset_ids` are optional arrays and `description`
* is nullish.
*/
export const zAgentComposerKnowledgeSetCandidateResponse = z.object({
datasets: z.array(zAgentComposerKnowledgeDatasetCandidateResponse).optional(),
description: z.string().nullish(),
id: z.string(),
missing_dataset_ids: z.array(z.string()).optional(),
name: z.string(),
})
/**
* AgentComposerSoulCandidatesResponse
*/
@ -1528,7 +1516,7 @@ export const zAgentComposerSoulCandidatesResponse = z.object({
cli_tools: z.array(zAgentCliToolConfig).optional(),
dify_tools: z.array(zAgentComposerDifyToolCandidateResponse).optional(),
human_contacts: z.array(zAgentHumanContactConfig).optional(),
knowledge_datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
knowledge_sets: z.array(zAgentComposerKnowledgeSetCandidateResponse).optional(),
})
/**
@ -1583,6 +1571,15 @@ export const zHumanInputFormSubmissionData = z.object({
submitted_data: z.record(z.string(), zJsonValue2).nullish(),
})
/**
* AgentKnowledgeDatasetConfig
*
* Schema for a dataset reference: all fields nullish, with `id` and `name`
* limited to 255 characters.
*/
export const zAgentKnowledgeDatasetConfig = z.object({
description: z.string().nullish(),
id: z.string().max(255).nullish(),
name: z.string().max(255).nullish(),
})
/**
* AgentModelResponseFormatConfig
*/
@ -1733,53 +1730,6 @@ export const zAgentSoulToolsConfig = z.object({
dify_tools: z.array(zAgentSoulDifyToolConfig).optional(),
})
/**
* AgentSoulConfig
*/
export const zAgentSoulConfig = z.object({
app_features: zAgentSoulAppFeaturesConfig.optional(),
app_variables: z.array(zAppVariableConfig).optional(),
env: zAgentSoulEnvConfig.optional(),
human: zAgentSoulHumanConfig.optional(),
knowledge: zAgentSoulKnowledgeConfig.optional(),
memory: zAgentSoulMemoryConfig.optional(),
misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
model: zAgentSoulModelConfig.nullish(),
prompt: zAgentSoulPromptConfig.optional(),
sandbox: zAgentSoulSandboxConfig.optional(),
schema_version: z.int().optional().default(1),
tools: zAgentSoulToolsConfig.optional(),
})
/**
* AgentAppComposerResponse
*/
export const zAgentAppComposerResponse = z.object({
active_config_snapshot: zAgentConfigSnapshotSummaryResponse,
agent: zAgentComposerAgentResponse,
agent_soul: zAgentSoulConfig,
save_options: z.array(zComposerSaveStrategy),
validation: zComposerValidationFindingsResponse.nullish(),
variant: z.literal('agent_app'),
})
/**
* AgentConfigSnapshotDetailResponse
*/
export const zAgentConfigSnapshotDetailResponse = z.object({
agent_id: z.string().nullish(),
config_snapshot: zAgentSoulConfig,
created_at: z.int().nullish(),
created_by: z.string().nullish(),
display_version: z.int().nullish(),
id: z.string(),
revisions: z.array(zAgentConfigRevisionResponse).optional(),
snapshot_version: z.int().nullish(),
summary: z.string().nullish(),
version: z.int(),
version_note: z.string().nullish(),
})
/**
* OutputErrorStrategy
*
@ -1869,22 +1819,6 @@ export const zWorkflowNodeJobConfig = z.object({
workflow_prompt: z.string().optional().default(''),
})
/**
* ComposerSavePayload
*/
export const zComposerSavePayload = z.object({
agent_soul: zAgentSoulConfig.nullish(),
binding: zComposerBindingPayload.nullish(),
client_revision_id: z.string().nullish(),
idempotency_key: z.string().nullish(),
new_agent_name: z.string().min(1).max(255).nullish(),
node_job: zWorkflowNodeJobConfig.nullish(),
save_strategy: zComposerSaveStrategy,
soul_lock: zComposerSoulLockPayload.optional(),
variant: zComposerVariant,
version_note: z.string().nullish(),
})
/**
* ButtonStyle
*
@ -1903,6 +1837,60 @@ export const zUserActionConfig = z.object({
title: z.string().max(100),
})
/**
* AgentKnowledgeModelConfig
*
* Schema for a model reference. `mode` (1-64 chars), `name` and `provider`
* (1-255 chars each) are required; `completion_params` is an optional
* string-keyed record of unknown values.
*/
export const zAgentKnowledgeModelConfig = z.object({
completion_params: z.record(z.string(), z.unknown()).optional(),
mode: z.string().min(1).max(64),
name: z.string().min(1).max(255),
provider: z.string().min(1).max(255),
})
/**
* AgentKnowledgeQueryMode
*
* Enum schema accepting exactly 'generated_query' or 'user_query'.
*/
export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
/**
* AgentKnowledgeQueryConfig
*
* Schema for query settings: required `mode` validated by
* zAgentKnowledgeQueryMode, plus a nullish string `value`.
*/
export const zAgentKnowledgeQueryConfig = z.object({
mode: zAgentKnowledgeQueryMode,
value: z.string().nullish(),
})
/**
* AgentKnowledgeRerankingModelConfig
*
* Schema for a reranking model reference: `model` and `provider` are both
* required strings of 1-255 characters.
*/
export const zAgentKnowledgeRerankingModelConfig = z.object({
model: z.string().min(1).max(255),
provider: z.string().min(1).max(255),
})
/**
* AgentKnowledgeWeightedScoreConfig
*
* Schema for weighted-score settings. `keyword_setting` and `vector_setting`
* are nullish free-form records; `weight_type` is a nullish string of at most
* 64 characters.
*/
export const zAgentKnowledgeWeightedScoreConfig = z.object({
keyword_setting: z.record(z.string(), z.unknown()).nullish(),
vector_setting: z.record(z.string(), z.unknown()).nullish(),
weight_type: z.string().max(64).nullish(),
})
/**
* AgentKnowledgeRetrievalConfig
*
* Schema for retrieval settings. `mode` is required ('multiple' or 'single').
* When omitted, `reranking_enable` defaults to `true` and `reranking_mode` to
* 'reranking_model'. `score_threshold` must lie in [0, 1] and `top_k` must be
* an integer >= 1 when present.
*/
export const zAgentKnowledgeRetrievalConfig = z.object({
mode: z.enum(['multiple', 'single']),
model: zAgentKnowledgeModelConfig.nullish(),
reranking_enable: z.boolean().optional().default(true),
reranking_mode: z.string().optional().default('reranking_model'),
reranking_model: zAgentKnowledgeRerankingModelConfig.nullish(),
score_threshold: z.number().gte(0).lte(1).nullish(),
top_k: z.int().gte(1).nullish(),
weights: zAgentKnowledgeWeightedScoreConfig.nullish(),
})
/**
* FileType
*/
@ -1941,6 +1929,134 @@ export const zFileListInputConfig = z.object({
type: z.literal('file-list').optional().default('file-list'),
})
/**
* AgentKnowledgeMetadataCondition
*
* Schema for a single metadata condition. `comparison_operator` must be one
* of the listed operators, `name` is a required string of 1-255 characters,
* and `value` is a nullish string, string array or number.
*/
export const zAgentKnowledgeMetadataCondition = z.object({
comparison_operator: z.enum([
'<',
'=',
'>',
'after',
'before',
'contains',
'empty',
'end with',
'in',
'is',
'is not',
'not contains',
'not empty',
'not in',
'start with',
'≠',
'≤',
'≥',
]),
name: z.string().min(1).max(255),
value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(),
})
/**
* AgentKnowledgeMetadataConditions
*
* Schema for a condition group: an optional array of conditions and a
* `logical_operator` that defaults to 'and' when omitted.
*/
export const zAgentKnowledgeMetadataConditions = z.object({
conditions: z.array(zAgentKnowledgeMetadataCondition).optional(),
logical_operator: z.enum(['and', 'or']).optional().default('and'),
})
/**
* AgentKnowledgeMetadataFilteringConfig
*
* Schema for metadata filtering settings. `mode` defaults to 'disabled' when
* omitted; `conditions` and `model_config` are nullish.
*/
export const zAgentKnowledgeMetadataFilteringConfig = z.object({
conditions: zAgentKnowledgeMetadataConditions.nullish(),
mode: z.enum(['automatic', 'disabled', 'manual']).optional().default('disabled'),
model_config: zAgentKnowledgeModelConfig.nullish(),
})
/**
* AgentKnowledgeSetConfig
*
* Schema for one knowledge set. `datasets`, `id`, `name` (1-255 chars each
* for the two strings), `query` and `retrieval` are required; `description`
* is nullish and `metadata_filtering` is optional.
*/
export const zAgentKnowledgeSetConfig = z.object({
datasets: z.array(zAgentKnowledgeDatasetConfig),
description: z.string().nullish(),
id: z.string().min(1).max(255),
metadata_filtering: zAgentKnowledgeMetadataFilteringConfig.optional(),
name: z.string().min(1).max(255),
query: zAgentKnowledgeQueryConfig,
retrieval: zAgentKnowledgeRetrievalConfig,
})
/**
* AgentSoulKnowledgeConfig
*
* Schema for the knowledge section of an agent soul: a single optional
* `sets` array of knowledge-set configs.
*/
export const zAgentSoulKnowledgeConfig = z.object({
sets: z.array(zAgentKnowledgeSetConfig).optional(),
})
/**
* AgentSoulConfig
*
* Schema for the full agent soul configuration. Every section is optional;
* `model` may additionally be null and `schema_version` defaults to 1.
* `misc_legacy` is validated with the same schema as `app_features`.
*/
export const zAgentSoulConfig = z.object({
app_features: zAgentSoulAppFeaturesConfig.optional(),
app_variables: z.array(zAppVariableConfig).optional(),
env: zAgentSoulEnvConfig.optional(),
human: zAgentSoulHumanConfig.optional(),
knowledge: zAgentSoulKnowledgeConfig.optional(),
memory: zAgentSoulMemoryConfig.optional(),
misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
model: zAgentSoulModelConfig.nullish(),
prompt: zAgentSoulPromptConfig.optional(),
sandbox: zAgentSoulSandboxConfig.optional(),
schema_version: z.int().optional().default(1),
tools: zAgentSoulToolsConfig.optional(),
})
/**
* AgentAppComposerResponse
*
* Schema for the composer response of the 'agent_app' variant. All fields
* are required except `validation`, which is nullish; `variant` must be the
* literal 'agent_app'.
*/
export const zAgentAppComposerResponse = z.object({
active_config_snapshot: zAgentConfigSnapshotSummaryResponse,
agent: zAgentComposerAgentResponse,
agent_soul: zAgentSoulConfig,
save_options: z.array(zComposerSaveStrategy),
validation: zComposerValidationFindingsResponse.nullish(),
variant: z.literal('agent_app'),
})
/**
* ComposerSavePayload
*
* Schema for the composer save request body. `save_strategy` and `variant`
* are required; `soul_lock` is optional and the remaining fields are nullish.
* `new_agent_name`, when given, must be 1-255 characters.
*/
export const zComposerSavePayload = z.object({
agent_soul: zAgentSoulConfig.nullish(),
binding: zComposerBindingPayload.nullish(),
client_revision_id: z.string().nullish(),
idempotency_key: z.string().nullish(),
new_agent_name: z.string().min(1).max(255).nullish(),
node_job: zWorkflowNodeJobConfig.nullish(),
save_strategy: zComposerSaveStrategy,
soul_lock: zComposerSoulLockPayload.optional(),
variant: zComposerVariant,
version_note: z.string().nullish(),
})
/**
* AgentConfigSnapshotDetailResponse
*
* Schema for a config snapshot detail. `config_snapshot`, `id` and `version`
* are required; `revisions` is an optional array and every other field is
* nullish.
*/
export const zAgentConfigSnapshotDetailResponse = z.object({
agent_id: z.string().nullish(),
config_snapshot: zAgentSoulConfig,
created_at: z.int().nullish(),
created_by: z.string().nullish(),
display_version: z.int().nullish(),
id: z.string(),
revisions: z.array(zAgentConfigRevisionResponse).optional(),
snapshot_version: z.int().nullish(),
summary: z.string().nullish(),
version: z.int(),
version_note: z.string().nullish(),
})
/**
* ValueSourceType
*

View File

@ -1890,7 +1890,7 @@ export type AgentComposerSoulCandidatesResponse = {
cli_tools?: Array<AgentCliToolConfig>
dify_tools?: Array<AgentComposerDifyToolCandidateResponse>
human_contacts?: Array<AgentHumanContactConfig>
knowledge_datasets?: Array<AgentKnowledgeDatasetConfig>
knowledge_sets?: Array<AgentComposerKnowledgeSetCandidateResponse>
}
export type ComposerCandidateCapabilities = {
@ -2124,9 +2124,7 @@ export type AgentSoulHumanConfig = {
}
export type AgentSoulKnowledgeConfig = {
datasets?: Array<AgentKnowledgeDatasetConfig>
query_config?: AgentKnowledgeQueryConfig
query_mode?: AgentKnowledgeQueryMode | null
sets?: Array<AgentKnowledgeSetConfig>
}
export type AgentSoulMemoryConfig = {
@ -2278,11 +2276,12 @@ export type AgentComposerDifyToolCandidateResponse = {
tools_count?: number | null
}
export type AgentKnowledgeDatasetConfig = {
export type AgentComposerKnowledgeSetCandidateResponse = {
datasets?: Array<AgentComposerKnowledgeDatasetCandidateResponse>
description?: string | null
id?: string | null
name?: string | null
[key: string]: unknown
id: string
missing_dataset_ids?: Array<string>
name: string
}
export type CheckResultView = {
@ -2393,16 +2392,16 @@ export type AgentHumanToolConfig = {
[key: string]: unknown
}
export type AgentKnowledgeQueryConfig = {
query?: string | null
score_threshold?: number | null
score_threshold_enabled?: boolean | null
top_k?: number | null
[key: string]: unknown
export type AgentKnowledgeSetConfig = {
datasets: Array<AgentKnowledgeDatasetConfig>
description?: string | null
id: string
metadata_filtering?: AgentKnowledgeMetadataFilteringConfig
name: string
query: AgentKnowledgeQueryConfig
retrieval: AgentKnowledgeRetrievalConfig
}
export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
export type AgentMemoryArtifactConfig = {
id?: string | null
name?: string | null
@ -2506,6 +2505,13 @@ export type AgentPermissionConfig = {
export type AgentCliToolRiskLevel = 'dangerous' | 'safe' | 'unknown'
/**
* One dataset entry listed inside a knowledge-set candidate.
* Every field is optional; `description`, `id` and `name` may also be null.
* NOTE(review): `missing` presumably flags a referenced dataset that could not
* be resolved - confirm against the backend response model.
*/
export type AgentComposerKnowledgeDatasetCandidateResponse = {
description?: string | null
id?: string | null
missing?: boolean
name?: string | null
}
export type ButtonStyle = 'accent' | 'default' | 'ghost' | 'primary'
export type ParagraphInputConfig = {
@ -2545,6 +2551,34 @@ export type AgentModerationProviderConfig = {
[key: string]: unknown
}
/**
* Dataset reference carried by a knowledge set.
* All three fields are optional and nullable; this shape has no index
* signature, so unknown extra keys are not part of the type.
*/
export type AgentKnowledgeDatasetConfig = {
description?: string | null
id?: string | null
name?: string | null
}
/**
* Metadata filtering settings for a knowledge set.
* `mode` is one of 'automatic', 'disabled' or 'manual'; `conditions` and
* `model_config` are optional and nullable.
* NOTE(review): which mode consumes `conditions` vs. `model_config` is not
* visible here - confirm against the backend.
*/
export type AgentKnowledgeMetadataFilteringConfig = {
conditions?: AgentKnowledgeMetadataConditions | null
mode?: 'automatic' | 'disabled' | 'manual'
model_config?: AgentKnowledgeModelConfig | null
}
/**
* Query settings for a knowledge set: a required `mode` plus an optional,
* nullable `value` string.
* NOTE(review): the meaning of `value` per mode is not visible here - confirm.
*/
export type AgentKnowledgeQueryConfig = {
mode: AgentKnowledgeQueryMode
value?: string | null
}
/**
* Retrieval settings for a knowledge set.
* Only `mode` ('multiple' or 'single') is required; the model, reranking,
* threshold, top-k and weight fields are all optional.
*/
export type AgentKnowledgeRetrievalConfig = {
mode: 'multiple' | 'single'
model?: AgentKnowledgeModelConfig | null
reranking_enable?: boolean
reranking_mode?: string
reranking_model?: AgentKnowledgeRerankingModelConfig | null
score_threshold?: number | null
top_k?: number | null
weights?: AgentKnowledgeWeightedScoreConfig | null
}
export type AgentModelResponseFormatConfig = {
type?: string | null
[key: string]: unknown
@ -2578,8 +2612,64 @@ export type AgentModerationIoConfig = {
[key: string]: unknown
}
/**
* A group of metadata conditions combined with a logical operator
* ('and' or 'or'). Both fields are optional.
*/
export type AgentKnowledgeMetadataConditions = {
conditions?: Array<AgentKnowledgeMetadataCondition>
logical_operator?: 'and' | 'or'
}
/**
* Model reference used by knowledge retrieval and metadata filtering.
* `mode`, `name` and `provider` are required strings; `completion_params`
* is an optional free-form key/value map.
*/
export type AgentKnowledgeModelConfig = {
completion_params?: {
[key: string]: unknown
}
mode: string
name: string
provider: string
}
/** The two accepted query modes for a knowledge set. */
export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
/** Reranking model reference: required `model` and `provider` strings. */
export type AgentKnowledgeRerankingModelConfig = {
model: string
provider: string
}
/**
* Weighted-score settings for retrieval.
* `keyword_setting` and `vector_setting` are optional, nullable free-form
* maps; the trailing index signature also admits arbitrary extra keys.
*/
export type AgentKnowledgeWeightedScoreConfig = {
keyword_setting?: {
[key: string]: unknown
} | null
vector_setting?: {
[key: string]: unknown
} | null
weight_type?: string | null
[key: string]: unknown
}
export type ValueSourceType = 'constant' | 'variable'
/**
* A single metadata condition: the metadata field `name`, one of eighteen
* comparison operators, and an optional `value` that may be a string, a
* string array, a number or null.
*/
export type AgentKnowledgeMetadataCondition = {
comparison_operator:
| '<'
| '='
| '>'
| 'after'
| 'before'
| 'contains'
| 'empty'
| 'end with'
| 'in'
| 'is'
| 'is not'
| 'not contains'
| 'not empty'
| 'not in'
| 'start with'
| '≠'
| '≤'
| '≥'
name: string
value?: string | Array<string> | number | null
}
export type AppPaginationWritable = {
data: Array<AppPartialWritable>
has_more: boolean

View File

@ -2629,15 +2629,6 @@ export const zAgentComposerDifyToolCandidateResponse = z.object({
tools_count: z.int().nullish(),
})
/**
* AgentKnowledgeDatasetConfig
*/
export const zAgentKnowledgeDatasetConfig = z.object({
description: z.string().nullish(),
id: z.string().max(255).nullish(),
name: z.string().max(255).nullish(),
})
/**
* CheckResultView
*
@ -2767,30 +2758,6 @@ export const zAgentSoulHumanConfig = z.object({
tools: z.array(zAgentHumanToolConfig).optional(),
})
/**
* AgentKnowledgeQueryConfig
*/
export const zAgentKnowledgeQueryConfig = z.object({
query: z.string().nullish(),
score_threshold: z.number().gte(0).lte(1).nullish(),
score_threshold_enabled: z.boolean().nullish(),
top_k: z.int().gte(1).nullish(),
})
/**
* AgentKnowledgeQueryMode
*/
export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
/**
* AgentSoulKnowledgeConfig
*/
export const zAgentSoulKnowledgeConfig = z.object({
datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
query_config: zAgentKnowledgeQueryConfig.optional(),
query_mode: zAgentKnowledgeQueryMode.nullish(),
})
/**
* AgentMemoryArtifactConfig
*/
@ -3002,6 +2969,27 @@ export const zAgentCliToolConfig = z.object({
tool_name: z.string().max(255).nullish(),
})
/**
* AgentComposerKnowledgeDatasetCandidateResponse
*
* Schema for a dataset entry in a knowledge-set candidate. `description`,
* `id` and `name` are nullish (`id` and `name` capped at 255 characters);
* `missing` defaults to `false` when omitted.
*/
export const zAgentComposerKnowledgeDatasetCandidateResponse = z.object({
description: z.string().nullish(),
id: z.string().max(255).nullish(),
missing: z.boolean().optional().default(false),
name: z.string().max(255).nullish(),
})
/**
* AgentComposerKnowledgeSetCandidateResponse
*
* Schema for a knowledge-set candidate. `id` and `name` are required strings;
* `datasets` and `missing_dataset_ids` are optional arrays and `description`
* is nullish.
*/
export const zAgentComposerKnowledgeSetCandidateResponse = z.object({
datasets: z.array(zAgentComposerKnowledgeDatasetCandidateResponse).optional(),
description: z.string().nullish(),
id: z.string(),
missing_dataset_ids: z.array(z.string()).optional(),
name: z.string(),
})
/**
* AgentComposerSoulCandidatesResponse
*/
@ -3009,7 +2997,7 @@ export const zAgentComposerSoulCandidatesResponse = z.object({
cli_tools: z.array(zAgentCliToolConfig).optional(),
dify_tools: z.array(zAgentComposerDifyToolCandidateResponse).optional(),
human_contacts: z.array(zAgentHumanContactConfig).optional(),
knowledge_datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
knowledge_sets: z.array(zAgentComposerKnowledgeSetCandidateResponse).optional(),
})
/**
@ -3041,6 +3029,15 @@ export const zUserActionConfig = z.object({
title: z.string().max(100),
})
/**
* AgentKnowledgeDatasetConfig
*
* Schema for a dataset reference: all fields nullish, with `id` and `name`
* limited to 255 characters.
*/
export const zAgentKnowledgeDatasetConfig = z.object({
description: z.string().nullish(),
id: z.string().max(255).nullish(),
name: z.string().max(255).nullish(),
})
/**
* AgentModelResponseFormatConfig
*/
@ -3292,57 +3289,57 @@ export const zAgentSoulAppFeaturesConfig = z.object({
})
/**
* AgentSoulConfig
* AgentKnowledgeModelConfig
*/
export const zAgentSoulConfig = z.object({
app_features: zAgentSoulAppFeaturesConfig.optional(),
app_variables: z.array(zAppVariableConfig).optional(),
env: zAgentSoulEnvConfig.optional(),
human: zAgentSoulHumanConfig.optional(),
knowledge: zAgentSoulKnowledgeConfig.optional(),
memory: zAgentSoulMemoryConfig.optional(),
misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
model: zAgentSoulModelConfig.nullish(),
prompt: zAgentSoulPromptConfig.optional(),
sandbox: zAgentSoulSandboxConfig.optional(),
schema_version: z.int().optional().default(1),
tools: zAgentSoulToolsConfig.optional(),
export const zAgentKnowledgeModelConfig = z.object({
completion_params: z.record(z.string(), z.unknown()).optional(),
mode: z.string().min(1).max(64),
name: z.string().min(1).max(255),
provider: z.string().min(1).max(255),
})
/**
* WorkflowAgentComposerResponse
* AgentKnowledgeQueryMode
*/
export const zWorkflowAgentComposerResponse = z.object({
active_config_snapshot: zAgentConfigSnapshotSummaryResponse.nullish(),
agent: zAgentComposerAgentResponse.nullish(),
agent_soul: zAgentSoulConfig,
app_id: z.string().nullish(),
binding: zAgentComposerBindingResponse.nullish(),
effective_declared_outputs: z.array(zDeclaredOutputConfig).optional(),
impact_summary: zAgentComposerImpactResponse.nullish(),
node_id: z.string().nullish(),
node_job: zWorkflowNodeJobConfig,
save_options: z.array(zComposerSaveStrategy),
soul_lock: zAgentComposerSoulLockResponse,
validation: zComposerValidationFindingsResponse.nullish(),
variant: z.literal('workflow'),
workflow_id: z.string().nullish(),
export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
/**
* AgentKnowledgeQueryConfig
*
* Schema for query settings: required `mode` validated by
* zAgentKnowledgeQueryMode, plus a nullish string `value`.
*/
export const zAgentKnowledgeQueryConfig = z.object({
mode: zAgentKnowledgeQueryMode,
value: z.string().nullish(),
})
/**
* ComposerSavePayload
* AgentKnowledgeRerankingModelConfig
*/
export const zComposerSavePayload = z.object({
agent_soul: zAgentSoulConfig.nullish(),
binding: zComposerBindingPayload.nullish(),
client_revision_id: z.string().nullish(),
idempotency_key: z.string().nullish(),
new_agent_name: z.string().min(1).max(255).nullish(),
node_job: zWorkflowNodeJobConfig.nullish(),
save_strategy: zComposerSaveStrategy,
soul_lock: zComposerSoulLockPayload.optional(),
variant: zComposerVariant,
version_note: z.string().nullish(),
export const zAgentKnowledgeRerankingModelConfig = z.object({
model: z.string().min(1).max(255),
provider: z.string().min(1).max(255),
})
/**
* AgentKnowledgeWeightedScoreConfig
*
* Schema for weighted-score settings. `keyword_setting` and `vector_setting`
* are nullish free-form records; `weight_type` is a nullish string of at most
* 64 characters.
*/
export const zAgentKnowledgeWeightedScoreConfig = z.object({
keyword_setting: z.record(z.string(), z.unknown()).nullish(),
vector_setting: z.record(z.string(), z.unknown()).nullish(),
weight_type: z.string().max(64).nullish(),
})
/**
* AgentKnowledgeRetrievalConfig
*
* Schema for retrieval settings. `mode` is required ('multiple' or 'single').
* When omitted, `reranking_enable` defaults to `true` and `reranking_mode` to
* 'reranking_model'. `score_threshold` must lie in [0, 1] and `top_k` must be
* an integer >= 1 when present.
*/
export const zAgentKnowledgeRetrievalConfig = z.object({
mode: z.enum(['multiple', 'single']),
model: zAgentKnowledgeModelConfig.nullish(),
reranking_enable: z.boolean().optional().default(true),
reranking_mode: z.string().optional().default('reranking_model'),
reranking_model: zAgentKnowledgeRerankingModelConfig.nullish(),
score_threshold: z.number().gte(0).lte(1).nullish(),
top_k: z.int().gte(1).nullish(),
weights: zAgentKnowledgeWeightedScoreConfig.nullish(),
})
/**
@ -3466,6 +3463,125 @@ export const zMessageInfiniteScrollPaginationResponse = z.object({
limit: z.int(),
})
/**
* AgentKnowledgeMetadataCondition
*
* Schema for a single metadata condition. `comparison_operator` must be one
* of the listed operators, `name` is a required string of 1-255 characters,
* and `value` is a nullish string, string array or number.
*/
export const zAgentKnowledgeMetadataCondition = z.object({
comparison_operator: z.enum([
'<',
'=',
'>',
'after',
'before',
'contains',
'empty',
'end with',
'in',
'is',
'is not',
'not contains',
'not empty',
'not in',
'start with',
'≠',
'≤',
'≥',
]),
name: z.string().min(1).max(255),
value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(),
})
/**
* AgentKnowledgeMetadataConditions
*
* Schema for a condition group: an optional array of conditions and a
* `logical_operator` that defaults to 'and' when omitted.
*/
export const zAgentKnowledgeMetadataConditions = z.object({
conditions: z.array(zAgentKnowledgeMetadataCondition).optional(),
logical_operator: z.enum(['and', 'or']).optional().default('and'),
})
/**
* AgentKnowledgeMetadataFilteringConfig
*
* Schema for metadata filtering settings. `mode` defaults to 'disabled' when
* omitted; `conditions` and `model_config` are nullish.
*/
export const zAgentKnowledgeMetadataFilteringConfig = z.object({
conditions: zAgentKnowledgeMetadataConditions.nullish(),
mode: z.enum(['automatic', 'disabled', 'manual']).optional().default('disabled'),
model_config: zAgentKnowledgeModelConfig.nullish(),
})
/**
* AgentKnowledgeSetConfig
*
* Schema for one knowledge set. `datasets`, `id`, `name` (1-255 chars each
* for the two strings), `query` and `retrieval` are required; `description`
* is nullish and `metadata_filtering` is optional.
*/
export const zAgentKnowledgeSetConfig = z.object({
datasets: z.array(zAgentKnowledgeDatasetConfig),
description: z.string().nullish(),
id: z.string().min(1).max(255),
metadata_filtering: zAgentKnowledgeMetadataFilteringConfig.optional(),
name: z.string().min(1).max(255),
query: zAgentKnowledgeQueryConfig,
retrieval: zAgentKnowledgeRetrievalConfig,
})
/**
* AgentSoulKnowledgeConfig
*
* Schema for the knowledge section of an agent soul: a single optional
* `sets` array of knowledge-set configs.
*/
export const zAgentSoulKnowledgeConfig = z.object({
sets: z.array(zAgentKnowledgeSetConfig).optional(),
})
/**
* AgentSoulConfig
*
* Schema for the full agent soul configuration. Every section is optional;
* `model` may additionally be null and `schema_version` defaults to 1.
* `misc_legacy` is validated with the same schema as `app_features`.
*/
export const zAgentSoulConfig = z.object({
app_features: zAgentSoulAppFeaturesConfig.optional(),
app_variables: z.array(zAppVariableConfig).optional(),
env: zAgentSoulEnvConfig.optional(),
human: zAgentSoulHumanConfig.optional(),
knowledge: zAgentSoulKnowledgeConfig.optional(),
memory: zAgentSoulMemoryConfig.optional(),
misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
model: zAgentSoulModelConfig.nullish(),
prompt: zAgentSoulPromptConfig.optional(),
sandbox: zAgentSoulSandboxConfig.optional(),
schema_version: z.int().optional().default(1),
tools: zAgentSoulToolsConfig.optional(),
})
/**
* WorkflowAgentComposerResponse
*
* Schema for the composer response of the 'workflow' variant. `agent_soul`,
* `node_job`, `save_options`, `soul_lock` and `variant` are required;
* `effective_declared_outputs` is an optional array and the remaining fields
* are nullish. `variant` must be the literal 'workflow'.
*/
export const zWorkflowAgentComposerResponse = z.object({
active_config_snapshot: zAgentConfigSnapshotSummaryResponse.nullish(),
agent: zAgentComposerAgentResponse.nullish(),
agent_soul: zAgentSoulConfig,
app_id: z.string().nullish(),
binding: zAgentComposerBindingResponse.nullish(),
effective_declared_outputs: z.array(zDeclaredOutputConfig).optional(),
impact_summary: zAgentComposerImpactResponse.nullish(),
node_id: z.string().nullish(),
node_job: zWorkflowNodeJobConfig,
save_options: z.array(zComposerSaveStrategy),
soul_lock: zAgentComposerSoulLockResponse,
validation: zComposerValidationFindingsResponse.nullish(),
variant: z.literal('workflow'),
workflow_id: z.string().nullish(),
})
/**
* ComposerSavePayload
*
* Schema for the composer save request body. `save_strategy` and `variant`
* are required; `soul_lock` is optional and the remaining fields are nullish.
* `new_agent_name`, when given, must be 1-255 characters.
*/
export const zComposerSavePayload = z.object({
agent_soul: zAgentSoulConfig.nullish(),
binding: zComposerBindingPayload.nullish(),
client_revision_id: z.string().nullish(),
idempotency_key: z.string().nullish(),
new_agent_name: z.string().min(1).max(255).nullish(),
node_job: zWorkflowNodeJobConfig.nullish(),
save_strategy: zComposerSaveStrategy,
soul_lock: zComposerSoulLockPayload.optional(),
variant: zComposerVariant,
version_note: z.string().nullish(),
})
/**
* GeneratedAppResponse
*/