diff --git a/api/clients/agent_backend/request_builder.py b/api/clients/agent_backend/request_builder.py index 6eadd4ce3d8..29cc28a3179 100644 --- a/api/clients/agent_backend/request_builder.py +++ b/api/clients/agent_backend/request_builder.py @@ -312,7 +312,7 @@ class AgentBackendRunRequestBuilder: ) ) - if run_input.knowledge is not None and run_input.knowledge.dataset_ids: + if run_input.knowledge is not None and run_input.knowledge.sets: layers.append( RunLayerSpec( name=DIFY_KNOWLEDGE_BASE_LAYER_ID, @@ -513,7 +513,7 @@ class AgentBackendRunRequestBuilder: ) ) - if run_input.knowledge is not None and run_input.knowledge.dataset_ids: + if run_input.knowledge is not None and run_input.knowledge.sets: layers.append( RunLayerSpec( name=DIFY_KNOWLEDGE_BASE_LAYER_ID, diff --git a/api/controllers/console/agent/composer.py b/api/controllers/console/agent/composer.py index 2cd01e427f7..32c134b9fe3 100644 --- a/api/controllers/console/agent/composer.py +++ b/api/controllers/console/agent/composer.py @@ -105,6 +105,7 @@ class WorkflowAgentComposerValidateApi(Resource): def post(self, tenant_id: str, app_model: App, node_id: str): payload = ComposerSavePayload.model_validate(console_ns.payload or {}) ComposerConfigValidator.validate_save_payload(payload) + AgentComposerService.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul) findings = AgentComposerService.collect_validation_findings( tenant_id=tenant_id, payload=payload, @@ -239,6 +240,7 @@ class AgentComposerValidateApi(Resource): _resolve_agent_app_id(tenant_id=tenant_id, agent_id=agent_id) payload = ComposerSavePayload.model_validate(console_ns.payload or {}) ComposerConfigValidator.validate_save_payload(payload) + AgentComposerService.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul) findings = AgentComposerService.collect_validation_findings( tenant_id=tenant_id, payload=payload, diff --git a/api/core/workflow/nodes/agent_v2/runtime_feature_manifest.py b/api/core/workflow/nodes/agent_v2/runtime_feature_manifest.py index fa7b28cbb0a..8fd2783f61f 100644 --- a/api/core/workflow/nodes/agent_v2/runtime_feature_manifest.py +++ b/api/core/workflow/nodes/agent_v2/runtime_feature_manifest.py @@ -3,6 +3,7 @@ from __future__ import annotations from typing import Any from models.agent_config_entities import AgentSoulConfig +from services.agent.knowledge_datasets import list_agent_soul_knowledge_dataset_ids SUPPORTED_AGENT_BACKEND_FEATURES = frozenset( { @@ -48,9 +49,7 @@ def build_runtime_feature_manifest(agent_soul: AgentSoulConfig) -> dict[str, Any ) reserved_status = dict.fromkeys(sorted(RESERVED_AGENT_BACKEND_FEATURES), "reserved_not_executed") - reserved_status["knowledge"] = ( - "supported_by_knowledge_layer" if list_configured_knowledge_dataset_ids(agent_soul) else "not_configured" - ) + reserved_status["knowledge"] = "supported_by_knowledge_layer" if agent_soul.knowledge.sets else "not_configured" reserved_status["tools.dify_tools"] = "supported_when_config_valid" reserved_status["tools.cli_tools"] = "supported_by_shell_bootstrap" reserved_status["env"] = "supported_by_shell_bootstrap" @@ -66,14 +65,14 @@ def build_runtime_feature_manifest(agent_soul: AgentSoulConfig) -> dict[str, Any def list_configured_knowledge_dataset_ids(agent_soul: AgentSoulConfig) -> list[str]: - """Return the normalized knowledge dataset ids that can produce a runtime layer. + """Return normalized dataset ids selected by Agent v2 knowledge sets. ``build_runtime_feature_manifest()`` and ``build_knowledge_layer_config()`` - must stay aligned: both decide knowledge support from this effective, - non-blank dataset-id set rather than from raw - ``agent_soul.knowledge.datasets`` entries. + stay aligned on the set-based contract: DTO validation rejects blank dataset + ids before runtime, so this helper only flattens configured set datasets for + metadata/diagnostic surfaces that still need a dataset-id summary. """ - return [dataset_id for dataset in agent_soul.knowledge.datasets if (dataset_id := (dataset.id or "").strip())] + return list_agent_soul_knowledge_dataset_ids(agent_soul) def _get_nested(value: dict[str, Any], path: str) -> Any: diff --git a/api/core/workflow/nodes/agent_v2/runtime_request_builder.py b/api/core/workflow/nodes/agent_v2/runtime_request_builder.py index e3c2dcee839..366076a7c68 100644 --- a/api/core/workflow/nodes/agent_v2/runtime_request_builder.py +++ b/api/core/workflow/nodes/agent_v2/runtime_request_builder.py @@ -15,7 +15,16 @@ from dify_agent.layers.execution_context import ( DifyExecutionContextLayerConfig, DifyExecutionContextUserFrom, ) -from dify_agent.layers.knowledge import DifyKnowledgeBaseLayerConfig, DifyKnowledgeRetrievalConfig +from dify_agent.layers.knowledge import ( + DifyKnowledgeBaseLayerConfig, + DifyKnowledgeDatasetConfig, + DifyKnowledgeMetadataFilteringConfig, + DifyKnowledgeModelConfig, + DifyKnowledgeQueryConfig, + DifyKnowledgeRerankingModelConfig, + DifyKnowledgeRetrievalConfig, + DifyKnowledgeSetConfig, +) from dify_agent.layers.shell import ( DifyShellCliToolConfig, DifyShellEnvVarConfig, @@ -40,7 +49,9 @@ from graphon.file import FileTransferMethod from graphon.variables.segments import Segment from models.agent import Agent, AgentConfigSnapshot, WorkflowAgentNodeBinding from models.agent_config_entities import ( - AgentKnowledgeQueryConfig, + AgentKnowledgeMetadataFilteringConfig, + AgentKnowledgeModelConfig, + AgentKnowledgeRetrievalConfig, AgentSoulConfig, DeclaredArrayItem, DeclaredOutputChildConfig, @@ -547,42 +558,84 @@ def build_shell_layer_config(agent_soul: AgentSoulConfig) -> DifyShellLayerConfi def build_knowledge_layer_config(agent_soul: AgentSoulConfig) -> DifyKnowledgeBaseLayerConfig | None: - """Map Agent Soul knowledge config into the fixed Dify knowledge-base layer. + """Map Agent Soul knowledge sets into one Dify knowledge-base layer. - Normalization intentionally matches the current dify-agent runtime contract: - - - blank or missing dataset ids are ignored; - - if no valid dataset ids remain, no knowledge layer is injected; - - retrieval mode is always forced to ``multiple`` in this first wiring pass; - - ``top_k`` falls back to a stable runtime default when the soul omits it; - - ``score_threshold`` is only forwarded when the product config explicitly - enables it, otherwise the layer keeps the disabled/default ``0.0`` value; - - metadata filtering stays at the layer DTO default (disabled). + Agent Soul DTO validation owns malformed set rejection. Runtime mapping is + intentionally lossless: every configured set is forwarded with its query + policy, dataset refs, retrieval controls, and metadata-filtering controls. + ``score_threshold=None`` means disabled threshold filtering and maps to the + inner retrieval request's ``0.0`` default through the Agent backend DTO. """ - dataset_ids = list_configured_knowledge_dataset_ids(agent_soul) - if not dataset_ids: + if not agent_soul.knowledge.sets: return None - query_config = agent_soul.knowledge.query_config return DifyKnowledgeBaseLayerConfig( - dataset_ids=dataset_ids, - retrieval=DifyKnowledgeRetrievalConfig( - mode="multiple", - top_k=_knowledge_top_k(query_config), - score_threshold=_knowledge_score_threshold(query_config), - ), + sets=[ + DifyKnowledgeSetConfig( + id=knowledge_set.id, + name=knowledge_set.name, + description=knowledge_set.description, + datasets=[ + DifyKnowledgeDatasetConfig( + id=dataset.id or "", + name=dataset.name, + description=dataset.description, + ) + for dataset in knowledge_set.datasets + ], + query=DifyKnowledgeQueryConfig( + mode=cast(Literal["user_query", "generated_query"], knowledge_set.query.mode.value), + value=knowledge_set.query.value, + ), + retrieval=_knowledge_retrieval_config(knowledge_set.retrieval), + metadata_filtering=_knowledge_metadata_filtering_config(knowledge_set.metadata_filtering), + ) + for knowledge_set in agent_soul.knowledge.sets + ], ) -def _knowledge_top_k(query_config: AgentKnowledgeQueryConfig) -> int: - top_k = query_config.top_k - return top_k if isinstance(top_k, int) and top_k >= 1 else 4 +def _knowledge_retrieval_config(retrieval: AgentKnowledgeRetrievalConfig) -> DifyKnowledgeRetrievalConfig: + return DifyKnowledgeRetrievalConfig( + mode=retrieval.mode, + top_k=retrieval.top_k, + score_threshold=retrieval.score_threshold or 0.0, + reranking_mode=retrieval.reranking_mode, + reranking_enable=retrieval.reranking_enable, + reranking_model=DifyKnowledgeRerankingModelConfig( + provider=retrieval.reranking_model.provider, + model=retrieval.reranking_model.model, + ) + if retrieval.reranking_model is not None + else None, + weights=cast(dict[str, Any], retrieval.weights.model_dump(mode="json", exclude_none=True)) + if retrieval.weights is not None + else None, + model=_knowledge_model_config(retrieval.model), + ) -def _knowledge_score_threshold(query_config: AgentKnowledgeQueryConfig) -> float: - if query_config.score_threshold_enabled and query_config.score_threshold is not None: - return query_config.score_threshold - return 0.0 +def _knowledge_metadata_filtering_config( + metadata_filtering: AgentKnowledgeMetadataFilteringConfig, +) -> DifyKnowledgeMetadataFilteringConfig: + return DifyKnowledgeMetadataFilteringConfig( + mode=metadata_filtering.mode, + model_config=_knowledge_model_config(metadata_filtering.metadata_model_config), + conditions=cast(Any, metadata_filtering.conditions.model_dump(mode="json")) + if metadata_filtering.conditions is not None + else None, + ) + + +def _knowledge_model_config(model: AgentKnowledgeModelConfig | None) -> DifyKnowledgeModelConfig | None: + if model is None: + return None + return DifyKnowledgeModelConfig( + provider=model.provider, + name=model.name, + mode=model.mode, + completion_params=model.completion_params, + ) def build_ask_human_layer_config(agent_soul: AgentSoulConfig) -> DifyAskHumanLayerConfig | None: diff --git a/api/core/workflow/nodes/agent_v2/validators.py b/api/core/workflow/nodes/agent_v2/validators.py index 2eabac10dd6..7b915fe02be 100644 --- a/api/core/workflow/nodes/agent_v2/validators.py +++ b/api/core/workflow/nodes/agent_v2/validators.py @@ -18,6 +18,7 @@ from models.agent_config_entities import ( ) from models.model import UploadFile from models.workflow import Workflow +from services.agent.knowledge_datasets import list_missing_tenant_knowledge_dataset_ids from .entities import DifyAgentNodeData @@ -146,6 +147,7 @@ class WorkflowAgentNodeValidator: ) cls._validate_agent_soul_env(binding=binding, agent_soul=agent_soul) cls._validate_agent_soul_tools(binding=binding, agent_soul=agent_soul) + cls._validate_agent_soul_knowledge(binding=binding, agent_soul=agent_soul) node_job = WorkflowNodeJobConfig.model_validate(binding.node_job_config_dict) cls.validate_node_job(session=session, binding=binding, node_job=node_job, topology=topology) @@ -364,6 +366,24 @@ class WorkflowAgentNodeValidator: ) cli_tool_names.add(normalized_name) + @classmethod + def _validate_agent_soul_knowledge( + cls, + *, + binding: WorkflowAgentNodeBinding, + agent_soul: AgentSoulConfig, + ) -> None: + """Validate knowledge set dataset rows against the publishing tenant.""" + missing_ids = list_missing_tenant_knowledge_dataset_ids( + tenant_id=binding.tenant_id, + agent_soul=agent_soul, + ) + if missing_ids: + raise WorkflowAgentNodeValidationError( + f"Workflow Agent node {binding.node_id} references missing or out-of-scope knowledge datasets: " + f"{', '.join(missing_ids)}." + ) + @classmethod def _validate_agent_soul_env( cls, diff --git a/api/fields/agent_fields.py b/api/fields/agent_fields.py index e60a6b01426..d664a2af12a 100644 --- a/api/fields/agent_fields.py +++ b/api/fields/agent_fields.py @@ -400,10 +400,22 @@ class AgentComposerNodeJobCandidatesResponse(ResponseModel): human_contacts: list[AgentHumanContactConfig] = Field(default_factory=list) +class AgentComposerKnowledgeDatasetCandidateResponse(AgentKnowledgeDatasetConfig): + missing: bool = False + + +class AgentComposerKnowledgeSetCandidateResponse(ResponseModel): + id: str + name: str + description: str | None = None + datasets: list[AgentComposerKnowledgeDatasetCandidateResponse] = Field(default_factory=list) + missing_dataset_ids: list[str] = Field(default_factory=list) + + class AgentComposerSoulCandidatesResponse(ResponseModel): dify_tools: list[AgentComposerDifyToolCandidateResponse] = Field(default_factory=list) cli_tools: list[AgentCliToolConfig] = Field(default_factory=list) - knowledge_datasets: list[AgentKnowledgeDatasetConfig] = Field(default_factory=list) + knowledge_sets: list[AgentComposerKnowledgeSetCandidateResponse] = Field(default_factory=list) human_contacts: list[AgentHumanContactConfig] = Field(default_factory=list) diff --git a/api/models/agent_config_entities.py b/api/models/agent_config_entities.py index 2503ba66f06..2f81495e9f9 100644 --- a/api/models/agent_config_entities.py +++ b/api/models/agent_config_entities.py @@ -2,10 +2,11 @@ from __future__ import annotations import re from enum import StrEnum -from typing import Annotated, Any, Final, Literal +from typing import Annotated, Any, Final, Literal, Self from pydantic import BaseModel, ConfigDict, Field, WithJsonSchema, field_validator, model_validator +from core.rag.entities.metadata_entities import ConditionValue, SupportedComparisonOperator from core.workflow.file_reference import is_canonical_file_reference from graphon.file import FileTransferMethod @@ -236,17 +237,161 @@ class AgentCliToolConfig(AgentFlexibleConfig): inferred_from: str | None = Field(default=None, max_length=255) -class AgentKnowledgeDatasetConfig(AgentFlexibleConfig): +class AgentKnowledgeDatasetConfig(BaseModel): + model_config = ConfigDict(extra="forbid") + id: str | None = Field(default=None, max_length=255) name: str | None = Field(default=None, max_length=255) description: str | None = None -class AgentKnowledgeQueryConfig(AgentFlexibleConfig): - query: str | None = None +class AgentKnowledgeQueryConfig(BaseModel): + """Per-set query policy for Agent v2 knowledge retrieval. + + Agent v2 stores knowledge as explicit ``knowledge.sets`` rather than the + legacy flat ``datasets`` / ``query_mode`` / ``query_config`` shape. Each + set owns its own query policy, so ``user_query`` must carry an explicit + ``value`` while ``generated_query`` leaves that value empty. + """ + + model_config = ConfigDict(extra="forbid") + + mode: AgentKnowledgeQueryMode + value: str | None = None + + @model_validator(mode="after") + def validate_query(self) -> Self: + if self.mode == AgentKnowledgeQueryMode.USER_QUERY and not (self.value or "").strip(): + raise ValueError("knowledge query.value is required for user_query mode") + return self + + +class AgentKnowledgeModelConfig(BaseModel): + model_config = ConfigDict(extra="forbid") + + provider: str = Field(min_length=1, max_length=255) + name: str = Field(min_length=1, max_length=255) + mode: str = Field(min_length=1, max_length=64) + completion_params: dict[str, Any] = Field(default_factory=dict) + + +class AgentKnowledgeRerankingModelConfig(BaseModel): + model_config = ConfigDict(extra="forbid") + + provider: str = Field(min_length=1, max_length=255) + model: str = Field(min_length=1, max_length=255) + + +class AgentKnowledgeWeightedScoreConfig(AgentFlexibleConfig): + weight_type: str | None = Field(default=None, max_length=64) + vector_setting: dict[str, Any] | None = None + keyword_setting: dict[str, Any] | None = None + + +class AgentKnowledgeRetrievalConfig(BaseModel): + """Per-set retrieval policy for Agent v2 knowledge retrieval. + + Retrieval settings now live on each knowledge set instead of one shared + flat config. A set may use either ``multiple`` retrieval with ``top_k`` or + ``single`` retrieval with a required model config. + """ + + model_config = ConfigDict(extra="forbid") + + mode: Literal["single", "multiple"] top_k: int | None = Field(default=None, ge=1) score_threshold: float | None = Field(default=None, ge=0, le=1) - score_threshold_enabled: bool | None = None + reranking_mode: str = "reranking_model" + reranking_enable: bool = True + reranking_model: AgentKnowledgeRerankingModelConfig | None = None + weights: AgentKnowledgeWeightedScoreConfig | None = None + model: AgentKnowledgeModelConfig | None = None + + @model_validator(mode="after") + def validate_mode_fields(self) -> Self: + if self.mode == "multiple" and self.top_k is None: + raise ValueError("knowledge retrieval.top_k is required for multiple mode") + if self.mode == "single" and self.model is None: + raise ValueError("knowledge retrieval.model is required for single mode") + return self + + +class AgentKnowledgeMetadataCondition(BaseModel): + model_config = ConfigDict(extra="forbid") + + name: str = Field(min_length=1, max_length=255) + comparison_operator: SupportedComparisonOperator + value: ConditionValue = None + + +class AgentKnowledgeMetadataConditions(BaseModel): + model_config = ConfigDict(extra="forbid") + + logical_operator: Literal["and", "or"] = "and" + conditions: list[AgentKnowledgeMetadataCondition] = Field(default_factory=list) + + +class AgentKnowledgeMetadataFilteringConfig(BaseModel): + """Per-set metadata filtering policy. + + The Python attribute uses ``metadata_model_config`` for clarity because the + model belongs to metadata filtering specifically, while the external API and + generated schema keep the historical ``model_config`` field name via alias. + """ + + model_config = ConfigDict(extra="forbid", populate_by_name=True) + + mode: Literal["disabled", "automatic", "manual"] = "disabled" + # Internal name is explicit; wire format remains ``model_config``. + metadata_model_config: AgentKnowledgeModelConfig | None = Field(default=None, alias="model_config") + conditions: AgentKnowledgeMetadataConditions | None = None + + @model_validator(mode="after") + def validate_mode_fields(self) -> Self: + if self.mode == "automatic" and self.metadata_model_config is None: + raise ValueError("metadata_filtering.model_config is required for automatic mode") + if self.mode == "manual" and (self.conditions is None or not self.conditions.conditions): + raise ValueError("metadata_filtering.conditions is required for manual mode") + return self + + +class AgentKnowledgeSetConfig(BaseModel): + """One explicit knowledge set in Agent v2. + + ``knowledge.sets`` replaces the old flat knowledge config. Each set owns + its datasets plus query, retrieval, and metadata policies. An individual + set must contain at least one dataset id even though the overall knowledge + section may be empty, which is how callers express "no knowledge layer". + """ + + model_config = ConfigDict(extra="forbid") + + id: str = Field(min_length=1, max_length=255) + name: str = Field(min_length=1, max_length=255) + description: str | None = None + datasets: list[AgentKnowledgeDatasetConfig] + query: AgentKnowledgeQueryConfig + retrieval: AgentKnowledgeRetrievalConfig + metadata_filtering: AgentKnowledgeMetadataFilteringConfig = Field( + default_factory=AgentKnowledgeMetadataFilteringConfig + ) + + @field_validator("id", "name") + @classmethod + def validate_non_blank_identity(cls, value: str) -> str: + normalized = value.strip() + if not normalized: + raise ValueError("knowledge set id and name must not be blank") + return normalized + + @model_validator(mode="after") + def validate_datasets(self) -> Self: + dataset_ids = [(dataset.id or "").strip() for dataset in self.datasets] + if not dataset_ids or any(not dataset_id for dataset_id in dataset_ids): + raise ValueError("knowledge set requires at least one dataset id") + if len(dataset_ids) != len(set(dataset_ids)): + raise ValueError("knowledge set dataset ids must be unique") + return self class AgentHumanContactConfig(AgentFlexibleConfig): @@ -453,9 +598,28 @@ class AgentSoulToolsConfig(BaseModel): class AgentSoulKnowledgeConfig(BaseModel): - datasets: list[AgentKnowledgeDatasetConfig] = Field(default_factory=list) - query_mode: AgentKnowledgeQueryMode | None = None - query_config: AgentKnowledgeQueryConfig = Field(default_factory=AgentKnowledgeQueryConfig) + """Top-level Agent v2 knowledge config. + + Agent v2 models knowledge as explicit sets instead of one flat + ``datasets`` / ``query_mode`` / ``query_config`` block. An empty ``sets`` + list means no knowledge layer should be emitted at runtime, while set-name + uniqueness stays case-insensitive because runtime selection addresses sets + by name. + """ + + model_config = ConfigDict(extra="forbid") + + sets: list[AgentKnowledgeSetConfig] = Field(default_factory=list) + + @model_validator(mode="after") + def validate_unique_sets(self) -> Self: + set_ids = [item.id.strip() for item in self.sets] + if len(set_ids) != len(set(set_ids)): + raise ValueError("knowledge set ids must be unique") + set_names = [item.name.strip().lower() for item in self.sets] + if len(set_names) != len(set(set_names)): + raise ValueError("knowledge set names must be unique") + return self class AgentSoulHumanConfig(BaseModel): diff --git a/api/openapi/markdown/console-openapi.md b/api/openapi/markdown/console-openapi.md index c600984c089..f37e05f8c2c 100644 --- a/api/openapi/markdown/console-openapi.md +++ b/api/openapi/markdown/console-openapi.md @@ -12433,6 +12433,25 @@ Risk marker for CLI tool bootstrap commands. | current_snapshot_id | string | | No | | workflow_node_count | integer | | Yes | +#### AgentComposerKnowledgeDatasetCandidateResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| description | string | | No | +| id | string | | No | +| missing | boolean | | No | +| name | string | | No | + +#### AgentComposerKnowledgeSetCandidateResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| datasets | [ [AgentComposerKnowledgeDatasetCandidateResponse](#agentcomposerknowledgedatasetcandidateresponse) ] | | No | +| description | string | | No | +| id | string | | Yes | +| missing_dataset_ids | [ string ] | | No | +| name | string | | Yes | + #### AgentComposerNodeJobCandidatesResponse | Name | Type | Description | Required | @@ -12448,7 +12467,7 @@ Risk marker for CLI tool bootstrap commands. | cli_tools | [ [AgentCliToolConfig](#agentclitoolconfig) ] | | No | | dify_tools | [ [AgentComposerDifyToolCandidateResponse](#agentcomposerdifytoolcandidateresponse) ] | | No | | human_contacts | [ [AgentHumanContactConfig](#agenthumancontactconfig) ] | | No | -| knowledge_datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] | | No | +| knowledge_sets | [ [AgentComposerKnowledgeSetCandidateResponse](#agentcomposerknowledgesetcandidateresponse) ] | | No | #### AgentComposerSoulLockResponse @@ -12842,14 +12861,44 @@ the current roster/workflow APIs scoped to Dify Agent. | id | string | | No | | name | string | | No | +#### AgentKnowledgeMetadataCondition + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| comparison_operator | string,
**Available values:** "<", "=", ">", "after", "before", "contains", "empty", "end with", "in", "is", "is not", "not contains", "not empty", "not in", "start with", "≠", "≤", "≥" | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes | +| name | string | | Yes | +| value | string
[ string ]
number | | No | + +#### AgentKnowledgeMetadataConditions + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| conditions | [ [AgentKnowledgeMetadataCondition](#agentknowledgemetadatacondition) ] | | No | +| logical_operator | string,
**Available values:** "and", "or",
**Default:** and | *Enum:* `"and"`, `"or"` | No | + +#### AgentKnowledgeMetadataFilteringConfig + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| conditions | [AgentKnowledgeMetadataConditions](#agentknowledgemetadataconditions) | | No | +| mode | string,
**Available values:** "automatic", "disabled", "manual",
**Default:** disabled | *Enum:* `"automatic"`, `"disabled"`, `"manual"` | No | +| model_config | [AgentKnowledgeModelConfig](#agentknowledgemodelconfig) | | No | + +#### AgentKnowledgeModelConfig + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| completion_params | object | | No | +| mode | string | | Yes | +| name | string | | Yes | +| provider | string | | Yes | + #### AgentKnowledgeQueryConfig | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| query | string | | No | -| score_threshold | number | | No | -| score_threshold_enabled | boolean | | No | -| top_k | integer | | No | +| mode | [AgentKnowledgeQueryMode](#agentknowledgequerymode) | | Yes | +| value | string | | No | #### AgentKnowledgeQueryMode @@ -12857,6 +12906,46 @@ the current roster/workflow APIs scoped to Dify Agent. | ---- | ---- | ----------- | -------- | | AgentKnowledgeQueryMode | string | | | +#### AgentKnowledgeRerankingModelConfig + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| model | string | | Yes | +| provider | string | | Yes | + +#### AgentKnowledgeRetrievalConfig + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| mode | string,
**Available values:** "multiple", "single" | *Enum:* `"multiple"`, `"single"` | Yes | +| model | [AgentKnowledgeModelConfig](#agentknowledgemodelconfig) | | No | +| reranking_enable | boolean,
**Default:** true | | No | +| reranking_mode | string,
**Default:** reranking_model | | No | +| reranking_model | [AgentKnowledgeRerankingModelConfig](#agentknowledgererankingmodelconfig) | | No | +| score_threshold | number | | No | +| top_k | integer | | No | +| weights | [AgentKnowledgeWeightedScoreConfig](#agentknowledgeweightedscoreconfig) | | No | + +#### AgentKnowledgeSetConfig + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] | | Yes | +| description | string | | No | +| id | string | | Yes | +| metadata_filtering | [AgentKnowledgeMetadataFilteringConfig](#agentknowledgemetadatafilteringconfig) | | No | +| name | string | | Yes | +| query | [AgentKnowledgeQueryConfig](#agentknowledgequeryconfig) | | Yes | +| retrieval | [AgentKnowledgeRetrievalConfig](#agentknowledgeretrievalconfig) | | Yes | + +#### AgentKnowledgeWeightedScoreConfig + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| keyword_setting | object | | No | +| vector_setting | object | | No | +| weight_type | string | | No | + #### AgentLogConversationItemResponse | Name | Type | Description | Required | @@ -13258,9 +13347,7 @@ old Agent tool payloads can be read while new payloads stay explicit. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] | | No | -| query_config | [AgentKnowledgeQueryConfig](#agentknowledgequeryconfig) | | No | -| query_mode | [AgentKnowledgeQueryMode](#agentknowledgequerymode) | | No | +| sets | [ [AgentKnowledgeSetConfig](#agentknowledgesetconfig) ] | | No | #### AgentSoulMemoryConfig diff --git a/api/services/agent/composer_candidates.py b/api/services/agent/composer_candidates.py index 7868f2a2f63..b897ffc7b9a 100644 --- a/api/services/agent/composer_candidates.py +++ b/api/services/agent/composer_candidates.py @@ -25,6 +25,7 @@ from models.agent_config_entities import ( AgentSoulConfig, DeclaredOutputConfig, ) +from services.agent.knowledge_datasets import list_agent_soul_knowledge_dataset_ids MAX_CANDIDATES_PER_LIST = 200 @@ -139,19 +140,34 @@ def soul_candidates( cli_tools = [tool.model_dump(exclude_none=True) for tool in soul.tools.cli_tools if tool.enabled] - dataset_ids = [dataset.id for dataset in soul.knowledge.datasets if dataset.id] + dataset_ids = list_agent_soul_knowledge_dataset_ids(soul) dataset_rows = dataset_lookup(dataset_ids) if dataset_ids else {} - knowledge_datasets: list[dict[str, Any]] = [] - for dataset in soul.knowledge.datasets: - if not dataset.id: - continue - row = dataset_rows.get(dataset.id) - knowledge_datasets.append( + knowledge_sets: list[dict[str, Any]] = [] + for knowledge_set in soul.knowledge.sets: + missing_dataset_ids: list[str] = [] + datasets: list[dict[str, Any]] = [] + for dataset in knowledge_set.datasets: + dataset_id = (dataset.id or "").strip() + if not dataset_id: + continue + row = dataset_rows.get(dataset_id) + if row is None: + missing_dataset_ids.append(dataset_id) + datasets.append( + { + "id": dataset_id, + "name": (getattr(row, "name", None) or dataset.name or dataset_id), + "description": getattr(row, "description", None) or dataset.description, + "missing": row is None, + } + ) + knowledge_sets.append( { - "id": dataset.id, - "name": (getattr(row, "name", None) or dataset.name or dataset.id), - "description": getattr(row, "description", None) or dataset.description, - "missing": row is None, + "id": knowledge_set.id, + "name": knowledge_set.name, + "description": knowledge_set.description, + "datasets": datasets, + "missing_dataset_ids": missing_dataset_ids, } ) @@ -161,7 +177,7 @@ def soul_candidates( lists = { "dify_tools": dify_tools, "cli_tools": cli_tools, - "knowledge_datasets": knowledge_datasets, + "knowledge_sets": knowledge_sets, "human_contacts": human_contacts, } capped: dict[str, list[dict[str, Any]]] = {} @@ -192,7 +208,6 @@ def _ref_entry( "inferred": inferred, } - def _capped(values: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], bool]: if len(values) > MAX_CANDIDATES_PER_LIST: return values[:MAX_CANDIDATES_PER_LIST], True diff --git a/api/services/agent/composer_service.py b/api/services/agent/composer_service.py index 815fdcc4420..ae47cce23c1 100644 --- a/api/services/agent/composer_service.py +++ b/api/services/agent/composer_service.py @@ -33,6 +33,11 @@ from services.agent.errors import ( AgentNameConflictError, AgentNotFoundError, AgentVersionNotFoundError, + InvalidComposerConfigError, +) +from services.agent.knowledge_datasets import ( + get_tenant_knowledge_dataset_rows, + list_missing_tenant_knowledge_dataset_ids, ) from services.entities.agent_entities import ( AgentSoulConfig, @@ -101,6 +106,7 @@ class AgentComposerService: _backfill_cli_tool_ids(payload.agent_soul) ComposerConfigValidator.validate_save_payload(payload) + cls.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul) workflow = cls._get_draft_workflow(tenant_id=tenant_id, app_id=app_id) binding = cls._get_workflow_binding(tenant_id=tenant_id, workflow_id=workflow.id, node_id=node_id) @@ -195,6 +201,7 @@ class AgentComposerService: raise ValueError("Agent App composer endpoint only accepts agent_app variant") _backfill_cli_tool_ids(payload.agent_soul) ComposerConfigValidator.validate_save_payload(payload) + cls.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul) if payload.agent_soul is None: raise ValueError("agent_soul is required") @@ -273,19 +280,15 @@ class AgentComposerService: agent_id: str | None = None, ) -> dict[str, Any]: """ENG-617 soft findings, with DB-backed dataset and drive mention checks.""" - from services.agent.prompt_mentions import MentionKind, parse_prompt_mentions - - mentioned_ids: set[str] = set() - if payload.agent_soul is not None: - mentioned_ids |= { - mention.ref_id - for mention in parse_prompt_mentions(payload.agent_soul.prompt.system_prompt) - if mention.kind == MentionKind.KNOWLEDGE - } - existing_dataset_ids: set[str] | None = None - if mentioned_ids: - existing_dataset_ids = set(cls._dataset_rows(tenant_id=tenant_id, dataset_ids=sorted(mentioned_ids))) - findings = ComposerConfigValidator.collect_soft_findings(payload, existing_dataset_ids=existing_dataset_ids) + existing_knowledge_set_ids = ( + {knowledge_set.id for knowledge_set in payload.agent_soul.knowledge.sets} + if payload.agent_soul is not None + else None + ) + findings = ComposerConfigValidator.collect_soft_findings( + payload, + existing_knowledge_set_ids=existing_knowledge_set_ids, + ) if agent_id and payload.agent_soul is not None: findings["warnings"].extend( cls._drive_mention_findings( @@ -296,6 +299,24 @@ class AgentComposerService: ) return findings + @classmethod + def validate_knowledge_datasets(cls, *, tenant_id: str, agent_soul: AgentSoulConfig | None) -> None: + """Hard-validate tenant-scoped knowledge set datasets before saving. + + DTO validators own set shape, duplicate set ids/names, and duplicate + dataset ids within one set. This service-level check owns database + existence and tenant ownership so invalid or cross-tenant datasets fail + before Agent Soul snapshots are persisted. + """ + if agent_soul is None: + return + missing_ids = list_missing_tenant_knowledge_dataset_ids(tenant_id=tenant_id, agent_soul=agent_soul) + if missing_ids: + raise InvalidComposerConfigError( + "knowledge_dataset_not_found: knowledge sets reference missing or out-of-scope datasets: " + + ", ".join(missing_ids) + ) + @classmethod def resolve_bound_agent_id(cls, *, tenant_id: str, app_id: str) -> str | None: """The Agent App's bound roster agent id, if any (validate-endpoint context).""" @@ -410,7 +431,7 @@ class AgentComposerService: soul_lists, soul_truncated = soul_candidates( agent_soul=agent_soul, - dataset_lookup=lambda ids: cls._dataset_rows(tenant_id=tenant_id, dataset_ids=ids), + dataset_lookup=lambda ids: get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=ids), workspace_tools_loader=lambda: cls._workspace_dify_tools(tenant_id=tenant_id, user_id=user_id), ) truncated = truncated or soul_truncated @@ -437,7 +458,7 @@ class AgentComposerService: agent_soul = cls._load_agent_app_soul(tenant_id=tenant_id, app_id=app_id) soul_lists, truncated = soul_candidates( agent_soul=agent_soul, - dataset_lookup=lambda ids: cls._dataset_rows(tenant_id=tenant_id, dataset_ids=ids), + dataset_lookup=lambda ids: get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=ids), workspace_tools_loader=lambda: cls._workspace_dify_tools(tenant_id=tenant_id, user_id=user_id), ) response = ComposerCandidatesResponse( @@ -530,30 +551,6 @@ class AgentComposerService: variables = WorkflowDraftVariableService(session=session).list_system_variables(app_id, user_id) return [(variable.name, variable.value_type.value) for variable in variables.variables] - @staticmethod - def _dataset_rows(*, tenant_id: str, dataset_ids: list[str]) -> dict[str, Any]: - """Tenant-scoped dataset lookup tolerating malformed ids. - - Mention ids come from user-editable prompt text; a non-UUID id can never - match a dataset row, so it is simply absent from the result (-> missing/ - placeholder semantics) instead of breaking the UUID-typed query. - """ - from uuid import UUID - - from services.dataset_service import DatasetService - - valid_ids: list[str] = [] - for dataset_id in dataset_ids: - try: - UUID(dataset_id) - except (ValueError, TypeError): - continue - valid_ids.append(dataset_id) - if not valid_ids: - return {} - rows, _ = DatasetService.get_datasets_by_ids(valid_ids, tenant_id) - return {str(row.id): row for row in rows} - @staticmethod def _workspace_dify_tools(*, tenant_id: str, user_id: str) -> list[dict[str, Any]]: """Workspace Dify Plugin tools, same source as the tool selector. diff --git a/api/services/agent/composer_validator.py b/api/services/agent/composer_validator.py index a1d5ce07655..4a9d0a5a9a9 100644 --- a/api/services/agent/composer_validator.py +++ b/api/services/agent/composer_validator.py @@ -141,15 +141,15 @@ class ComposerConfigValidator: cls, payload: ComposerSavePayload, *, - existing_dataset_ids: set[str] | None = None, + existing_knowledge_set_ids: set[str] | None = None, ) -> dict[str, Any]: """ENG-617 §5.3/§5.4 soft findings — never block save. ``warnings`` carries ``mention_target_missing`` / ``mention_malformed`` - entries; ``knowledge_retrieval_placeholder`` keeps dangling knowledge + entries; ``knowledge_retrieval_placeholder`` keeps dangling knowledge-set mentions with a placeholder name (0522 consensus) instead of dropping or - rejecting them. With ``existing_dataset_ids`` provided, configured-but- - deleted datasets surface as placeholders too. + rejecting them. With ``existing_knowledge_set_ids`` provided, mentions + that no longer exist in the current Agent Soul surface as placeholders too. """ warnings: list[dict[str, Any]] = [] placeholders: list[dict[str, str]] = [] @@ -181,7 +181,7 @@ class ComposerConfigValidator: resolved = resolver(mention) if mention.kind == MentionKind.KNOWLEDGE: dangling = resolved is None or ( - existing_dataset_ids is not None and mention.ref_id not in existing_dataset_ids + existing_knowledge_set_ids is not None and mention.ref_id not in existing_knowledge_set_ids ) if dangling: placeholders.append( diff --git a/api/services/agent/knowledge_datasets.py b/api/services/agent/knowledge_datasets.py new file mode 100644 index 00000000000..962c562ce15 --- /dev/null +++ b/api/services/agent/knowledge_datasets.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +from typing import Any +from uuid import UUID + +from models.agent_config_entities import AgentSoulConfig + + +def list_agent_soul_knowledge_dataset_ids(agent_soul: AgentSoulConfig) -> list[str]: + """Return normalized unique knowledge dataset ids in config order. + + Agent v2 knowledge dataset selection is owned by ``knowledge.sets``. This + helper keeps composer, workflow validation, candidates, and runtime + diagnostics aligned on the same normalization rules: strip whitespace, drop + blanks, preserve first-seen order, and deduplicate. + """ + dataset_ids: list[str] = [] + seen: set[str] = set() + for knowledge_set in agent_soul.knowledge.sets: + for dataset in knowledge_set.datasets: + dataset_id = (dataset.id or "").strip() + if not dataset_id or dataset_id in seen: + continue + seen.add(dataset_id) + dataset_ids.append(dataset_id) + return dataset_ids + + +def get_tenant_knowledge_dataset_rows(*, tenant_id: str, dataset_ids: list[str]) -> dict[str, Any]: + """Return tenant-scoped dataset rows for normalized knowledge dataset ids. + + Knowledge ids come from user-editable config. Malformed ids can never match + a dataset row, so they are treated as missing instead of breaking the + UUID-typed dataset lookup. + """ + from services.dataset_service import DatasetService + + valid_ids: list[str] = [] + for dataset_id in dataset_ids: + try: + UUID(dataset_id) + except (TypeError, ValueError): + continue + valid_ids.append(dataset_id) + + if not valid_ids: + return {} + + rows, _ = DatasetService.get_datasets_by_ids(valid_ids, tenant_id) + return {str(row.id): row for row in rows} + + +def list_missing_tenant_knowledge_dataset_ids(*, tenant_id: str, agent_soul: AgentSoulConfig | None) -> list[str]: + """Return normalized knowledge dataset ids missing from the tenant scope.""" + if agent_soul is None: + return [] + + dataset_ids = list_agent_soul_knowledge_dataset_ids(agent_soul) + if not dataset_ids: + return [] + + rows = get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=dataset_ids) + return [dataset_id for dataset_id in dataset_ids if dataset_id not in rows] diff --git a/api/services/agent/prompt_mentions.py b/api/services/agent/prompt_mentions.py index 27bed49c53b..cc35979a644 100644 --- a/api/services/agent/prompt_mentions.py +++ b/api/services/agent/prompt_mentions.py @@ -6,7 +6,7 @@ Slash-menu insertions are stored inline in the plain-string prompt as tokens: ``kind`` is a fixed lowercase word; ``id`` points at an item in the Agent runtime context. For prompt-owned entities that means Agent Soul lists such as -``tools`` / ``knowledge.datasets`` / ``human.contacts`` and workflow job lists +``tools`` / ``knowledge.sets`` / ``human.contacts`` and workflow job lists such as ``previous_node_output_refs`` / ``declared_outputs``. For drive-backed ``skill`` / ``file`` mentions the field stores a URL-encoded drive key and is resolved against ``agent_drive_files`` at runtime. ``label`` is an optional @@ -211,9 +211,9 @@ def build_soul_mention_resolver(agent_soul: AgentSoulConfig) -> MentionResolver: if mention.ref_id in (cli_tool.id, cli_tool.name): return cli_tool.name or cli_tool.id case MentionKind.KNOWLEDGE: - for dataset in agent_soul.knowledge.datasets: - if mention.ref_id == dataset.id: - return dataset.name or dataset.id + for knowledge_set in agent_soul.knowledge.sets: + if mention.ref_id == knowledge_set.id: + return knowledge_set.name or knowledge_set.id case MentionKind.HUMAN: return _resolve_human_contact(agent_soul.human.contacts, mention.ref_id) case _: diff --git a/api/tests/unit_tests/clients/agent_backend/test_request_builder.py b/api/tests/unit_tests/clients/agent_backend/test_request_builder.py index c91d0fd3e8a..3bb73289580 100644 --- a/api/tests/unit_tests/clients/agent_backend/test_request_builder.py +++ b/api/tests/unit_tests/clients/agent_backend/test_request_builder.py @@ -162,8 +162,15 @@ def test_request_builder_adds_knowledge_layer_when_configured(): run_input = _run_input() run_input.knowledge = DifyKnowledgeBaseLayerConfig.model_validate( { - "dataset_ids": ["dataset-1"], - "retrieval": {"mode": "multiple", "top_k": 4}, + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + } + ], } ) @@ -174,7 +181,7 @@ def test_request_builder_adds_knowledge_layer_when_configured(): assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].type == DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].deps == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID} knowledge_config = cast(DifyKnowledgeBaseLayerConfig, layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].config) - assert knowledge_config.dataset_ids == ["dataset-1"] + assert knowledge_config.sets[0].dataset_ids == ["dataset-1"] def test_request_builder_can_delete_on_exit_for_cleanup_paths(): @@ -386,8 +393,15 @@ def test_agent_app_request_builder_adds_knowledge_layer_when_configured(): run_input = _agent_app_input() run_input.knowledge = DifyKnowledgeBaseLayerConfig.model_validate( { - "dataset_ids": ["dataset-1", "dataset-2"], - "retrieval": {"mode": "multiple", "top_k": 2}, + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 2}, + } + ], } ) @@ -398,7 +412,7 @@ def test_agent_app_request_builder_adds_knowledge_layer_when_configured(): assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].type == DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].deps == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID} knowledge_config = cast(DifyKnowledgeBaseLayerConfig, layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].config) - assert knowledge_config.dataset_ids == ["dataset-1", "dataset-2"] + assert knowledge_config.sets[0].dataset_ids == ["dataset-1", "dataset-2"] # ── ENG-635 / ENG-638: ask_human layer injection + deferred_tool_results ───── diff --git a/api/tests/unit_tests/commands/test_generate_swagger_specs.py b/api/tests/unit_tests/commands/test_generate_swagger_specs.py index c30386c9d65..403fb0e94a4 100644 --- a/api/tests/unit_tests/commands/test_generate_swagger_specs.py +++ b/api/tests/unit_tests/commands/test_generate_swagger_specs.py @@ -149,3 +149,55 @@ def test_generate_specs_is_idempotent(tmp_path): assert [path.name for path in first_paths] == [path.name for path in second_paths] for first_path, second_path in zip(first_paths, second_paths): assert first_path.read_text(encoding="utf-8") == second_path.read_text(encoding="utf-8") + + +def test_generate_specs_include_agent_v2_knowledge_set_schema_and_query_enums(tmp_path): + module = _load_generate_swagger_specs_module() + + written_paths = module.generate_specs(tmp_path) + console_path = next(path for path in written_paths if path.name == "console-openapi.json") + payload = json.loads(console_path.read_text(encoding="utf-8")) + schemas = payload["components"]["schemas"] + + assert "AgentKnowledgeSetConfig" in schemas + assert schemas["AgentSoulKnowledgeConfig"]["properties"]["sets"]["items"]["$ref"] == ( + "#/components/schemas/AgentKnowledgeSetConfig" + ) + assert schemas["AgentKnowledgeQueryMode"]["enum"] == ["generated_query", "user_query"] + + +def test_checked_in_agent_v2_knowledge_openapi_and_generated_contracts_are_in_sync(): + api_dir = Path(__file__).resolve().parents[3] + repo_root = api_dir.parent + + markdown = (api_dir / "openapi" / "markdown" / "console-openapi.md").read_text(encoding="utf-8") + agent_types = ( + repo_root / "packages" / "contracts" / "generated" / "api" / "console" / "agent" / "types.gen.ts" + ).read_text(encoding="utf-8") + apps_types = ( + repo_root / "packages" / "contracts" / "generated" / "api" / "console" / "apps" / "types.gen.ts" + ).read_text(encoding="utf-8") + agent_zod = ( + repo_root / "packages" / "contracts" / "generated" / "api" / "console" / "agent" / "zod.gen.ts" + ).read_text(encoding="utf-8") + apps_zod = ( + repo_root / "packages" / "contracts" / "generated" / "api" / "console" / "apps" / "zod.gen.ts" + ).read_text(encoding="utf-8") + + assert "#### AgentKnowledgeSetConfig" in markdown + assert "#### AgentSoulKnowledgeConfig" in markdown + assert "#### AgentKnowledgeQueryMode" in markdown + + for content in (agent_types, apps_types): + assert "export type AgentKnowledgeSetConfig = {" in content + assert "export type AgentSoulKnowledgeConfig = {" in content + assert "AgentKnowledgeQueryMode" in content + assert "generated_query" in content + assert "user_query" in content + + for content in (agent_zod, apps_zod): + assert "export const zAgentKnowledgeSetConfig = z.object({" in content + assert "export const zAgentSoulKnowledgeConfig = z.object({" in content + assert "zAgentKnowledgeQueryMode = z.enum([" in content + assert "generated_query" in content + assert "user_query" in content diff --git a/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py b/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py index 0d1483e1b79..43b6710f41d 100644 --- a/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py +++ b/api/tests/unit_tests/core/app/apps/agent_app/test_runtime_request_builder.py @@ -153,12 +153,19 @@ class TestAgentAppRuntimeRequestBuilder: "model": "gpt-4o-mini", }, "knowledge": { - "datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}], - "query_config": { - "top_k": 3, - "score_threshold": 0.5, - "score_threshold_enabled": False, - }, + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}], + "query": {"mode": "generated_query"}, + "retrieval": { + "mode": "multiple", + "top_k": 3, + "score_threshold": None, + }, + } + ], }, } ) @@ -173,10 +180,12 @@ class TestAgentAppRuntimeRequestBuilder: assert knowledge.type == "dify.knowledge_base" assert knowledge.deps == {"execution_context": "execution_context"} dumped_config = knowledge.config.model_dump(mode="json", by_alias=True) - assert dumped_config["dataset_ids"] == ["dataset-1", "dataset-2"] - assert dumped_config["retrieval"]["mode"] == "multiple" - assert dumped_config["retrieval"]["top_k"] == 3 - assert dumped_config["retrieval"]["score_threshold"] == 0.0 + knowledge_set = dumped_config["sets"][0] + assert [dataset["id"] for dataset in knowledge_set["datasets"]] == ["dataset-1", "dataset-2"] + assert knowledge_set["query"] == {"mode": "generated_query", "value": None} + assert knowledge_set["retrieval"]["mode"] == "multiple" + assert knowledge_set["retrieval"]["top_k"] == 3 + assert knowledge_set["retrieval"]["score_threshold"] == 0.0 def test_build_raises_when_model_missing(self): builder = AgentAppRuntimeRequestBuilder( diff --git a/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_runtime_request_builder.py b/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_runtime_request_builder.py index 78e49769159..cf9d1b797e3 100644 --- a/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_runtime_request_builder.py +++ b/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_runtime_request_builder.py @@ -512,12 +512,55 @@ def test_build_maps_agent_soul_knowledge_to_knowledge_layer_config(): "model": "gpt-test", }, "knowledge": { - "datasets": [{"id": "dataset-1"}, {"id": " "}, {"id": "dataset-2"}], - "query_config": { - "top_k": 6, - "score_threshold": 0.4, - "score_threshold_enabled": True, - }, + "sets": [ + { + "id": "support", + "name": "Support KB", + "description": "Support content", + "datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}], + "query": {"mode": "generated_query"}, + "retrieval": { + "mode": "multiple", + "top_k": 6, + "score_threshold": 0.4, + "reranking_model": {"provider": "cohere", "model": "rerank-v3"}, + "weights": {"weight_type": "weighted_score", "vector_setting": {"vector_weight": 0.7}}, + }, + "metadata_filtering": { + "mode": "manual", + "conditions": { + "logical_operator": "and", + "conditions": [ + {"name": "category", "comparison_operator": "contains", "value": "auth"} + ], + }, + }, + }, + { + "id": "release", + "name": "Release Notes", + "datasets": [{"id": "dataset-3"}], + "query": {"mode": "user_query", "value": "release notes"}, + "retrieval": { + "mode": "single", + "model": { + "provider": "openai", + "name": "gpt-4o-mini", + "mode": "chat", + "completion_params": {"temperature": 0.2}, + }, + }, + "metadata_filtering": { + "mode": "automatic", + "model_config": { + "provider": "openai", + "name": "gpt-4o-mini", + "mode": "chat", + "completion_params": {}, + }, + }, + }, + ], }, } ), @@ -531,25 +574,75 @@ def test_build_maps_agent_soul_knowledge_to_knowledge_layer_config(): knowledge_layer = layers["knowledge"] assert knowledge_layer["type"] == "dify.knowledge_base" assert knowledge_layer["deps"] == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID} - assert knowledge_layer["config"] == { - "dataset_ids": ["dataset-1", "dataset-2"], - "retrieval": { - "mode": "multiple", - "top_k": 6, - "score_threshold": 0.4, - "reranking_mode": "reranking_model", - "reranking_enable": True, - "reranking_model": None, - "weights": None, - "model": None, + assert knowledge_layer["config"]["sets"] == [ + { + "id": "support", + "name": "Support KB", + "description": "Support content", + "datasets": [ + {"id": "dataset-1", "name": None, "description": None}, + {"id": "dataset-2", "name": None, "description": None}, + ], + "query": {"mode": "generated_query", "value": None}, + "retrieval": { + "mode": "multiple", + "top_k": 6, + "score_threshold": 0.4, + "reranking_mode": "reranking_model", + "reranking_enable": True, + "reranking_model": {"provider": "cohere", "model": "rerank-v3"}, + "weights": {"weight_type": "weighted_score", "vector_setting": {"vector_weight": 0.7}}, + "model": None, + }, + "metadata_filtering": { + "mode": "manual", + "metadata_model_config": None, + "conditions": { + "logical_operator": "and", + "conditions": [ + {"name": "category", "comparison_operator": "contains", "value": "auth"} + ], + }, + }, }, - "metadata_filtering": {"mode": "disabled", "metadata_model_config": None, "conditions": None}, - "max_result_content_chars": 2000, - "max_observation_chars": 12000, - } + { + "id": "release", + "name": "Release Notes", + "description": None, + "datasets": [{"id": "dataset-3", "name": None, "description": None}], + "query": {"mode": "user_query", "value": "release notes"}, + "retrieval": { + "mode": "single", + "top_k": None, + "score_threshold": 0.0, + "reranking_mode": "reranking_model", + "reranking_enable": True, + "reranking_model": None, + "weights": None, + "model": { + "provider": "openai", + "name": "gpt-4o-mini", + "mode": "chat", + "completion_params": {"temperature": 0.2}, + }, + }, + "metadata_filtering": { + "mode": "automatic", + "metadata_model_config": { + "provider": "openai", + "name": "gpt-4o-mini", + "mode": "chat", + "completion_params": {}, + }, + "conditions": None, + }, + }, + ] + assert knowledge_layer["config"]["max_result_content_chars"] == 2000 + assert knowledge_layer["config"]["max_observation_chars"] == 12000 -def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits_it(): +def test_build_knowledge_layer_maps_disabled_score_threshold_to_zero(): context = _context() snapshot = AgentConfigSnapshot( id="snapshot-1", @@ -565,8 +658,19 @@ def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits "model": "gpt-test", }, "knowledge": { - "datasets": [{"id": "dataset-1"}], - "query_config": {}, + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": { + "mode": "multiple", + "top_k": 4, + "score_threshold": None, + }, + } + ], }, } ), @@ -577,10 +681,10 @@ def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits dumped = result.request.model_dump(mode="json") knowledge_layer = next(layer for layer in dumped["composition"]["layers"] if layer["name"] == "knowledge") - assert knowledge_layer["config"]["retrieval"]["top_k"] == 4 + assert knowledge_layer["config"]["sets"][0]["retrieval"]["score_threshold"] == 0.0 -def test_build_skips_knowledge_layer_when_agent_soul_has_no_valid_dataset_ids(): +def test_build_skips_knowledge_layer_when_agent_soul_has_no_sets(): context = _context() snapshot = AgentConfigSnapshot( id="snapshot-1", @@ -595,9 +699,7 @@ def test_build_skips_knowledge_layer_when_agent_soul_has_no_valid_dataset_ids(): "model_provider": "openai", "model": "gpt-test", }, - "knowledge": { - "datasets": [{"id": " "}, {}], - }, + "knowledge": {"sets": []}, } ), ) @@ -1094,7 +1196,15 @@ def test_feature_manifest_marks_knowledge_supported_without_warning_when_configu soul = AgentSoulConfig.model_validate( { "knowledge": { - "datasets": [{"id": "dataset-1", "name": "Product Docs"}], + "sets": [ + { + "id": "product", + "name": "Product Docs", + "datasets": [{"id": "dataset-1", "name": "Product Docs"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + } + ], } } ) @@ -1106,13 +1216,13 @@ def test_feature_manifest_marks_knowledge_supported_without_warning_when_configu assert all("knowledge" not in w["section"] for w in manifest["unsupported_runtime_warnings"]) -def test_feature_manifest_treats_blank_knowledge_dataset_ids_as_not_configured(): +def test_feature_manifest_treats_empty_knowledge_sets_as_not_configured(): from core.workflow.nodes.agent_v2.runtime_feature_manifest import build_runtime_feature_manifest soul = AgentSoulConfig.model_validate( { "knowledge": { - "datasets": [{"id": " "}, {}], + "sets": [], } } ) diff --git a/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_validators.py b/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_validators.py index 440bd49e5c0..2254cd16d49 100644 --- a/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_validators.py +++ b/api/tests/unit_tests/core/workflow/nodes/agent_v2/test_validators.py @@ -55,6 +55,33 @@ def _snapshot() -> AgentConfigSnapshot: ) +def _snapshot_with_knowledge_dataset(dataset_id: str) -> AgentConfigSnapshot: + return AgentConfigSnapshot( + id="snapshot-1", + tenant_id="tenant-1", + agent_id="agent-1", + version=1, + config_snapshot=AgentSoulConfig( + model=AgentSoulModelConfig( + plugin_id="langgenius/openai", + model_provider="openai", + model="gpt-test", + ), + knowledge={ + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": dataset_id}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + } + ] + }, + ), + ) + + def _graph(edges: list[dict]) -> dict: return { "nodes": [ @@ -515,6 +542,35 @@ def test_publish_validation_rejects_missing_file_ref(): ) +def test_publish_validation_rejects_missing_or_out_of_scope_knowledge_datasets( + monkeypatch: pytest.MonkeyPatch, +): + dataset_id = "550e8400-e29b-41d4-a716-446655440000" + node_job = WorkflowNodeJobConfig.model_validate({}) + snapshot = _snapshot_with_knowledge_dataset(dataset_id) + session = Mock() + session.scalar.side_effect = [_binding(node_job), _agent(), snapshot] + + captured = {} + + def fake_get_datasets_by_ids(ids, tenant_id): + captured["ids"] = ids + captured["tenant_id"] = tenant_id + return [], 0 + + import services.dataset_service as dataset_service_module + + monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids) + + with pytest.raises(WorkflowAgentNodeValidationError, match=dataset_id): + WorkflowAgentNodeValidator.validate_published_workflow( + session=session, + workflow=_workflow(_graph([{"source": "start", "target": "agent-node"}])), + ) + + assert captured == {"ids": [dataset_id], "tenant_id": "tenant-1"} + + def test_publish_validation_accepts_tool_node_agentic_manual_mode(): session = Mock() diff --git a/api/tests/unit_tests/services/agent/test_agent_composer_entities.py b/api/tests/unit_tests/services/agent/test_agent_composer_entities.py index 089a5c74f3a..ba32222b70a 100644 --- a/api/tests/unit_tests/services/agent/test_agent_composer_entities.py +++ b/api/tests/unit_tests/services/agent/test_agent_composer_entities.py @@ -1,4 +1,5 @@ import pytest +from pydantic import ValidationError from models.agent_config_entities import AgentKnowledgeQueryMode, AgentSoulModelConfig, DeclaredOutputType from services.agent.composer_service import AgentComposerService @@ -91,14 +92,144 @@ def test_knowledge_query_mode_uses_stable_backend_enums(): config = AgentSoulConfig.model_validate( { "knowledge": { - "datasets": [{"dataset_id": "dataset-1"}], - "query_mode": "generated_query", - "query_config": {"generation_prompt": "Create a retrieval query."}, + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + } + ], } } ) - assert config.knowledge.query_mode == AgentKnowledgeQueryMode.GENERATED_QUERY + assert config.knowledge.sets[0].query.mode == AgentKnowledgeQueryMode.GENERATED_QUERY + + +@pytest.mark.parametrize( + ("knowledge_payload", "match"), + [ + ( + { + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + }, + { + "id": "support", + "name": "Billing KB", + "datasets": [{"id": "dataset-2"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + }, + ] + }, + "knowledge set ids must be unique", + ), + ( + { + "sets": [ + { + "id": "support", + "name": "Shared KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + }, + { + "id": "billing", + "name": "Shared KB", + "datasets": [{"id": "dataset-2"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + }, + ] + }, + "knowledge set names must be unique", + ), + ( + { + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}, {"id": " dataset-1 "}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + }, + ] + }, + "knowledge set dataset ids must be unique", + ), + ( + { + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "user_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + }, + ] + }, + "knowledge query.value is required for user_query mode", + ), + ( + { + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "single"}, + }, + ] + }, + "knowledge retrieval.model is required for single mode", + ), + ( + { + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + "metadata_filtering": {"mode": "automatic"}, + }, + ] + }, + "metadata_filtering.model_config is required for automatic mode", + ), + ( + { + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + "metadata_filtering": {"mode": "manual"}, + }, + ] + }, + "metadata_filtering.conditions is required for manual mode", + ), + ], +) +def test_knowledge_sets_contract_rejects_invalid_configs(knowledge_payload, match: str): + with pytest.raises(ValidationError, match=match): + AgentSoulConfig.model_validate({"knowledge": knowledge_payload}) def test_agent_soul_model_config_is_first_class_without_credentials(): diff --git a/api/tests/unit_tests/services/agent/test_agent_services.py b/api/tests/unit_tests/services/agent/test_agent_services.py index 2cad3d81af1..36e7c2736bc 100644 --- a/api/tests/unit_tests/services/agent/test_agent_services.py +++ b/api/tests/unit_tests/services/agent/test_agent_services.py @@ -2594,20 +2594,151 @@ def test_dataset_rows_filters_malformed_ids(monkeypatch: pytest.MonkeyPatch): return [], 0 import services.dataset_service as dataset_service_module + from services.agent.knowledge_datasets import get_tenant_knowledge_dataset_rows monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids) valid = "550e8400-e29b-41d4-a716-446655440000" - rows = AgentComposerService._dataset_rows(tenant_id="tenant-1", dataset_ids=["9999dead-beef", valid]) + rows = get_tenant_knowledge_dataset_rows(tenant_id="tenant-1", dataset_ids=["9999dead-beef", valid]) assert rows == {} assert captured["ids"] == [valid] # all-malformed input never touches the DB captured.clear() - assert AgentComposerService._dataset_rows(tenant_id="tenant-1", dataset_ids=["nope"]) == {} + assert get_tenant_knowledge_dataset_rows(tenant_id="tenant-1", dataset_ids=["nope"]) == {} assert captured == {} +@pytest.mark.parametrize( + ("variant", "save_call"), + [ + ( + ComposerVariant.AGENT_APP, + lambda payload: AgentComposerService.save_agent_app_composer( + tenant_id="tenant-1", + app_id="app-1", + account_id="account-1", + payload=payload, + ), + ), + ( + ComposerVariant.WORKFLOW, + lambda payload: AgentComposerService.save_workflow_composer( + tenant_id="tenant-1", + app_id="app-1", + node_id="node-1", + account_id="account-1", + payload=payload, + ), + ), + ], +) +def test_composer_save_rejects_malformed_knowledge_dataset_ids(monkeypatch: pytest.MonkeyPatch, variant, save_call): + captured = {"calls": 0} + + def fake_get_datasets_by_ids(ids, tenant_id): + captured["calls"] += 1 + captured["ids"] = ids + captured["tenant_id"] = tenant_id + return [], 0 + + import services.dataset_service as dataset_service_module + + monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids) + + payload = ComposerSavePayload.model_validate( + { + "variant": variant.value, + "save_strategy": ComposerSaveStrategy.SAVE_TO_CURRENT_VERSION.value, + "soul_lock": {"locked": False}, + "agent_soul": { + "knowledge": { + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "not-a-uuid"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + } + ] + } + }, + } + ) + + with pytest.raises(InvalidComposerConfigError, match="not-a-uuid"): + save_call(payload) + + assert captured == {"calls": 0} + + +@pytest.mark.parametrize( + ("variant", "save_call"), + [ + ( + ComposerVariant.AGENT_APP, + lambda payload: AgentComposerService.save_agent_app_composer( + tenant_id="tenant-1", + app_id="app-1", + account_id="account-1", + payload=payload, + ), + ), + ( + ComposerVariant.WORKFLOW, + lambda payload: AgentComposerService.save_workflow_composer( + tenant_id="tenant-1", + app_id="app-1", + node_id="node-1", + account_id="account-1", + payload=payload, + ), + ), + ], +) +def test_composer_save_rejects_missing_or_out_of_scope_knowledge_datasets( + monkeypatch: pytest.MonkeyPatch, variant, save_call +): + captured = {} + missing_dataset_id = "550e8400-e29b-41d4-a716-446655440000" + + def fake_get_datasets_by_ids(ids, tenant_id): + captured["ids"] = ids + captured["tenant_id"] = tenant_id + return [], 0 + + import services.dataset_service as dataset_service_module + + monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids) + + payload = ComposerSavePayload.model_validate( + { + "variant": variant.value, + "save_strategy": ComposerSaveStrategy.SAVE_TO_CURRENT_VERSION.value, + "soul_lock": {"locked": False}, + "agent_soul": { + "knowledge": { + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": missing_dataset_id}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + } + ] + } + }, + } + ) + + with pytest.raises(InvalidComposerConfigError, match=missing_dataset_id): + save_call(payload) + + assert captured == {"ids": [missing_dataset_id], "tenant_id": "tenant-1"} + + def test_workspace_dify_tools_returns_provider_and_tool_granularities(monkeypatch: pytest.MonkeyPatch): """The slash-menu Tools tab needs both selection granularities: a provider hosts many tools (like an MCP server), so candidates return one diff --git a/api/tests/unit_tests/services/agent/test_composer_candidates.py b/api/tests/unit_tests/services/agent/test_composer_candidates.py index 863ebafc994..5566d48c690 100644 --- a/api/tests/unit_tests/services/agent/test_composer_candidates.py +++ b/api/tests/unit_tests/services/agent/test_composer_candidates.py @@ -124,7 +124,18 @@ def _soul() -> AgentSoulConfig: {"id": "ct-2", "name": "disabled-one", "enabled": False}, ], }, - "knowledge": {"datasets": [{"id": "ds-1", "name": "旧名"}, {"id": "ds-gone", "name": "已删"}]}, + "knowledge": { + "sets": [ + { + "id": "kb-1", + "name": "产品知识", + "description": "knowledge set", + "datasets": [{"id": "ds-1", "name": "旧名"}, {"id": "ds-gone", "name": "已删"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + } + ] + }, "human": {"contacts": [{"id": "c-1", "name": "David Hayes", "channel": "email"}]}, } ) @@ -143,12 +154,16 @@ def test_soul_candidates_lists_configured_items_only(): assert [item["name"] for item in lists["cli_tools"]] == ["ffmpeg"] # the stable mention id flows through so the frontend can mint [§cli_tool:§] assert [item["id"] for item in lists["cli_tools"]] == ["ct-1"] - # enriched from DB; dangling dataset kept with missing flag (placeholder, 0522) - knowledge = {item["id"]: item for item in lists["knowledge_datasets"]} - assert knowledge["ds-1"]["name"] == "产品手册" - assert knowledge["ds-1"]["missing"] is False - assert knowledge["ds-gone"]["missing"] is True - assert knowledge["ds-gone"]["name"] == "已删" + # Knowledge mentions point at set ids; nested datasets are hydrated for context. + knowledge_set = lists["knowledge_sets"][0] + assert knowledge_set["id"] == "kb-1" + assert knowledge_set["name"] == "产品知识" + assert knowledge_set["missing_dataset_ids"] == ["ds-gone"] + datasets = {item["id"]: item for item in knowledge_set["datasets"]} + assert datasets["ds-1"]["name"] == "产品手册" + assert datasets["ds-1"]["missing"] is False + assert datasets["ds-gone"]["missing"] is True + assert datasets["ds-gone"]["name"] == "已删" assert lists["human_contacts"][0]["id"] == "c-1" assert lists["dify_tools"][0]["id"] == "tavily/tavily_search" diff --git a/api/tests/unit_tests/services/agent/test_composer_mention_validation.py b/api/tests/unit_tests/services/agent/test_composer_mention_validation.py index ffbec86f4e6..f56ae3751e8 100644 --- a/api/tests/unit_tests/services/agent/test_composer_mention_validation.py +++ b/api/tests/unit_tests/services/agent/test_composer_mention_validation.py @@ -149,22 +149,32 @@ def test_dangling_knowledge_without_label_gets_fallback_name(): ] -def test_configured_but_deleted_dataset_surfaces_as_placeholder(): +def test_configured_but_deleted_knowledge_set_surfaces_as_placeholder(): payload = ComposerSavePayload.model_validate( { "variant": "agent_app", "agent_soul": { - "prompt": {"system_prompt": "see [§knowledge:ds-1:产品手册§]"}, - "knowledge": {"datasets": [{"id": "ds-1", "name": "产品手册"}]}, + "prompt": {"system_prompt": "see [§knowledge:kb-1:产品手册§]"}, + "knowledge": { + "sets": [ + { + "id": "kb-1", + "name": "产品手册", + "datasets": [{"id": "ds-1", "name": "产品手册"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + } + ] + }, }, "save_strategy": "save_to_current_version", } ) - # configured + DB row exists -> clean - assert _findings(payload, existing_dataset_ids={"ds-1"})["knowledge_retrieval_placeholder"] == [] - # configured but deleted in DB -> placeholder - assert _findings(payload, existing_dataset_ids=set())["knowledge_retrieval_placeholder"] == [ - {"id": "ds-1", "placeholder_name": "产品手册"} + # configured + current Agent Soul row exists -> clean + assert _findings(payload, existing_knowledge_set_ids={"kb-1"})["knowledge_retrieval_placeholder"] == [] + # configured but removed from the current Agent Soul surface -> placeholder + assert _findings(payload, existing_knowledge_set_ids=set())["knowledge_retrieval_placeholder"] == [ + {"id": "kb-1", "placeholder_name": "产品手册"} ] diff --git a/api/tests/unit_tests/services/agent/test_prompt_mentions.py b/api/tests/unit_tests/services/agent/test_prompt_mentions.py index b8b908d432f..5bc614f4e49 100644 --- a/api/tests/unit_tests/services/agent/test_prompt_mentions.py +++ b/api/tests/unit_tests/services/agent/test_prompt_mentions.py @@ -107,7 +107,17 @@ def soul() -> AgentSoulConfig: ], "cli_tools": [{"id": "ct-1", "name": "ffmpeg"}], }, - "knowledge": {"datasets": [{"id": "ds-1", "name": "产品手册"}]}, + "knowledge": { + "sets": [ + { + "id": "kb-1", + "name": "产品手册", + "datasets": [{"id": "ds-1", "name": "产品手册"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + } + ] + }, "human": {"contacts": [{"id": "c-1", "name": "David Hayes", "channel": "email"}]}, } ) @@ -117,7 +127,7 @@ def test_soul_resolver_resolves_each_kind(soul: AgentSoulConfig): resolver = build_soul_mention_resolver(soul) prompt = ( "Use [§tool:tavily/tavily_search:tavily§], run [§cli_tool:ct-1:ffmpeg§], " - "ground in [§knowledge:ds-1§], ask [§human:c-1§]." + "ground in [§knowledge:kb-1§], ask [§human:c-1§]." ) expanded = expand_prompt_mentions(prompt, resolver) diff --git a/dify-agent/src/dify_agent/layers/knowledge/__init__.py b/dify-agent/src/dify_agent/layers/knowledge/__init__.py index 569512d8004..86a9405bce3 100644 --- a/dify-agent/src/dify_agent/layers/knowledge/__init__.py +++ b/dify-agent/src/dify_agent/layers/knowledge/__init__.py @@ -7,21 +7,31 @@ root stays import-safe for callers that only need to construct run requests. from dify_agent.layers.knowledge.configs import ( DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID, DifyKnowledgeBaseLayerConfig, + DifyKnowledgeDatasetConfig, + DifyKnowledgeEagerResult, DifyKnowledgeMetadataCondition, DifyKnowledgeMetadataConditions, DifyKnowledgeMetadataFilteringConfig, DifyKnowledgeModelConfig, + DifyKnowledgeQueryConfig, DifyKnowledgeRerankingModelConfig, DifyKnowledgeRetrievalConfig, + DifyKnowledgeRuntimeState, + DifyKnowledgeSetConfig, ) __all__ = [ "DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID", "DifyKnowledgeBaseLayerConfig", + "DifyKnowledgeDatasetConfig", + "DifyKnowledgeEagerResult", "DifyKnowledgeMetadataCondition", "DifyKnowledgeMetadataConditions", "DifyKnowledgeMetadataFilteringConfig", "DifyKnowledgeModelConfig", + "DifyKnowledgeQueryConfig", "DifyKnowledgeRerankingModelConfig", "DifyKnowledgeRetrievalConfig", + "DifyKnowledgeRuntimeState", + "DifyKnowledgeSetConfig", ] diff --git a/dify-agent/src/dify_agent/layers/knowledge/configs.py b/dify-agent/src/dify_agent/layers/knowledge/configs.py index 9ada075d1cc..b7b71ab9c42 100644 --- a/dify-agent/src/dify_agent/layers/knowledge/configs.py +++ b/dify-agent/src/dify_agent/layers/knowledge/configs.py @@ -1,12 +1,11 @@ """Client-safe DTOs for the Dify knowledge-base Agenton layer. -The public layer config exposes only static retrieval controls: dataset ids, -retrieval strategy, metadata filtering, and observation-size limits. The agent -model itself should only ever see a single ``query`` tool argument; tenant/ -app/user context comes from the execution-context layer and the actual -retrieval is delegated to the Dify API inner endpoint. Tool naming is not -caller-configurable: the runtime always exposes the same stable knowledge-base -search tool. +The public layer config carries one or more named knowledge sets. Each set owns +its dataset ids plus query, retrieval, and metadata-filtering policy. Generated- +query sets are exposed through one stable model-visible search tool whose +schema lets the model pick ``set_name`` and ``query``; user-query sets are +retrieved eagerly when the layer enters a run and their formatted observations +are kept only in JSON-safe ``runtime_state`` for session snapshots. """ from __future__ import annotations @@ -61,6 +60,44 @@ class DifyKnowledgeRerankingModelConfig(BaseModel): model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid") +class DifyKnowledgeDatasetConfig(BaseModel): + """One dataset selected by a knowledge set. + + Only ``id`` is used for retrieval. ``name`` and ``description`` are retained + because callers already have them and they are useful in runtime/debug + snapshots without changing the inner retrieval request contract. + """ + + id: str + name: str | None = None + description: str | None = None + + model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid") + + @field_validator("id") + @classmethod + def validate_id(cls, value: str) -> str: + normalized = value.strip() + if not normalized: + raise ValueError("dataset id must not be blank") + return normalized + + +class DifyKnowledgeQueryConfig(BaseModel): + """Query policy for one knowledge set.""" + + mode: Literal["user_query", "generated_query"] + value: str | None = None + + model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid") + + @model_validator(mode="after") + def validate_mode_specific_fields(self) -> DifyKnowledgeQueryConfig: + if self.mode == "user_query" and not (self.value or "").strip(): + raise ValueError("query.value is required for user_query mode") + return self + + class DifyKnowledgeRetrievalConfig(BaseModel): """Static retrieval controls mirrored into the inner API request.""" @@ -151,38 +188,90 @@ class DifyKnowledgeMetadataFilteringConfig(BaseModel): return payload -class DifyKnowledgeBaseLayerConfig(LayerConfig): - """Public config for one model-visible knowledge search tool. +class DifyKnowledgeSetConfig(BaseModel): + """One independently searchable or eagerly-preloaded knowledge set.""" - The model only gets to choose whether to call the tool and what ``query`` - to send. Dataset ids, retrieval settings, metadata filtering, and caller - context remain config/runtime concerns outside the model-visible tool - schema. The tool name and description are fixed by the layer runtime and do - not appear in the public config DTO. - """ - - dataset_ids: list[str] + id: str + name: str + description: str | None = None + datasets: list[DifyKnowledgeDatasetConfig] + query: DifyKnowledgeQueryConfig retrieval: DifyKnowledgeRetrievalConfig metadata_filtering: DifyKnowledgeMetadataFilteringConfig = Field( default_factory=DifyKnowledgeMetadataFilteringConfig ) + + model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid") + + @field_validator("id", "name") + @classmethod + def validate_non_blank_identity(cls, value: str) -> str: + normalized = value.strip() + if not normalized: + raise ValueError("knowledge set id and name must not be blank") + return normalized + + @model_validator(mode="after") + def validate_dataset_ids(self) -> DifyKnowledgeSetConfig: + if not self.datasets: + raise ValueError("knowledge set requires at least one dataset") + dataset_ids = [dataset.id for dataset in self.datasets] + if len(dataset_ids) != len(set(dataset_ids)): + raise ValueError("knowledge set dataset ids must be unique") + return self + + @property + def dataset_ids(self) -> list[str]: + """Return the selected dataset ids for the inner retrieval request.""" + return [dataset.id for dataset in self.datasets] + + +class DifyKnowledgeEagerResult(BaseModel): + """JSON-safe eager user-query result stored in layer runtime state.""" + + set_id: str + set_name: str + query: str + observation: str + status: Literal["success", "empty", "temporarily_unavailable"] + + model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid") + + +class DifyKnowledgeRuntimeState(BaseModel): + """Serializable eager-retrieval state stored in Agenton session snapshots.""" + + eager_config_fingerprint: str | None = None + eager_results: list[DifyKnowledgeEagerResult] = Field(default_factory=list) + + model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid", validate_assignment=True) + + +class DifyKnowledgeBaseLayerConfig(LayerConfig): + """Public config for one knowledge-base layer. + + The model-visible surface stays fixed to ``knowledge_base_search``. Set + names are the only model-visible selection labels; dataset ids, retrieval + controls, metadata filtering, and caller identity remain config/runtime + concerns outside the tool schema. + """ + + sets: list[DifyKnowledgeSetConfig] max_result_content_chars: int = Field(default=2000, ge=1) max_observation_chars: int = Field(default=12000, ge=1) model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid") - @field_validator("dataset_ids") - @classmethod - def validate_dataset_ids(cls, value: list[str]) -> list[str]: - if not value: - raise ValueError("dataset_ids must contain at least one item") - normalized_ids = [item.strip() for item in value] - if any(not item for item in normalized_ids): - raise ValueError("dataset_ids must not contain blank items") - return normalized_ids - @model_validator(mode="after") - def validate_observation_limits(self) -> DifyKnowledgeBaseLayerConfig: + def validate_sets_and_observation_limits(self) -> DifyKnowledgeBaseLayerConfig: + if not self.sets: + raise ValueError("sets must contain at least one knowledge set") + set_ids = [knowledge_set.id for knowledge_set in self.sets] + if len(set_ids) != len(set(set_ids)): + raise ValueError("knowledge set ids must be unique") + normalized_names = [knowledge_set.name.strip().lower() for knowledge_set in self.sets] + if len(normalized_names) != len(set(normalized_names)): + raise ValueError("knowledge set names must be unique") if self.max_observation_chars < self.max_result_content_chars: raise ValueError("max_observation_chars must be greater than or equal to max_result_content_chars") return self @@ -191,10 +280,15 @@ class DifyKnowledgeBaseLayerConfig(LayerConfig): __all__ = [ "DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID", "DifyKnowledgeBaseLayerConfig", + "DifyKnowledgeDatasetConfig", + "DifyKnowledgeEagerResult", "DifyKnowledgeMetadataCondition", "DifyKnowledgeMetadataConditions", "DifyKnowledgeMetadataFilteringConfig", "DifyKnowledgeModelConfig", + "DifyKnowledgeQueryConfig", "DifyKnowledgeRerankingModelConfig", "DifyKnowledgeRetrievalConfig", + "DifyKnowledgeRuntimeState", + "DifyKnowledgeSetConfig", ] diff --git a/dify-agent/src/dify_agent/layers/knowledge/layer.py b/dify-agent/src/dify_agent/layers/knowledge/layer.py index 02c9f07dd56..df07dc3cd36 100644 --- a/dify-agent/src/dify_agent/layers/knowledge/layer.py +++ b/dify-agent/src/dify_agent/layers/knowledge/layer.py @@ -1,17 +1,18 @@ -"""Dify knowledge-base layer exposing one model-visible search tool. +"""Dify knowledge-base layer exposing set-aware retrieval. The layer depends on ``DifyExecutionContextLayer`` for tenant/app/user/invoke -identity, keeps retrieval controls in config only, and borrows a lifespan-owned -HTTP client for each tool invocation. It never owns live clients or stores -retrieved source content in layer state. Tool identity is intentionally fixed at -runtime: callers cannot rename the knowledge tool or override its description -through public layer config because the model-visible surface must stay stable -across API-side Agent Soul mappings. +identity. Generated-query sets become one stable model-visible +``knowledge_base_search(set_name, query)`` tool, while user-query sets are +retrieved eagerly during context entry and exposed as additional user prompt +content. Eager observations are persisted only as JSON-safe runtime state so +Agenton session snapshots can resume without repeating unchanged retrievals. """ from __future__ import annotations from dataclasses import dataclass +import hashlib +import json import logging from typing import ClassVar, cast @@ -27,7 +28,13 @@ from dify_agent.layers.knowledge.client import ( DifyKnowledgeBaseClientError, DifyKnowledgeRetrieveResponse, ) -from dify_agent.layers.knowledge.configs import DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID, DifyKnowledgeBaseLayerConfig +from dify_agent.layers.knowledge.configs import ( + DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID, + DifyKnowledgeBaseLayerConfig, + DifyKnowledgeEagerResult, + DifyKnowledgeRuntimeState, + DifyKnowledgeSetConfig, +) logger = logging.getLogger(__name__) @@ -35,23 +42,14 @@ logger = logging.getLogger(__name__) # public DTO cannot grow a parallel naming contract that diverges from the # runtime knowledge-search surface. _KNOWLEDGE_BASE_TOOL_NAME = "knowledge_base_search" -_KNOWLEDGE_BASE_TOOL_DESCRIPTION = "Search configured knowledge bases for information relevant to the query." +_KNOWLEDGE_BASE_TOOL_DESCRIPTION = ( + "Search a configured knowledge set. Pick one configured set_name and provide a focused search query." +) BLANK_QUERY_OBSERVATION = "knowledge base search requires a non-empty query" NO_RESULTS_OBSERVATION = "No relevant knowledge base results were found." TEMPORARY_UNAVAILABLE_OBSERVATION = ( "Knowledge base search is temporarily unavailable. Please continue without it if possible." ) -QUERY_TOOL_SCHEMA = { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "Search query for the configured knowledge bases.", - } - }, - "required": ["query"], - "additionalProperties": False, -} class DifyKnowledgeBaseDeps(LayerDeps): @@ -61,8 +59,10 @@ class DifyKnowledgeBaseDeps(LayerDeps): @dataclass(slots=True) -class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBaseLayerConfig]): - """Layer that resolves one config-scoped knowledge search tool.""" +class DifyKnowledgeBaseLayer( + PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBaseLayerConfig, DifyKnowledgeRuntimeState] +): + """Layer that resolves set-scoped knowledge tools and eager user prompts.""" type_id: ClassVar[str | None] = DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID @@ -95,7 +95,7 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase ) async def get_tools(self, *, http_client: httpx.AsyncClient) -> list[Tool[object]]: - """Build one Pydantic AI tool that exposes only ``query`` to the model. + """Build the unified generated-query Pydantic AI tool, when needed. Knowledge tools depend on execution-context identity that is optional for other run types but mandatory here: ``tenant_id``, ``user_id``, @@ -103,11 +103,15 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase any HTTP request is attempted. Tool execution then follows a strict observation policy: + - unknown ``set_name`` returns a local validation observation; - blank ``query`` returns a local validation observation; - retryable client failures (timeouts, connection failures, HTTP ``429``/``502``) become a temporary-unavailable observation; - non-retryable client failures are raised so the run fails fast. """ + generated_sets = self._generated_query_sets() + if not generated_sets: + return [] if http_client.is_closed: raise RuntimeError("DifyKnowledgeBaseLayer.get_tools() requires an open shared HTTP client.") @@ -118,54 +122,28 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase api_key=self.inner_api_key, http_client=http_client, ) + set_by_name = {knowledge_set.name: knowledge_set for knowledge_set in generated_sets} - async def knowledge_base_search(_ctx: RunContext[object], query: str) -> str: + async def knowledge_base_search(_ctx: RunContext[object], set_name: str, query: str) -> str: + knowledge_set = set_by_name.get(set_name) + if knowledge_set is None: + return f"unknown knowledge set: {set_name}" normalized_query = query.strip() if not normalized_query: return BLANK_QUERY_OBSERVATION - try: - response = await client.retrieve( - tenant_id=caller["tenant_id"], - user_id=caller["user_id"], - app_id=caller["app_id"], - user_from=caller["user_from"], - invoke_from=caller["invoke_from"], - dataset_ids=list(self.config.dataset_ids), - query=normalized_query, - retrieval=self.config.retrieval, - metadata_filtering=self.config.metadata_filtering, - ) - except DifyKnowledgeBaseClientError as exc: - if exc.retryable: - logger.warning( - "knowledge base search temporarily unavailable", - extra={ - "tenant_id": caller["tenant_id"], - "app_id": caller["app_id"], - "invoke_from": caller["invoke_from"], - "error_code": exc.error_code, - "status_code": exc.status_code, - }, - ) - return TEMPORARY_UNAVAILABLE_OBSERVATION - logger.error( - "knowledge base search failed", - extra={ - "tenant_id": caller["tenant_id"], - "app_id": caller["app_id"], - "invoke_from": caller["invoke_from"], - "error_code": exc.error_code, - "status_code": exc.status_code, - }, - ) - raise - return _format_observation(response, self.config) + return await self._retrieve_for_set( + client=client, + caller=caller, + knowledge_set=knowledge_set, + query=normalized_query, + retryable_observation=True, + ) async def prepare_tool_definition(_ctx: RunContext[object], tool_def: ToolDefinition) -> ToolDefinition: return ToolDefinition( name=tool_def.name, description=tool_def.description, - parameters_json_schema=QUERY_TOOL_SCHEMA, + parameters_json_schema=_tool_schema(generated_sets), strict=tool_def.strict, sequential=tool_def.sequential, metadata=tool_def.metadata, @@ -181,11 +159,177 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase knowledge_base_search, takes_ctx=True, name=_KNOWLEDGE_BASE_TOOL_NAME, - description=_KNOWLEDGE_BASE_TOOL_DESCRIPTION, + description=_tool_description(generated_sets), prepare=prepare_tool_definition, ) ] + @property + @override + def user_prompts(self) -> list[str]: + """Expose eager user-query results as an additional user prompt.""" + if not self.runtime_state.eager_results: + return [] + + sections: list[str] = [] + for result in self.runtime_state.eager_results: + sections.append( + "\n".join( + [ + f"Set: {result.set_name}", + f"Query: {result.query}", + "Results:", + result.observation, + ] + ) + ) + return ["Knowledge retrieval results:\n\n" + "\n\n".join(sections)] + + @override + async def on_context_create(self) -> None: + await self._refresh_eager_results_if_needed() + + @override + async def on_context_resume(self) -> None: + await self._refresh_eager_results_if_needed() + + def _generated_query_sets(self) -> list[DifyKnowledgeSetConfig]: + return [knowledge_set for knowledge_set in self.config.sets if knowledge_set.query.mode == "generated_query"] + + def _user_query_sets(self) -> list[DifyKnowledgeSetConfig]: + return [knowledge_set for knowledge_set in self.config.sets if knowledge_set.query.mode == "user_query"] + + async def _refresh_eager_results_if_needed(self) -> None: + user_query_sets = self._user_query_sets() + if not user_query_sets: + self.runtime_state.eager_config_fingerprint = None + self.runtime_state.eager_results = [] + return + + fingerprint = _eager_config_fingerprint(user_query_sets) + if self.runtime_state.eager_config_fingerprint == fingerprint: + return + + caller = _build_caller_context(self.deps.execution_context.config) + async with httpx.AsyncClient() as http_client: + client = DifyKnowledgeBaseClient( + base_url=self.inner_api_url, + api_key=self.inner_api_key, + http_client=http_client, + ) + eager_results: list[DifyKnowledgeEagerResult] = [] + for knowledge_set in user_query_sets: + query = (knowledge_set.query.value or "").strip() + try: + response = await client.retrieve( + tenant_id=caller["tenant_id"], + user_id=caller["user_id"], + app_id=caller["app_id"], + user_from=caller["user_from"], + invoke_from=caller["invoke_from"], + dataset_ids=knowledge_set.dataset_ids, + query=query, + retrieval=knowledge_set.retrieval, + metadata_filtering=knowledge_set.metadata_filtering, + ) + except DifyKnowledgeBaseClientError as exc: + if exc.retryable: + logger.warning( + "eager knowledge retrieval temporarily unavailable", + extra={ + "tenant_id": caller["tenant_id"], + "app_id": caller["app_id"], + "invoke_from": caller["invoke_from"], + "knowledge_set_id": knowledge_set.id, + "error_code": exc.error_code, + "status_code": exc.status_code, + }, + ) + eager_results.append( + DifyKnowledgeEagerResult( + set_id=knowledge_set.id, + set_name=knowledge_set.name, + query=query, + observation=TEMPORARY_UNAVAILABLE_OBSERVATION, + status="temporarily_unavailable", + ) + ) + continue + logger.error( + "eager knowledge retrieval failed", + extra={ + "tenant_id": caller["tenant_id"], + "app_id": caller["app_id"], + "invoke_from": caller["invoke_from"], + "knowledge_set_id": knowledge_set.id, + "error_code": exc.error_code, + "status_code": exc.status_code, + }, + ) + raise + + eager_results.append( + DifyKnowledgeEagerResult( + set_id=knowledge_set.id, + set_name=knowledge_set.name, + query=query, + observation=_format_observation(response, self.config, include_heading=False), + status="success" if response.results else "empty", + ) + ) + + self.runtime_state.eager_results = eager_results + self.runtime_state.eager_config_fingerprint = fingerprint + + async def _retrieve_for_set( + self, + *, + client: DifyKnowledgeBaseClient, + caller: dict[str, str], + knowledge_set: DifyKnowledgeSetConfig, + query: str, + retryable_observation: bool, + ) -> str: + try: + response = await client.retrieve( + tenant_id=caller["tenant_id"], + user_id=caller["user_id"], + app_id=caller["app_id"], + user_from=caller["user_from"], + invoke_from=caller["invoke_from"], + dataset_ids=knowledge_set.dataset_ids, + query=query, + retrieval=knowledge_set.retrieval, + metadata_filtering=knowledge_set.metadata_filtering, + ) + except DifyKnowledgeBaseClientError as exc: + if exc.retryable and retryable_observation: + logger.warning( + "knowledge base search temporarily unavailable", + extra={ + "tenant_id": caller["tenant_id"], + "app_id": caller["app_id"], + "invoke_from": caller["invoke_from"], + "knowledge_set_id": knowledge_set.id, + "error_code": exc.error_code, + "status_code": exc.status_code, + }, + ) + return TEMPORARY_UNAVAILABLE_OBSERVATION + logger.error( + "knowledge base search failed", + extra={ + "tenant_id": caller["tenant_id"], + "app_id": caller["app_id"], + "invoke_from": caller["invoke_from"], + "knowledge_set_id": knowledge_set.id, + "error_code": exc.error_code, + "status_code": exc.status_code, + }, + ) + raise + return _format_observation(response, self.config) + def _build_caller_context(execution_context: object) -> dict[str, str]: """Extract the inner-API caller identity from execution-context config. @@ -232,7 +376,56 @@ def _build_caller_context(execution_context: object) -> dict[str, str]: } -def _format_observation(response: DifyKnowledgeRetrieveResponse, config: DifyKnowledgeBaseLayerConfig) -> str: +def _tool_schema(generated_sets: list[DifyKnowledgeSetConfig]) -> dict[str, object]: + return { + "type": "object", + "properties": { + "set_name": { + "type": "string", + "enum": [knowledge_set.name for knowledge_set in generated_sets], + "description": "Knowledge set to search.", + }, + "query": { + "type": "string", + "description": "Search query for the selected knowledge set.", + }, + }, + "required": ["set_name", "query"], + "additionalProperties": False, + } + + +def _tool_description(generated_sets: list[DifyKnowledgeSetConfig]) -> str: + set_descriptions = [] + for knowledge_set in generated_sets: + if knowledge_set.description: + set_descriptions.append(f"{knowledge_set.name}: {knowledge_set.description}") + else: + set_descriptions.append(knowledge_set.name) + return f"{_KNOWLEDGE_BASE_TOOL_DESCRIPTION} Configured sets: {', '.join(set_descriptions)}." + + +def _eager_config_fingerprint(user_query_sets: list[DifyKnowledgeSetConfig]) -> str: + payload = [ + { + "id": knowledge_set.id, + "query": knowledge_set.query.model_dump(mode="json"), + "dataset_ids": knowledge_set.dataset_ids, + "retrieval": knowledge_set.retrieval.model_dump(mode="json"), + "metadata_filtering": knowledge_set.metadata_filtering.model_dump(mode="json", by_alias=True), + } + for knowledge_set in user_query_sets + ] + serialized = json.dumps(payload, sort_keys=True, separators=(",", ":")) + return hashlib.sha256(serialized.encode("utf-8")).hexdigest() + + +def _format_observation( + response: DifyKnowledgeRetrieveResponse, + config: DifyKnowledgeBaseLayerConfig, + *, + include_heading: bool = True, +) -> str: """Render inner-API retrieval results into the model-visible tool response. The formatting contract is intentionally simple and stable for the model: @@ -248,7 +441,7 @@ def _format_observation(response: DifyKnowledgeRetrieveResponse, config: DifyKno if not response.results: return NO_RESULTS_OBSERVATION - lines = ["Knowledge base search results:"] + lines = ["Knowledge base search results:"] if include_heading else [] for index, result in enumerate(response.results, start=1): metadata = result.metadata title = result.title or metadata.document_name or "Untitled" @@ -280,6 +473,5 @@ __all__ = [ "DifyKnowledgeBaseDeps", "DifyKnowledgeBaseLayer", "NO_RESULTS_OBSERVATION", - "QUERY_TOOL_SCHEMA", "TEMPORARY_UNAVAILABLE_OBSERVATION", ] diff --git a/dify-agent/tests/local/dify_agent/layers/knowledge/test_configs.py b/dify-agent/tests/local/dify_agent/layers/knowledge/test_configs.py index f28939e329b..dbe8fddcbec 100644 --- a/dify-agent/tests/local/dify_agent/layers/knowledge/test_configs.py +++ b/dify-agent/tests/local/dify_agent/layers/knowledge/test_configs.py @@ -6,46 +6,142 @@ from dify_agent.layers.knowledge import DifyKnowledgeBaseLayerConfig def _valid_config() -> dict[str, object]: return { - "dataset_ids": ["dataset-1"], - "retrieval": { - "mode": "multiple", - "top_k": 4, - }, + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": { + "mode": "multiple", + "top_k": 4, + }, + } + ], } def test_knowledge_base_config_accepts_valid_multiple_mode() -> None: config = DifyKnowledgeBaseLayerConfig.model_validate(_valid_config()) - assert config.dataset_ids == ["dataset-1"] - assert config.retrieval.top_k == 4 - assert config.metadata_filtering.mode == "disabled" + assert config.sets[0].dataset_ids == ["dataset-1"] + assert config.sets[0].retrieval.top_k == 4 + assert config.sets[0].metadata_filtering.mode == "disabled" @pytest.mark.parametrize( "payload, expected_message", [ - ({"dataset_ids": [], "retrieval": {"mode": "multiple", "top_k": 4}}, "dataset_ids"), + ({"sets": []}, "sets"), ({"tool_name": "knowledge_base_search", **_valid_config()}, "Extra inputs are not permitted"), ({"tool_description": "Search knowledge", **_valid_config()}, "Extra inputs are not permitted"), - ({"dataset_ids": ["dataset-1"], "retrieval": {"mode": "multiple"}}, "top_k"), - ({"dataset_ids": ["dataset-1"], "retrieval": {"mode": "single"}}, "retrieval.model"), ( { - "dataset_ids": ["dataset-1"], - "retrieval": {"mode": "multiple", "top_k": 4}, - "metadata_filtering": {"mode": "automatic"}, + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": ""}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + } + ] + }, + "dataset id", + ), + ( + { + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "user_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + } + ] + }, + "query.value", + ), + ( + { + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple"}, + } + ] + }, + "top_k", + ), + ( + { + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "single"}, + } + ] + }, + "retrieval.model", + ), + ( + { + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + "metadata_filtering": {"mode": "automatic"}, + } + ], }, "metadata_filtering.model_config", ), ( { - "dataset_ids": ["dataset-1"], - "retrieval": {"mode": "multiple", "top_k": 4}, - "metadata_filtering": {"mode": "manual"}, + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + "metadata_filtering": {"mode": "manual"}, + } + ], }, "metadata_filtering.conditions", ), + ( + { + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + }, + { + "id": "docs", + "name": "support kb", + "datasets": [{"id": "dataset-2"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + }, + ] + }, + "names must be unique", + ), ], ) def test_knowledge_base_config_rejects_invalid_inputs(payload: dict[str, object], expected_message: str) -> None: @@ -57,8 +153,7 @@ def test_knowledge_base_config_rejects_observation_limit_smaller_than_result_lim with pytest.raises(ValidationError, match="max_observation_chars"): _ = DifyKnowledgeBaseLayerConfig.model_validate( { - "dataset_ids": ["dataset-1"], - "retrieval": {"mode": "multiple", "top_k": 4}, + **_valid_config(), "max_result_content_chars": 50, "max_observation_chars": 20, } diff --git a/dify-agent/tests/local/dify_agent/layers/knowledge/test_layer.py b/dify-agent/tests/local/dify_agent/layers/knowledge/test_layer.py index 28fadcb903b..ed6c798b409 100644 --- a/dify-agent/tests/local/dify_agent/layers/knowledge/test_layer.py +++ b/dify-agent/tests/local/dify_agent/layers/knowledge/test_layer.py @@ -8,7 +8,11 @@ from pydantic_ai import Tool from agenton.compositor import Compositor, LayerNode, LayerProvider from dify_agent.layers.execution_context import DifyExecutionContextLayerConfig from dify_agent.layers.execution_context.layer import DifyExecutionContextLayer -from dify_agent.layers.knowledge.client import DifyKnowledgeBaseClientError +from dify_agent.layers.knowledge.client import ( + DifyKnowledgeBaseClient, + DifyKnowledgeBaseClientError, + DifyKnowledgeRetrieveResponse, +) from dify_agent.layers.knowledge.configs import DifyKnowledgeBaseLayerConfig from dify_agent.layers.knowledge.layer import ( BLANK_QUERY_OBSERVATION, @@ -32,10 +36,23 @@ def _execution_context_config(**overrides: object) -> DifyExecutionContextLayerC def _knowledge_config(**overrides: object) -> DifyKnowledgeBaseLayerConfig: - payload: dict[str, object] = { - "dataset_ids": ["dataset-1"], + set_payload: dict[str, object] = { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, "retrieval": {"mode": "multiple", "top_k": 4}, } + for key in ("id", "name", "description", "datasets", "query", "retrieval", "metadata_filtering"): + if key in overrides: + set_payload[key] = overrides.pop(key) + if "dataset_ids" in overrides: + dataset_ids = overrides.pop("dataset_ids") + assert isinstance(dataset_ids, list) + set_payload["datasets"] = [{"id": dataset_id} for dataset_id in dataset_ids] + payload: dict[str, object] = { + "sets": [set_payload], + } payload.update(overrides) return DifyKnowledgeBaseLayerConfig.model_validate(payload) @@ -62,7 +79,7 @@ def _knowledge_provider() -> LayerProvider[DifyKnowledgeBaseLayer]: ) -def test_knowledge_layer_exposes_one_query_only_tool_definition() -> None: +def test_knowledge_layer_exposes_one_set_scoped_tool_definition() -> None: async def scenario() -> None: compositor = Compositor( [ @@ -82,20 +99,23 @@ def test_knowledge_layer_exposes_one_query_only_tool_definition() -> None: tool_def = await tool.prepare_tool_def(None) # pyright: ignore[reportArgumentType] assert isinstance(tool, Tool) assert tool.name == "knowledge_base_search" - assert tool.description == "Search configured knowledge bases for information relevant to the query." + assert "Pick one configured set_name" in tool.description assert tool_def is not None - assert ( - tool_def.description == "Search configured knowledge bases for information relevant to the query." - ) + assert "Pick one configured set_name" in tool_def.description assert tool_def.parameters_json_schema == { "type": "object", "properties": { + "set_name": { + "type": "string", + "enum": ["Support KB"], + "description": "Knowledge set to search.", + }, "query": { "type": "string", - "description": "Search query for the configured knowledge bases.", - } + "description": "Search query for the selected knowledge set.", + }, }, - "required": ["query"], + "required": ["set_name", "query"], "additionalProperties": False, } @@ -119,12 +139,105 @@ def test_knowledge_layer_rejects_blank_query_locally() -> None: ) as run: knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer) tool = (await knowledge_layer.get_tools(http_client=http_client))[0] - result = await tool.function_schema.call({"query": " "}, None) # pyright: ignore[reportArgumentType] + result = await tool.function_schema.call( # pyright: ignore[reportArgumentType] + {"set_name": "Support KB", "query": " "}, None + ) assert result == BLANK_QUERY_OBSERVATION asyncio.run(scenario()) +def test_knowledge_layer_exposes_no_tool_when_all_sets_are_user_query(monkeypatch: pytest.MonkeyPatch) -> None: + async def fake_retrieve(self: DifyKnowledgeBaseClient, **_kwargs: object) -> DifyKnowledgeRetrieveResponse: + del self + return DifyKnowledgeRetrieveResponse.model_validate({"results": [], "usage": {}}) + + monkeypatch.setattr(DifyKnowledgeBaseClient, "retrieve", fake_retrieve) + + async def scenario() -> None: + compositor = Compositor( + [ + LayerNode("execution_context", _execution_context_provider()), + LayerNode("knowledge", _knowledge_provider(), deps={"execution_context": "execution_context"}), + ] + ) + async with httpx.AsyncClient() as http_client: + async with compositor.enter( + configs={ + "execution_context": _execution_context_config(), + "knowledge": _knowledge_config(query={"mode": "user_query", "value": "release notes"}), + } + ) as run: + knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer) + assert await knowledge_layer.get_tools(http_client=http_client) == [] + + asyncio.run(scenario()) + + +def test_knowledge_layer_fetches_user_query_sets_on_context_entry(monkeypatch: pytest.MonkeyPatch) -> None: + seen_requests: list[dict[str, object]] = [] + + async def fake_retrieve(self: DifyKnowledgeBaseClient, **kwargs: object) -> DifyKnowledgeRetrieveResponse: + del self + seen_requests.append(kwargs) + return DifyKnowledgeRetrieveResponse.model_validate( + { + "results": [ + { + "metadata": { + "_source": "knowledge", + "dataset_name": "Docs", + "document_name": "Release.md", + "score": 0.8, + }, + "title": "Release", + "files": [], + "content": "Version notes", + "summary": None, + } + ], + "usage": {}, + } + ) + + monkeypatch.setattr(DifyKnowledgeBaseClient, "retrieve", fake_retrieve) + + async def scenario() -> None: + compositor = Compositor( + [ + LayerNode("execution_context", _execution_context_provider()), + LayerNode("knowledge", _knowledge_provider(), deps={"execution_context": "execution_context"}), + ] + ) + async with compositor.enter( + configs={ + "execution_context": _execution_context_config(), + "knowledge": _knowledge_config(query={"mode": "user_query", "value": "release notes"}), + } + ) as run: + knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer) + assert len(seen_requests) == 1 + assert seen_requests[0]["query"] == "release notes" + assert seen_requests[0]["dataset_ids"] == ["dataset-1"] + assert knowledge_layer.runtime_state.eager_config_fingerprint + assert knowledge_layer.runtime_state.eager_results[0].status == "success" + assert knowledge_layer.user_prompts == [ + "Knowledge retrieval results:\n\n" + "Set: Support KB\n" + "Query: release notes\n" + "Results:\n" + "1. Title: Release\n" + " Dataset: Docs\n" + " Document: Release.md\n" + " Score: 0.8\n" + " Content: Version notes" + ] + await knowledge_layer.on_context_resume() + assert len(seen_requests) == 1 + + asyncio.run(scenario()) + + @pytest.mark.parametrize( ("field_name", "field_value"), [ @@ -199,7 +312,9 @@ def test_knowledge_layer_formats_results_and_truncates_observation() -> None: ) as run: knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer) tool = (await knowledge_layer.get_tools(http_client=http_client))[0] - result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType] + result = await tool.function_schema.call( # pyright: ignore[reportArgumentType] + {"set_name": "Support KB", "query": "reset"}, None + ) assert result.startswith("Knowledge base search results:\n1. Title: Guide") assert "Dataset: Docs" in result assert "Document: Guide.md" in result @@ -229,7 +344,9 @@ def test_knowledge_layer_returns_no_results_observation() -> None: ) as run: knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer) tool = (await knowledge_layer.get_tools(http_client=http_client))[0] - result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType] + result = await tool.function_schema.call( # pyright: ignore[reportArgumentType] + {"set_name": "Support KB", "query": "reset"}, None + ) assert result == NO_RESULTS_OBSERVATION asyncio.run(scenario()) @@ -256,7 +373,9 @@ def test_knowledge_layer_converts_retryable_failures_into_observation() -> None: ) as run: knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer) tool = (await knowledge_layer.get_tools(http_client=http_client))[0] - result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType] + result = await tool.function_schema.call( # pyright: ignore[reportArgumentType] + {"set_name": "Support KB", "query": "reset"}, None + ) assert result == TEMPORARY_UNAVAILABLE_OBSERVATION asyncio.run(scenario()) @@ -289,7 +408,9 @@ def test_knowledge_layer_converts_retryable_transport_failures_into_observation( ) as run: knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer) tool = (await knowledge_layer.get_tools(http_client=http_client))[0] - result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType] + result = await tool.function_schema.call( # pyright: ignore[reportArgumentType] + {"set_name": "Support KB", "query": "reset"}, None + ) assert result == TEMPORARY_UNAVAILABLE_OBSERVATION asyncio.run(scenario()) @@ -317,7 +438,9 @@ def test_knowledge_layer_raises_non_retryable_client_errors() -> None: knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer) tool = (await knowledge_layer.get_tools(http_client=http_client))[0] with pytest.raises(DifyKnowledgeBaseClientError) as exc_info: - await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType] + await tool.function_schema.call( # pyright: ignore[reportArgumentType] + {"set_name": "Support KB", "query": "reset"}, None + ) assert exc_info.value.status_code == 403 asyncio.run(scenario()) @@ -343,7 +466,9 @@ def test_knowledge_layer_raises_for_malformed_success_responses() -> None: knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer) tool = (await knowledge_layer.get_tools(http_client=http_client))[0] with pytest.raises(DifyKnowledgeBaseClientError) as exc_info: - await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType] + await tool.function_schema.call( # pyright: ignore[reportArgumentType] + {"set_name": "Support KB", "query": "reset"}, None + ) assert exc_info.value.error_code == "invalid_response" assert exc_info.value.retryable is False @@ -411,7 +536,9 @@ def test_knowledge_layer_sends_execution_context_and_static_config_to_inner_api( ) as run: knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer) tool = (await knowledge_layer.get_tools(http_client=http_client))[0] - result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType] + result = await tool.function_schema.call( # pyright: ignore[reportArgumentType] + {"set_name": "Support KB", "query": "reset"}, None + ) assert result == NO_RESULTS_OBSERVATION asyncio.run(scenario()) diff --git a/dify-agent/tests/local/dify_agent/runtime/test_runner.py b/dify-agent/tests/local/dify_agent/runtime/test_runner.py index f5ddeb72367..4a64fe9090d 100644 --- a/dify-agent/tests/local/dify_agent/runtime/test_runner.py +++ b/dify-agent/tests/local/dify_agent/runtime/test_runner.py @@ -995,7 +995,7 @@ def test_runner_passes_dynamic_dify_knowledge_tools_to_agent(monkeypatch: pytest return TestModel(custom_output_text="done") # pyright: ignore[reportReturnType] async def fake_get_tools(self: DifyKnowledgeBaseLayer, *, http_client: httpx.AsyncClient) -> list[Tool[object]]: - assert self.config.dataset_ids == ["dataset-1"] + assert self.config.sets[0].dataset_ids == ["dataset-1"] assert http_client.headers.get("X-Test-Client") == "dify-api" return [Tool(knowledge_tool, name="knowledge_base_search")] @@ -1055,8 +1055,15 @@ def test_runner_passes_dynamic_dify_knowledge_tools_to_agent(monkeypatch: pytest deps={"execution_context": "execution_context"}, config=DifyKnowledgeBaseLayerConfig.model_validate( { - "dataset_ids": ["dataset-1"], - "retrieval": {"mode": "multiple", "top_k": 4}, + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 4}, + } + ], } ), ), diff --git a/dify-agent/tests/local/dify_agent/server/test_app.py b/dify-agent/tests/local/dify_agent/server/test_app.py index 8e40bd683b9..ea0bc3b2977 100644 --- a/dify-agent/tests/local/dify_agent/server/test_app.py +++ b/dify-agent/tests/local/dify_agent/server/test_app.py @@ -231,8 +231,15 @@ def test_create_app_creates_scheduler_and_closes_after_shutdown(monkeypatch: pyt knowledge_layer = knowledge_provider.create_layer( DifyKnowledgeBaseLayerConfig.model_validate( { - "dataset_ids": ["dataset-1"], - "retrieval": {"mode": "multiple", "top_k": 2}, + "sets": [ + { + "id": "support", + "name": "Support KB", + "datasets": [{"id": "dataset-1"}], + "query": {"mode": "generated_query"}, + "retrieval": {"mode": "multiple", "top_k": 2}, + } + ], } ) ) diff --git a/dify-agent/tests/local/dify_agent/test_import_boundaries.py b/dify-agent/tests/local/dify_agent/test_import_boundaries.py index 104f12031f0..c24941fae7f 100644 --- a/dify-agent/tests/local/dify_agent/test_import_boundaries.py +++ b/dify-agent/tests/local/dify_agent/test_import_boundaries.py @@ -115,7 +115,7 @@ def test_protocol_and_dify_plugin_exports_do_not_import_server_only_modules() -> "assert dify_agent_layers_execution_context.__all__ == ['DIFY_EXECUTION_CONTEXT_LAYER_TYPE_ID', 'DifyExecutionContextAgentMode', 'DifyExecutionContextInvokeFrom', 'DifyExecutionContextLayerConfig', 'DifyExecutionContextUserFrom']", "assert dify_agent_layers_ask_human.__all__ == ['AskHumanAction', 'AskHumanActionStyle', 'AskHumanField', 'AskHumanFieldType', 'AskHumanFileField', 'AskHumanFileListField', 'AskHumanParagraphField', 'AskHumanResultStatus', 'AskHumanSelectField', 'AskHumanSelectOption', 'AskHumanSelectedAction', 'AskHumanToolArgs', 'AskHumanToolResult', 'AskHumanUrgency', 'DEFAULT_ASK_HUMAN_TOOL_DESCRIPTION', 'DIFY_ASK_HUMAN_LAYER_TYPE_ID', 'DifyAskHumanLayerConfig']", "assert dify_agent_layers_dify_plugin.__all__ == ['DIFY_PLUGIN_LLM_LAYER_TYPE_ID', 'DIFY_PLUGIN_TOOLS_LAYER_TYPE_ID', 'DifyPluginCredentialValue', 'DifyPluginLLMLayerConfig', 'DifyPluginToolCredentialType', 'DifyPluginToolConfig', 'DifyPluginToolOption', 'DifyPluginToolParameter', 'DifyPluginToolParameterForm', 'DifyPluginToolParameterType', 'DifyPluginToolsLayerConfig', 'DifyPluginToolValue']", - "assert dify_agent_layers_knowledge.__all__ == ['DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID', 'DifyKnowledgeBaseLayerConfig', 'DifyKnowledgeMetadataCondition', 'DifyKnowledgeMetadataConditions', 'DifyKnowledgeMetadataFilteringConfig', 'DifyKnowledgeModelConfig', 'DifyKnowledgeRerankingModelConfig', 'DifyKnowledgeRetrievalConfig']", + "assert dify_agent_layers_knowledge.__all__ == ['DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID', 'DifyKnowledgeBaseLayerConfig', 'DifyKnowledgeDatasetConfig', 'DifyKnowledgeEagerResult', 'DifyKnowledgeMetadataCondition', 'DifyKnowledgeMetadataConditions', 'DifyKnowledgeMetadataFilteringConfig', 'DifyKnowledgeModelConfig', 'DifyKnowledgeQueryConfig', 'DifyKnowledgeRerankingModelConfig', 'DifyKnowledgeRetrievalConfig', 'DifyKnowledgeRuntimeState', 'DifyKnowledgeSetConfig']", "assert dify_agent_layers_output.__all__ == ['DIFY_OUTPUT_LAYER_TYPE_ID', 'DifyOutputLayerConfig']", "assert dify_agent_layers_shell.__all__ == ['DIFY_SHELL_LAYER_TYPE_ID', 'DifyShellCliToolConfig', 'DifyShellEnvVarConfig', 'DifyShellLayerConfig', 'DifyShellSandboxConfig', 'DifyShellSecretRefConfig']", ], diff --git a/packages/contracts/generated/api/console/agent/types.gen.ts b/packages/contracts/generated/api/console/agent/types.gen.ts index aa21f2ce651..4d4c83e1a71 100644 --- a/packages/contracts/generated/api/console/agent/types.gen.ts +++ b/packages/contracts/generated/api/console/agent/types.gen.ts @@ -563,7 +563,7 @@ export type AgentComposerSoulCandidatesResponse = { cli_tools?: Array dify_tools?: Array human_contacts?: Array - knowledge_datasets?: Array + knowledge_sets?: Array } export type ComposerCandidateCapabilities = { @@ -926,9 +926,7 @@ export type AgentSoulHumanConfig = { } export type AgentSoulKnowledgeConfig = { - datasets?: Array - query_config?: AgentKnowledgeQueryConfig - query_mode?: AgentKnowledgeQueryMode | null + sets?: Array } export type AgentSoulMemoryConfig = { @@ -1069,11 +1067,12 @@ export type AgentComposerDifyToolCandidateResponse = { tools_count?: number | null } -export type AgentKnowledgeDatasetConfig = { +export type AgentComposerKnowledgeSetCandidateResponse = { + datasets?: Array description?: string | null - id?: string | null - name?: string | null - [key: string]: unknown + id: string + missing_dataset_ids?: Array + name: string } export type AgentModerationProviderConfig = { @@ -1228,16 +1227,16 @@ export type AgentHumanToolConfig = { [key: string]: unknown } -export type AgentKnowledgeQueryConfig = { - query?: string | null - score_threshold?: number | null - score_threshold_enabled?: boolean | null - top_k?: number | null - [key: string]: unknown +export type AgentKnowledgeSetConfig = { + datasets: Array + description?: string | null + id: string + metadata_filtering?: AgentKnowledgeMetadataFilteringConfig + name: string + query: AgentKnowledgeQueryConfig + retrieval: AgentKnowledgeRetrievalConfig } -export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query' - export type AgentMemoryArtifactConfig = { id?: string | null name?: string | null @@ -1376,6 +1375,13 @@ export type AgentPermissionConfig = { export type AgentCliToolRiskLevel = 'dangerous' | 'safe' | 'unknown' +export type AgentComposerKnowledgeDatasetCandidateResponse = { + description?: string | null + id?: string | null + missing?: boolean + name?: string | null +} + export type AgentModerationIoConfig = { enabled?: boolean preset_response?: string | null @@ -1404,6 +1410,34 @@ export type FormInputConfig export type JsonValue2 = unknown +export type AgentKnowledgeDatasetConfig = { + description?: string | null + id?: string | null + name?: string | null +} + +export type AgentKnowledgeMetadataFilteringConfig = { + conditions?: AgentKnowledgeMetadataConditions | null + mode?: 'automatic' | 'disabled' | 'manual' + model_config?: AgentKnowledgeModelConfig | null +} + +export type AgentKnowledgeQueryConfig = { + mode: AgentKnowledgeQueryMode + value?: string | null +} + +export type AgentKnowledgeRetrievalConfig = { + mode: 'multiple' | 'single' + model?: AgentKnowledgeModelConfig | null + reranking_enable?: boolean + reranking_mode?: string + reranking_model?: AgentKnowledgeRerankingModelConfig | null + score_threshold?: number | null + top_k?: number | null + weights?: AgentKnowledgeWeightedScoreConfig | null +} + export type AgentModelResponseFormatConfig = { type?: string | null [key: string]: unknown @@ -1454,6 +1488,38 @@ export type FileListInputConfig = { type?: 'file-list' } +export type AgentKnowledgeMetadataConditions = { + conditions?: Array + logical_operator?: 'and' | 'or' +} + +export type AgentKnowledgeModelConfig = { + completion_params?: { + [key: string]: unknown + } + mode: string + name: string + provider: string +} + +export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query' + +export type AgentKnowledgeRerankingModelConfig = { + model: string + provider: string +} + +export type AgentKnowledgeWeightedScoreConfig = { + keyword_setting?: { + [key: string]: unknown + } | null + vector_setting?: { + [key: string]: unknown + } | null + weight_type?: string | null + [key: string]: unknown +} + export type StringSource = { selector?: Array type: ValueSourceType @@ -1470,6 +1536,30 @@ export type FileType = 'audio' | 'custom' | 'document' | 'image' | 'video' export type FileTransferMethod = 'datasource_file' | 'local_file' | 'remote_url' | 'tool_file' +export type AgentKnowledgeMetadataCondition = { + comparison_operator: + | '<' + | '=' + | '>' + | 'after' + | 'before' + | 'contains' + | 'empty' + | 'end with' + | 'in' + | 'is' + | 'is not' + | 'not contains' + | 'not empty' + | 'not in' + | 'start with' + | '≠' + | '≤' + | '≥' + name: string + value?: string | Array | number | null +} + export type ValueSourceType = 'constant' | 'variable' export type AgentAppPaginationWritable = { diff --git a/packages/contracts/generated/api/console/agent/zod.gen.ts b/packages/contracts/generated/api/console/agent/zod.gen.ts index cb4107f2d53..43e4f61fc4e 100644 --- a/packages/contracts/generated/api/console/agent/zod.gen.ts +++ b/packages/contracts/generated/api/console/agent/zod.gen.ts @@ -1022,15 +1022,6 @@ export const zAgentComposerDifyToolCandidateResponse = z.object({ tools_count: z.int().nullish(), }) -/** - * AgentKnowledgeDatasetConfig - */ -export const zAgentKnowledgeDatasetConfig = z.object({ - description: z.string().nullish(), - id: z.string().max(255).nullish(), - name: z.string().max(255).nullish(), -}) - /** * SimpleAccount */ @@ -1279,30 +1270,6 @@ export const zAgentSoulHumanConfig = z.object({ tools: z.array(zAgentHumanToolConfig).optional(), }) -/** - * AgentKnowledgeQueryConfig - */ -export const zAgentKnowledgeQueryConfig = z.object({ - query: z.string().nullish(), - score_threshold: z.number().gte(0).lte(1).nullish(), - score_threshold_enabled: z.boolean().nullish(), - top_k: z.int().gte(1).nullish(), -}) - -/** - * AgentKnowledgeQueryMode - */ -export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query']) - -/** - * AgentSoulKnowledgeConfig - */ -export const zAgentSoulKnowledgeConfig = z.object({ - datasets: z.array(zAgentKnowledgeDatasetConfig).optional(), - query_config: zAgentKnowledgeQueryConfig.optional(), - query_mode: zAgentKnowledgeQueryMode.nullish(), -}) - /** * AgentMemoryArtifactConfig */ @@ -1521,6 +1488,27 @@ export const zAgentCliToolConfig = z.object({ tool_name: z.string().max(255).nullish(), }) +/** + * AgentComposerKnowledgeDatasetCandidateResponse + */ +export const zAgentComposerKnowledgeDatasetCandidateResponse = z.object({ + description: z.string().nullish(), + id: z.string().max(255).nullish(), + missing: z.boolean().optional().default(false), + name: z.string().max(255).nullish(), +}) + +/** + * AgentComposerKnowledgeSetCandidateResponse + */ +export const zAgentComposerKnowledgeSetCandidateResponse = z.object({ + datasets: z.array(zAgentComposerKnowledgeDatasetCandidateResponse).optional(), + description: z.string().nullish(), + id: z.string(), + missing_dataset_ids: z.array(z.string()).optional(), + name: z.string(), +}) + /** * AgentComposerSoulCandidatesResponse */ @@ -1528,7 +1516,7 @@ export const zAgentComposerSoulCandidatesResponse = z.object({ cli_tools: z.array(zAgentCliToolConfig).optional(), dify_tools: z.array(zAgentComposerDifyToolCandidateResponse).optional(), human_contacts: z.array(zAgentHumanContactConfig).optional(), - knowledge_datasets: z.array(zAgentKnowledgeDatasetConfig).optional(), + knowledge_sets: z.array(zAgentComposerKnowledgeSetCandidateResponse).optional(), }) /** @@ -1583,6 +1571,15 @@ export const zHumanInputFormSubmissionData = z.object({ submitted_data: z.record(z.string(), zJsonValue2).nullish(), }) +/** + * AgentKnowledgeDatasetConfig + */ +export const zAgentKnowledgeDatasetConfig = z.object({ + description: z.string().nullish(), + id: z.string().max(255).nullish(), + name: z.string().max(255).nullish(), +}) + /** * AgentModelResponseFormatConfig */ @@ -1733,53 +1730,6 @@ export const zAgentSoulToolsConfig = z.object({ dify_tools: z.array(zAgentSoulDifyToolConfig).optional(), }) -/** - * AgentSoulConfig - */ -export const zAgentSoulConfig = z.object({ - app_features: zAgentSoulAppFeaturesConfig.optional(), - app_variables: z.array(zAppVariableConfig).optional(), - env: zAgentSoulEnvConfig.optional(), - human: zAgentSoulHumanConfig.optional(), - knowledge: zAgentSoulKnowledgeConfig.optional(), - memory: zAgentSoulMemoryConfig.optional(), - misc_legacy: zAgentSoulAppFeaturesConfig.optional(), - model: zAgentSoulModelConfig.nullish(), - prompt: zAgentSoulPromptConfig.optional(), - sandbox: zAgentSoulSandboxConfig.optional(), - schema_version: z.int().optional().default(1), - tools: zAgentSoulToolsConfig.optional(), -}) - -/** - * AgentAppComposerResponse - */ -export const zAgentAppComposerResponse = z.object({ - active_config_snapshot: zAgentConfigSnapshotSummaryResponse, - agent: zAgentComposerAgentResponse, - agent_soul: zAgentSoulConfig, - save_options: z.array(zComposerSaveStrategy), - validation: zComposerValidationFindingsResponse.nullish(), - variant: z.literal('agent_app'), -}) - -/** - * AgentConfigSnapshotDetailResponse - */ -export const zAgentConfigSnapshotDetailResponse = z.object({ - agent_id: z.string().nullish(), - config_snapshot: zAgentSoulConfig, - created_at: z.int().nullish(), - created_by: z.string().nullish(), - display_version: z.int().nullish(), - id: z.string(), - revisions: z.array(zAgentConfigRevisionResponse).optional(), - snapshot_version: z.int().nullish(), - summary: z.string().nullish(), - version: z.int(), - version_note: z.string().nullish(), -}) - /** * OutputErrorStrategy * @@ -1869,22 +1819,6 @@ export const zWorkflowNodeJobConfig = z.object({ workflow_prompt: z.string().optional().default(''), }) -/** - * ComposerSavePayload - */ -export const zComposerSavePayload = z.object({ - agent_soul: zAgentSoulConfig.nullish(), - binding: zComposerBindingPayload.nullish(), - client_revision_id: z.string().nullish(), - idempotency_key: z.string().nullish(), - new_agent_name: z.string().min(1).max(255).nullish(), - node_job: zWorkflowNodeJobConfig.nullish(), - save_strategy: zComposerSaveStrategy, - soul_lock: zComposerSoulLockPayload.optional(), - variant: zComposerVariant, - version_note: z.string().nullish(), -}) - /** * ButtonStyle * @@ -1903,6 +1837,60 @@ export const zUserActionConfig = z.object({ title: z.string().max(100), }) +/** + * AgentKnowledgeModelConfig + */ +export const zAgentKnowledgeModelConfig = z.object({ + completion_params: z.record(z.string(), z.unknown()).optional(), + mode: z.string().min(1).max(64), + name: z.string().min(1).max(255), + provider: z.string().min(1).max(255), +}) + +/** + * AgentKnowledgeQueryMode + */ +export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query']) + +/** + * AgentKnowledgeQueryConfig + */ +export const zAgentKnowledgeQueryConfig = z.object({ + mode: zAgentKnowledgeQueryMode, + value: z.string().nullish(), +}) + +/** + * AgentKnowledgeRerankingModelConfig + */ +export const zAgentKnowledgeRerankingModelConfig = z.object({ + model: z.string().min(1).max(255), + provider: z.string().min(1).max(255), +}) + +/** + * AgentKnowledgeWeightedScoreConfig + */ +export const zAgentKnowledgeWeightedScoreConfig = z.object({ + keyword_setting: z.record(z.string(), z.unknown()).nullish(), + vector_setting: z.record(z.string(), z.unknown()).nullish(), + weight_type: z.string().max(64).nullish(), +}) + +/** + * AgentKnowledgeRetrievalConfig + */ +export const zAgentKnowledgeRetrievalConfig = z.object({ + mode: z.enum(['multiple', 'single']), + model: zAgentKnowledgeModelConfig.nullish(), + reranking_enable: z.boolean().optional().default(true), + reranking_mode: z.string().optional().default('reranking_model'), + reranking_model: zAgentKnowledgeRerankingModelConfig.nullish(), + score_threshold: z.number().gte(0).lte(1).nullish(), + top_k: z.int().gte(1).nullish(), + weights: zAgentKnowledgeWeightedScoreConfig.nullish(), +}) + /** * FileType */ @@ -1941,6 +1929,134 @@ export const zFileListInputConfig = z.object({ type: z.literal('file-list').optional().default('file-list'), }) +/** + * AgentKnowledgeMetadataCondition + */ +export const zAgentKnowledgeMetadataCondition = z.object({ + comparison_operator: z.enum([ + '<', + '=', + '>', + 'after', + 'before', + 'contains', + 'empty', + 'end with', + 'in', + 'is', + 'is not', + 'not contains', + 'not empty', + 'not in', + 'start with', + '≠', + '≤', + '≥', + ]), + name: z.string().min(1).max(255), + value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(), +}) + +/** + * AgentKnowledgeMetadataConditions + */ +export const zAgentKnowledgeMetadataConditions = z.object({ + conditions: z.array(zAgentKnowledgeMetadataCondition).optional(), + logical_operator: z.enum(['and', 'or']).optional().default('and'), +}) + +/** + * AgentKnowledgeMetadataFilteringConfig + */ +export const zAgentKnowledgeMetadataFilteringConfig = z.object({ + conditions: zAgentKnowledgeMetadataConditions.nullish(), + mode: z.enum(['automatic', 'disabled', 'manual']).optional().default('disabled'), + model_config: zAgentKnowledgeModelConfig.nullish(), +}) + +/** + * AgentKnowledgeSetConfig + */ +export const zAgentKnowledgeSetConfig = z.object({ + datasets: z.array(zAgentKnowledgeDatasetConfig), + description: z.string().nullish(), + id: z.string().min(1).max(255), + metadata_filtering: zAgentKnowledgeMetadataFilteringConfig.optional(), + name: z.string().min(1).max(255), + query: zAgentKnowledgeQueryConfig, + retrieval: zAgentKnowledgeRetrievalConfig, +}) + +/** + * AgentSoulKnowledgeConfig + */ +export const zAgentSoulKnowledgeConfig = z.object({ + sets: z.array(zAgentKnowledgeSetConfig).optional(), +}) + +/** + * AgentSoulConfig + */ +export const zAgentSoulConfig = z.object({ + app_features: zAgentSoulAppFeaturesConfig.optional(), + app_variables: z.array(zAppVariableConfig).optional(), + env: zAgentSoulEnvConfig.optional(), + human: zAgentSoulHumanConfig.optional(), + knowledge: zAgentSoulKnowledgeConfig.optional(), + memory: zAgentSoulMemoryConfig.optional(), + misc_legacy: zAgentSoulAppFeaturesConfig.optional(), + model: zAgentSoulModelConfig.nullish(), + prompt: zAgentSoulPromptConfig.optional(), + sandbox: zAgentSoulSandboxConfig.optional(), + schema_version: z.int().optional().default(1), + tools: zAgentSoulToolsConfig.optional(), +}) + +/** + * AgentAppComposerResponse + */ +export const zAgentAppComposerResponse = z.object({ + active_config_snapshot: zAgentConfigSnapshotSummaryResponse, + agent: zAgentComposerAgentResponse, + agent_soul: zAgentSoulConfig, + save_options: z.array(zComposerSaveStrategy), + validation: zComposerValidationFindingsResponse.nullish(), + variant: z.literal('agent_app'), +}) + +/** + * ComposerSavePayload + */ +export const zComposerSavePayload = z.object({ + agent_soul: zAgentSoulConfig.nullish(), + binding: zComposerBindingPayload.nullish(), + client_revision_id: z.string().nullish(), + idempotency_key: z.string().nullish(), + new_agent_name: z.string().min(1).max(255).nullish(), + node_job: zWorkflowNodeJobConfig.nullish(), + save_strategy: zComposerSaveStrategy, + soul_lock: zComposerSoulLockPayload.optional(), + variant: zComposerVariant, + version_note: z.string().nullish(), +}) + +/** + * AgentConfigSnapshotDetailResponse + */ +export const zAgentConfigSnapshotDetailResponse = z.object({ + agent_id: z.string().nullish(), + config_snapshot: zAgentSoulConfig, + created_at: z.int().nullish(), + created_by: z.string().nullish(), + display_version: z.int().nullish(), + id: z.string(), + revisions: z.array(zAgentConfigRevisionResponse).optional(), + snapshot_version: z.int().nullish(), + summary: z.string().nullish(), + version: z.int(), + version_note: z.string().nullish(), +}) + /** * ValueSourceType * diff --git a/packages/contracts/generated/api/console/apps/types.gen.ts b/packages/contracts/generated/api/console/apps/types.gen.ts index fa56590f0a4..9b31296275b 100644 --- a/packages/contracts/generated/api/console/apps/types.gen.ts +++ b/packages/contracts/generated/api/console/apps/types.gen.ts @@ -1890,7 +1890,7 @@ export type AgentComposerSoulCandidatesResponse = { cli_tools?: Array dify_tools?: Array human_contacts?: Array - knowledge_datasets?: Array + knowledge_sets?: Array } export type ComposerCandidateCapabilities = { @@ -2124,9 +2124,7 @@ export type AgentSoulHumanConfig = { } export type AgentSoulKnowledgeConfig = { - datasets?: Array - query_config?: AgentKnowledgeQueryConfig - query_mode?: AgentKnowledgeQueryMode | null + sets?: Array } export type AgentSoulMemoryConfig = { @@ -2278,11 +2276,12 @@ export type AgentComposerDifyToolCandidateResponse = { tools_count?: number | null } -export type AgentKnowledgeDatasetConfig = { +export type AgentComposerKnowledgeSetCandidateResponse = { + datasets?: Array description?: string | null - id?: string | null - name?: string | null - [key: string]: unknown + id: string + missing_dataset_ids?: Array + name: string } export type CheckResultView = { @@ -2393,16 +2392,16 @@ export type AgentHumanToolConfig = { [key: string]: unknown } -export type AgentKnowledgeQueryConfig = { - query?: string | null - score_threshold?: number | null - score_threshold_enabled?: boolean | null - top_k?: number | null - [key: string]: unknown +export type AgentKnowledgeSetConfig = { + datasets: Array + description?: string | null + id: string + metadata_filtering?: AgentKnowledgeMetadataFilteringConfig + name: string + query: AgentKnowledgeQueryConfig + retrieval: AgentKnowledgeRetrievalConfig } -export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query' - export type AgentMemoryArtifactConfig = { id?: string | null name?: string | null @@ -2506,6 +2505,13 @@ export type AgentPermissionConfig = { export type AgentCliToolRiskLevel = 'dangerous' | 'safe' | 'unknown' +export type AgentComposerKnowledgeDatasetCandidateResponse = { + description?: string | null + id?: string | null + missing?: boolean + name?: string | null +} + export type ButtonStyle = 'accent' | 'default' | 'ghost' | 'primary' export type ParagraphInputConfig = { @@ -2545,6 +2551,34 @@ export type AgentModerationProviderConfig = { [key: string]: unknown } +export type AgentKnowledgeDatasetConfig = { + description?: string | null + id?: string | null + name?: string | null +} + +export type AgentKnowledgeMetadataFilteringConfig = { + conditions?: AgentKnowledgeMetadataConditions | null + mode?: 'automatic' | 'disabled' | 'manual' + model_config?: AgentKnowledgeModelConfig | null +} + +export type AgentKnowledgeQueryConfig = { + mode: AgentKnowledgeQueryMode + value?: string | null +} + +export type AgentKnowledgeRetrievalConfig = { + mode: 'multiple' | 'single' + model?: AgentKnowledgeModelConfig | null + reranking_enable?: boolean + reranking_mode?: string + reranking_model?: AgentKnowledgeRerankingModelConfig | null + score_threshold?: number | null + top_k?: number | null + weights?: AgentKnowledgeWeightedScoreConfig | null +} + export type AgentModelResponseFormatConfig = { type?: string | null [key: string]: unknown @@ -2578,8 +2612,64 @@ export type AgentModerationIoConfig = { [key: string]: unknown } +export type AgentKnowledgeMetadataConditions = { + conditions?: Array + logical_operator?: 'and' | 'or' +} + +export type AgentKnowledgeModelConfig = { + completion_params?: { + [key: string]: unknown + } + mode: string + name: string + provider: string +} + +export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query' + +export type AgentKnowledgeRerankingModelConfig = { + model: string + provider: string +} + +export type AgentKnowledgeWeightedScoreConfig = { + keyword_setting?: { + [key: string]: unknown + } | null + vector_setting?: { + [key: string]: unknown + } | null + weight_type?: string | null + [key: string]: unknown +} + export type ValueSourceType = 'constant' | 'variable' +export type AgentKnowledgeMetadataCondition = { + comparison_operator: + | '<' + | '=' + | '>' + | 'after' + | 'before' + | 'contains' + | 'empty' + | 'end with' + | 'in' + | 'is' + | 'is not' + | 'not contains' + | 'not empty' + | 'not in' + | 'start with' + | '≠' + | '≤' + | '≥' + name: string + value?: string | Array | number | null +} + export type AppPaginationWritable = { data: Array has_more: boolean diff --git a/packages/contracts/generated/api/console/apps/zod.gen.ts b/packages/contracts/generated/api/console/apps/zod.gen.ts index 043fc11261f..b3c0f05bf6b 100644 --- a/packages/contracts/generated/api/console/apps/zod.gen.ts +++ b/packages/contracts/generated/api/console/apps/zod.gen.ts @@ -2629,15 +2629,6 @@ export const zAgentComposerDifyToolCandidateResponse = z.object({ tools_count: z.int().nullish(), }) -/** - * AgentKnowledgeDatasetConfig - */ -export const zAgentKnowledgeDatasetConfig = z.object({ - description: z.string().nullish(), - id: z.string().max(255).nullish(), - name: z.string().max(255).nullish(), -}) - /** * CheckResultView * @@ -2767,30 +2758,6 @@ export const zAgentSoulHumanConfig = z.object({ tools: z.array(zAgentHumanToolConfig).optional(), }) -/** - * AgentKnowledgeQueryConfig - */ -export const zAgentKnowledgeQueryConfig = z.object({ - query: z.string().nullish(), - score_threshold: z.number().gte(0).lte(1).nullish(), - score_threshold_enabled: z.boolean().nullish(), - top_k: z.int().gte(1).nullish(), -}) - -/** - * AgentKnowledgeQueryMode - */ -export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query']) - -/** - * AgentSoulKnowledgeConfig - */ -export const zAgentSoulKnowledgeConfig = z.object({ - datasets: z.array(zAgentKnowledgeDatasetConfig).optional(), - query_config: zAgentKnowledgeQueryConfig.optional(), - query_mode: zAgentKnowledgeQueryMode.nullish(), -}) - /** * AgentMemoryArtifactConfig */ @@ -3002,6 +2969,27 @@ export const zAgentCliToolConfig = z.object({ tool_name: z.string().max(255).nullish(), }) +/** + * AgentComposerKnowledgeDatasetCandidateResponse + */ +export const zAgentComposerKnowledgeDatasetCandidateResponse = z.object({ + description: z.string().nullish(), + id: z.string().max(255).nullish(), + missing: z.boolean().optional().default(false), + name: z.string().max(255).nullish(), +}) + +/** + * AgentComposerKnowledgeSetCandidateResponse + */ +export const zAgentComposerKnowledgeSetCandidateResponse = z.object({ + datasets: z.array(zAgentComposerKnowledgeDatasetCandidateResponse).optional(), + description: z.string().nullish(), + id: z.string(), + missing_dataset_ids: z.array(z.string()).optional(), + name: z.string(), +}) + /** * AgentComposerSoulCandidatesResponse */ @@ -3009,7 +2997,7 @@ export const zAgentComposerSoulCandidatesResponse = z.object({ cli_tools: z.array(zAgentCliToolConfig).optional(), dify_tools: z.array(zAgentComposerDifyToolCandidateResponse).optional(), human_contacts: z.array(zAgentHumanContactConfig).optional(), - knowledge_datasets: z.array(zAgentKnowledgeDatasetConfig).optional(), + knowledge_sets: z.array(zAgentComposerKnowledgeSetCandidateResponse).optional(), }) /** @@ -3041,6 +3029,15 @@ export const zUserActionConfig = z.object({ title: z.string().max(100), }) +/** + * AgentKnowledgeDatasetConfig + */ +export const zAgentKnowledgeDatasetConfig = z.object({ + description: z.string().nullish(), + id: z.string().max(255).nullish(), + name: z.string().max(255).nullish(), +}) + /** * AgentModelResponseFormatConfig */ @@ -3292,57 +3289,57 @@ export const zAgentSoulAppFeaturesConfig = z.object({ }) /** - * AgentSoulConfig + * AgentKnowledgeModelConfig */ -export const zAgentSoulConfig = z.object({ - app_features: zAgentSoulAppFeaturesConfig.optional(), - app_variables: z.array(zAppVariableConfig).optional(), - env: zAgentSoulEnvConfig.optional(), - human: zAgentSoulHumanConfig.optional(), - knowledge: zAgentSoulKnowledgeConfig.optional(), - memory: zAgentSoulMemoryConfig.optional(), - misc_legacy: zAgentSoulAppFeaturesConfig.optional(), - model: zAgentSoulModelConfig.nullish(), - prompt: zAgentSoulPromptConfig.optional(), - sandbox: zAgentSoulSandboxConfig.optional(), - schema_version: z.int().optional().default(1), - tools: zAgentSoulToolsConfig.optional(), +export const zAgentKnowledgeModelConfig = z.object({ + completion_params: z.record(z.string(), z.unknown()).optional(), + mode: z.string().min(1).max(64), + name: z.string().min(1).max(255), + provider: z.string().min(1).max(255), }) /** - * WorkflowAgentComposerResponse + * AgentKnowledgeQueryMode */ -export const zWorkflowAgentComposerResponse = z.object({ - active_config_snapshot: zAgentConfigSnapshotSummaryResponse.nullish(), - agent: zAgentComposerAgentResponse.nullish(), - agent_soul: zAgentSoulConfig, - app_id: z.string().nullish(), - binding: zAgentComposerBindingResponse.nullish(), - effective_declared_outputs: z.array(zDeclaredOutputConfig).optional(), - impact_summary: zAgentComposerImpactResponse.nullish(), - node_id: z.string().nullish(), - node_job: zWorkflowNodeJobConfig, - save_options: z.array(zComposerSaveStrategy), - soul_lock: zAgentComposerSoulLockResponse, - validation: zComposerValidationFindingsResponse.nullish(), - variant: z.literal('workflow'), - workflow_id: z.string().nullish(), +export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query']) + +/** + * AgentKnowledgeQueryConfig + */ +export const zAgentKnowledgeQueryConfig = z.object({ + mode: zAgentKnowledgeQueryMode, + value: z.string().nullish(), }) /** - * ComposerSavePayload + * AgentKnowledgeRerankingModelConfig */ -export const zComposerSavePayload = z.object({ - agent_soul: zAgentSoulConfig.nullish(), - binding: zComposerBindingPayload.nullish(), - client_revision_id: z.string().nullish(), - idempotency_key: z.string().nullish(), - new_agent_name: z.string().min(1).max(255).nullish(), - node_job: zWorkflowNodeJobConfig.nullish(), - save_strategy: zComposerSaveStrategy, - soul_lock: zComposerSoulLockPayload.optional(), - variant: zComposerVariant, - version_note: z.string().nullish(), +export const zAgentKnowledgeRerankingModelConfig = z.object({ + model: z.string().min(1).max(255), + provider: z.string().min(1).max(255), +}) + +/** + * AgentKnowledgeWeightedScoreConfig + */ +export const zAgentKnowledgeWeightedScoreConfig = z.object({ + keyword_setting: z.record(z.string(), z.unknown()).nullish(), + vector_setting: z.record(z.string(), z.unknown()).nullish(), + weight_type: z.string().max(64).nullish(), +}) + +/** + * AgentKnowledgeRetrievalConfig + */ +export const zAgentKnowledgeRetrievalConfig = z.object({ + mode: z.enum(['multiple', 'single']), + model: zAgentKnowledgeModelConfig.nullish(), + reranking_enable: z.boolean().optional().default(true), + reranking_mode: z.string().optional().default('reranking_model'), + reranking_model: zAgentKnowledgeRerankingModelConfig.nullish(), + score_threshold: z.number().gte(0).lte(1).nullish(), + top_k: z.int().gte(1).nullish(), + weights: zAgentKnowledgeWeightedScoreConfig.nullish(), }) /** @@ -3466,6 +3463,125 @@ export const zMessageInfiniteScrollPaginationResponse = z.object({ limit: z.int(), }) +/** + * AgentKnowledgeMetadataCondition + */ +export const zAgentKnowledgeMetadataCondition = z.object({ + comparison_operator: z.enum([ + '<', + '=', + '>', + 'after', + 'before', + 'contains', + 'empty', + 'end with', + 'in', + 'is', + 'is not', + 'not contains', + 'not empty', + 'not in', + 'start with', + '≠', + '≤', + '≥', + ]), + name: z.string().min(1).max(255), + value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(), +}) + +/** + * AgentKnowledgeMetadataConditions + */ +export const zAgentKnowledgeMetadataConditions = z.object({ + conditions: z.array(zAgentKnowledgeMetadataCondition).optional(), + logical_operator: z.enum(['and', 'or']).optional().default('and'), +}) + +/** + * AgentKnowledgeMetadataFilteringConfig + */ +export const zAgentKnowledgeMetadataFilteringConfig = z.object({ + conditions: zAgentKnowledgeMetadataConditions.nullish(), + mode: z.enum(['automatic', 'disabled', 'manual']).optional().default('disabled'), + model_config: zAgentKnowledgeModelConfig.nullish(), +}) + +/** + * AgentKnowledgeSetConfig + */ +export const zAgentKnowledgeSetConfig = z.object({ + datasets: z.array(zAgentKnowledgeDatasetConfig), + description: z.string().nullish(), + id: z.string().min(1).max(255), + metadata_filtering: zAgentKnowledgeMetadataFilteringConfig.optional(), + name: z.string().min(1).max(255), + query: zAgentKnowledgeQueryConfig, + retrieval: zAgentKnowledgeRetrievalConfig, +}) + +/** + * AgentSoulKnowledgeConfig + */ +export const zAgentSoulKnowledgeConfig = z.object({ + sets: z.array(zAgentKnowledgeSetConfig).optional(), +}) + +/** + * AgentSoulConfig + */ +export const zAgentSoulConfig = z.object({ + app_features: zAgentSoulAppFeaturesConfig.optional(), + app_variables: z.array(zAppVariableConfig).optional(), + env: zAgentSoulEnvConfig.optional(), + human: zAgentSoulHumanConfig.optional(), + knowledge: zAgentSoulKnowledgeConfig.optional(), + memory: zAgentSoulMemoryConfig.optional(), + misc_legacy: zAgentSoulAppFeaturesConfig.optional(), + model: zAgentSoulModelConfig.nullish(), + prompt: zAgentSoulPromptConfig.optional(), + sandbox: zAgentSoulSandboxConfig.optional(), + schema_version: z.int().optional().default(1), + tools: zAgentSoulToolsConfig.optional(), +}) + +/** + * WorkflowAgentComposerResponse + */ +export const zWorkflowAgentComposerResponse = z.object({ + active_config_snapshot: zAgentConfigSnapshotSummaryResponse.nullish(), + agent: zAgentComposerAgentResponse.nullish(), + agent_soul: zAgentSoulConfig, + app_id: z.string().nullish(), + binding: zAgentComposerBindingResponse.nullish(), + effective_declared_outputs: z.array(zDeclaredOutputConfig).optional(), + impact_summary: zAgentComposerImpactResponse.nullish(), + node_id: z.string().nullish(), + node_job: zWorkflowNodeJobConfig, + save_options: z.array(zComposerSaveStrategy), + soul_lock: zAgentComposerSoulLockResponse, + validation: zComposerValidationFindingsResponse.nullish(), + variant: z.literal('workflow'), + workflow_id: z.string().nullish(), +}) + +/** + * ComposerSavePayload + */ +export const zComposerSavePayload = z.object({ + agent_soul: zAgentSoulConfig.nullish(), + binding: zComposerBindingPayload.nullish(), + client_revision_id: z.string().nullish(), + idempotency_key: z.string().nullish(), + new_agent_name: z.string().min(1).max(255).nullish(), + node_job: zWorkflowNodeJobConfig.nullish(), + save_strategy: zComposerSaveStrategy, + soul_lock: zComposerSoulLockPayload.optional(), + variant: zComposerVariant, + version_note: z.string().nullish(), +}) + /** * GeneratedAppResponse */