mirror of
https://github.com/langgenius/dify.git
synced 2026-06-26 23:01:11 +08:00
feat: wire agent v2 knowledge sets backend
This commit is contained in:
parent
c3cb134e73
commit
c06d924094
@ -312,7 +312,7 @@ class AgentBackendRunRequestBuilder:
|
||||
)
|
||||
)
|
||||
|
||||
if run_input.knowledge is not None and run_input.knowledge.dataset_ids:
|
||||
if run_input.knowledge is not None and run_input.knowledge.sets:
|
||||
layers.append(
|
||||
RunLayerSpec(
|
||||
name=DIFY_KNOWLEDGE_BASE_LAYER_ID,
|
||||
@ -513,7 +513,7 @@ class AgentBackendRunRequestBuilder:
|
||||
)
|
||||
)
|
||||
|
||||
if run_input.knowledge is not None and run_input.knowledge.dataset_ids:
|
||||
if run_input.knowledge is not None and run_input.knowledge.sets:
|
||||
layers.append(
|
||||
RunLayerSpec(
|
||||
name=DIFY_KNOWLEDGE_BASE_LAYER_ID,
|
||||
|
||||
@ -105,6 +105,7 @@ class WorkflowAgentComposerValidateApi(Resource):
|
||||
def post(self, tenant_id: str, app_model: App, node_id: str):
|
||||
payload = ComposerSavePayload.model_validate(console_ns.payload or {})
|
||||
ComposerConfigValidator.validate_save_payload(payload)
|
||||
AgentComposerService.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
|
||||
findings = AgentComposerService.collect_validation_findings(
|
||||
tenant_id=tenant_id,
|
||||
payload=payload,
|
||||
@ -239,6 +240,7 @@ class AgentComposerValidateApi(Resource):
|
||||
_resolve_agent_app_id(tenant_id=tenant_id, agent_id=agent_id)
|
||||
payload = ComposerSavePayload.model_validate(console_ns.payload or {})
|
||||
ComposerConfigValidator.validate_save_payload(payload)
|
||||
AgentComposerService.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
|
||||
findings = AgentComposerService.collect_validation_findings(
|
||||
tenant_id=tenant_id,
|
||||
payload=payload,
|
||||
|
||||
@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
from typing import Any
|
||||
|
||||
from models.agent_config_entities import AgentSoulConfig
|
||||
from services.agent.knowledge_datasets import list_agent_soul_knowledge_dataset_ids
|
||||
|
||||
SUPPORTED_AGENT_BACKEND_FEATURES = frozenset(
|
||||
{
|
||||
@ -48,9 +49,7 @@ def build_runtime_feature_manifest(agent_soul: AgentSoulConfig) -> dict[str, Any
|
||||
)
|
||||
|
||||
reserved_status = dict.fromkeys(sorted(RESERVED_AGENT_BACKEND_FEATURES), "reserved_not_executed")
|
||||
reserved_status["knowledge"] = (
|
||||
"supported_by_knowledge_layer" if list_configured_knowledge_dataset_ids(agent_soul) else "not_configured"
|
||||
)
|
||||
reserved_status["knowledge"] = "supported_by_knowledge_layer" if agent_soul.knowledge.sets else "not_configured"
|
||||
reserved_status["tools.dify_tools"] = "supported_when_config_valid"
|
||||
reserved_status["tools.cli_tools"] = "supported_by_shell_bootstrap"
|
||||
reserved_status["env"] = "supported_by_shell_bootstrap"
|
||||
@ -66,14 +65,14 @@ def build_runtime_feature_manifest(agent_soul: AgentSoulConfig) -> dict[str, Any
|
||||
|
||||
|
||||
def list_configured_knowledge_dataset_ids(agent_soul: AgentSoulConfig) -> list[str]:
|
||||
"""Return the normalized knowledge dataset ids that can produce a runtime layer.
|
||||
"""Return normalized dataset ids selected by Agent v2 knowledge sets.
|
||||
|
||||
``build_runtime_feature_manifest()`` and ``build_knowledge_layer_config()``
|
||||
must stay aligned: both decide knowledge support from this effective,
|
||||
non-blank dataset-id set rather than from raw
|
||||
``agent_soul.knowledge.datasets`` entries.
|
||||
stay aligned on the set-based contract: DTO validation rejects blank dataset
|
||||
ids before runtime, so this helper only flattens configured set datasets for
|
||||
metadata/diagnostic surfaces that still need a dataset-id summary.
|
||||
"""
|
||||
return [dataset_id for dataset in agent_soul.knowledge.datasets if (dataset_id := (dataset.id or "").strip())]
|
||||
return list_agent_soul_knowledge_dataset_ids(agent_soul)
|
||||
|
||||
|
||||
def _get_nested(value: dict[str, Any], path: str) -> Any:
|
||||
|
||||
@ -15,7 +15,16 @@ from dify_agent.layers.execution_context import (
|
||||
DifyExecutionContextLayerConfig,
|
||||
DifyExecutionContextUserFrom,
|
||||
)
|
||||
from dify_agent.layers.knowledge import DifyKnowledgeBaseLayerConfig, DifyKnowledgeRetrievalConfig
|
||||
from dify_agent.layers.knowledge import (
|
||||
DifyKnowledgeBaseLayerConfig,
|
||||
DifyKnowledgeDatasetConfig,
|
||||
DifyKnowledgeMetadataFilteringConfig,
|
||||
DifyKnowledgeModelConfig,
|
||||
DifyKnowledgeQueryConfig,
|
||||
DifyKnowledgeRerankingModelConfig,
|
||||
DifyKnowledgeRetrievalConfig,
|
||||
DifyKnowledgeSetConfig,
|
||||
)
|
||||
from dify_agent.layers.shell import (
|
||||
DifyShellCliToolConfig,
|
||||
DifyShellEnvVarConfig,
|
||||
@ -40,7 +49,9 @@ from graphon.file import FileTransferMethod
|
||||
from graphon.variables.segments import Segment
|
||||
from models.agent import Agent, AgentConfigSnapshot, WorkflowAgentNodeBinding
|
||||
from models.agent_config_entities import (
|
||||
AgentKnowledgeQueryConfig,
|
||||
AgentKnowledgeMetadataFilteringConfig,
|
||||
AgentKnowledgeModelConfig,
|
||||
AgentKnowledgeRetrievalConfig,
|
||||
AgentSoulConfig,
|
||||
DeclaredArrayItem,
|
||||
DeclaredOutputChildConfig,
|
||||
@ -547,42 +558,84 @@ def build_shell_layer_config(agent_soul: AgentSoulConfig) -> DifyShellLayerConfi
|
||||
|
||||
|
||||
def build_knowledge_layer_config(agent_soul: AgentSoulConfig) -> DifyKnowledgeBaseLayerConfig | None:
|
||||
"""Map Agent Soul knowledge config into the fixed Dify knowledge-base layer.
|
||||
"""Map Agent Soul knowledge sets into one Dify knowledge-base layer.
|
||||
|
||||
Normalization intentionally matches the current dify-agent runtime contract:
|
||||
|
||||
- blank or missing dataset ids are ignored;
|
||||
- if no valid dataset ids remain, no knowledge layer is injected;
|
||||
- retrieval mode is always forced to ``multiple`` in this first wiring pass;
|
||||
- ``top_k`` falls back to a stable runtime default when the soul omits it;
|
||||
- ``score_threshold`` is only forwarded when the product config explicitly
|
||||
enables it, otherwise the layer keeps the disabled/default ``0.0`` value;
|
||||
- metadata filtering stays at the layer DTO default (disabled).
|
||||
Agent Soul DTO validation owns malformed set rejection. Runtime mapping is
|
||||
intentionally lossless: every configured set is forwarded with its query
|
||||
policy, dataset refs, retrieval controls, and metadata-filtering controls.
|
||||
``score_threshold=None`` means disabled threshold filtering and maps to the
|
||||
inner retrieval request's ``0.0`` default through the Agent backend DTO.
|
||||
"""
|
||||
dataset_ids = list_configured_knowledge_dataset_ids(agent_soul)
|
||||
if not dataset_ids:
|
||||
if not agent_soul.knowledge.sets:
|
||||
return None
|
||||
|
||||
query_config = agent_soul.knowledge.query_config
|
||||
return DifyKnowledgeBaseLayerConfig(
|
||||
dataset_ids=dataset_ids,
|
||||
retrieval=DifyKnowledgeRetrievalConfig(
|
||||
mode="multiple",
|
||||
top_k=_knowledge_top_k(query_config),
|
||||
score_threshold=_knowledge_score_threshold(query_config),
|
||||
),
|
||||
sets=[
|
||||
DifyKnowledgeSetConfig(
|
||||
id=knowledge_set.id,
|
||||
name=knowledge_set.name,
|
||||
description=knowledge_set.description,
|
||||
datasets=[
|
||||
DifyKnowledgeDatasetConfig(
|
||||
id=dataset.id or "",
|
||||
name=dataset.name,
|
||||
description=dataset.description,
|
||||
)
|
||||
for dataset in knowledge_set.datasets
|
||||
],
|
||||
query=DifyKnowledgeQueryConfig(
|
||||
mode=cast(Literal["user_query", "generated_query"], knowledge_set.query.mode.value),
|
||||
value=knowledge_set.query.value,
|
||||
),
|
||||
retrieval=_knowledge_retrieval_config(knowledge_set.retrieval),
|
||||
metadata_filtering=_knowledge_metadata_filtering_config(knowledge_set.metadata_filtering),
|
||||
)
|
||||
for knowledge_set in agent_soul.knowledge.sets
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
def _knowledge_top_k(query_config: AgentKnowledgeQueryConfig) -> int:
|
||||
top_k = query_config.top_k
|
||||
return top_k if isinstance(top_k, int) and top_k >= 1 else 4
|
||||
def _knowledge_retrieval_config(retrieval: AgentKnowledgeRetrievalConfig) -> DifyKnowledgeRetrievalConfig:
    """Map one knowledge set's retrieval policy onto the backend retrieval DTO.

    Fields are copied one-to-one. Optional parts are only built when present:

    - ``reranking_model`` becomes a ``DifyKnowledgeRerankingModelConfig`` or ``None``;
    - ``weights`` is dumped to a JSON-mode dict with ``None`` entries dropped, or ``None``;
    - ``model`` goes through ``_knowledge_model_config`` (``None`` passes through).

    Score threshold: the DTO receives ``0.0`` when the soul has no threshold
    or when ``score_threshold_enabled`` is explicitly ``False``. A stored
    value must not keep filtering results after the toggle is switched off.
    A flag of ``None`` (unspecified) leaves the stored value in effect.
    """
    if retrieval.score_threshold_enabled is False:
        # Explicitly disabled: ignore whatever value is still stored on the set.
        score_threshold = 0.0
    else:
        score_threshold = retrieval.score_threshold or 0.0

    reranking_model = None
    if retrieval.reranking_model is not None:
        reranking_model = DifyKnowledgeRerankingModelConfig(
            provider=retrieval.reranking_model.provider,
            model=retrieval.reranking_model.model,
        )

    weights = None
    if retrieval.weights is not None:
        weights = cast(dict[str, Any], retrieval.weights.model_dump(mode="json", exclude_none=True))

    return DifyKnowledgeRetrievalConfig(
        mode=retrieval.mode,
        top_k=retrieval.top_k,
        score_threshold=score_threshold,
        reranking_mode=retrieval.reranking_mode,
        reranking_enable=retrieval.reranking_enable,
        reranking_model=reranking_model,
        weights=weights,
        model=_knowledge_model_config(retrieval.model),
    )
|
||||
|
||||
|
||||
def _knowledge_score_threshold(query_config: AgentKnowledgeQueryConfig) -> float:
|
||||
if query_config.score_threshold_enabled and query_config.score_threshold is not None:
|
||||
return query_config.score_threshold
|
||||
return 0.0
|
||||
def _knowledge_metadata_filtering_config(
    metadata_filtering: AgentKnowledgeMetadataFilteringConfig,
) -> DifyKnowledgeMetadataFilteringConfig:
    """Map one knowledge set's metadata-filtering policy onto the backend DTO.

    ``mode`` is copied as-is. The metadata model (stored on the soul as
    ``metadata_model_config``) is converted through ``_knowledge_model_config``
    and passed under the DTO's ``model_config`` keyword. ``conditions`` is
    dumped in JSON mode when present and left as ``None`` otherwise.
    """
    soul_conditions = metadata_filtering.conditions
    if soul_conditions is None:
        dumped_conditions = None
    else:
        dumped_conditions = cast(Any, soul_conditions.model_dump(mode="json"))

    return DifyKnowledgeMetadataFilteringConfig(
        mode=metadata_filtering.mode,
        model_config=_knowledge_model_config(metadata_filtering.metadata_model_config),
        conditions=dumped_conditions,
    )
|
||||
|
||||
|
||||
def _knowledge_model_config(model: AgentKnowledgeModelConfig | None) -> DifyKnowledgeModelConfig | None:
    """Convert an optional soul model config into the backend model DTO.

    ``None`` passes through unchanged; otherwise ``provider``, ``name``,
    ``mode`` and ``completion_params`` are copied field by field.
    """
    return (
        None
        if model is None
        else DifyKnowledgeModelConfig(
            provider=model.provider,
            name=model.name,
            mode=model.mode,
            completion_params=model.completion_params,
        )
    )
|
||||
|
||||
|
||||
def build_ask_human_layer_config(agent_soul: AgentSoulConfig) -> DifyAskHumanLayerConfig | None:
|
||||
|
||||
@ -18,6 +18,7 @@ from models.agent_config_entities import (
|
||||
)
|
||||
from models.model import UploadFile
|
||||
from models.workflow import Workflow
|
||||
from services.agent.knowledge_datasets import list_missing_tenant_knowledge_dataset_ids
|
||||
|
||||
from .entities import DifyAgentNodeData
|
||||
|
||||
@ -146,6 +147,7 @@ class WorkflowAgentNodeValidator:
|
||||
)
|
||||
cls._validate_agent_soul_env(binding=binding, agent_soul=agent_soul)
|
||||
cls._validate_agent_soul_tools(binding=binding, agent_soul=agent_soul)
|
||||
cls._validate_agent_soul_knowledge(binding=binding, agent_soul=agent_soul)
|
||||
node_job = WorkflowNodeJobConfig.model_validate(binding.node_job_config_dict)
|
||||
cls.validate_node_job(session=session, binding=binding, node_job=node_job, topology=topology)
|
||||
|
||||
@ -364,6 +366,24 @@ class WorkflowAgentNodeValidator:
|
||||
)
|
||||
cli_tool_names.add(normalized_name)
|
||||
|
||||
@classmethod
def _validate_agent_soul_knowledge(
    cls,
    *,
    binding: WorkflowAgentNodeBinding,
    agent_soul: AgentSoulConfig,
) -> None:
    """Reject a node whose knowledge sets reference datasets unavailable to its tenant.

    The lookup is delegated to ``list_missing_tenant_knowledge_dataset_ids``,
    scoped to ``binding.tenant_id``.

    Raises:
        WorkflowAgentNodeValidationError: when the lookup returns any ids; the
            message names the node and lists the offending dataset ids.
    """
    unresolved_ids = list_missing_tenant_knowledge_dataset_ids(
        tenant_id=binding.tenant_id,
        agent_soul=agent_soul,
    )
    if not unresolved_ids:
        return

    raise WorkflowAgentNodeValidationError(
        f"Workflow Agent node {binding.node_id} references missing or out-of-scope knowledge datasets: "
        f"{', '.join(unresolved_ids)}."
    )
|
||||
|
||||
@classmethod
|
||||
def _validate_agent_soul_env(
|
||||
cls,
|
||||
|
||||
@ -400,10 +400,22 @@ class AgentComposerNodeJobCandidatesResponse(ResponseModel):
|
||||
human_contacts: list[AgentHumanContactConfig] = Field(default_factory=list)
|
||||
|
||||
|
||||
class AgentComposerKnowledgeDatasetCandidateResponse(AgentKnowledgeDatasetConfig):
    """Dataset candidate row: the stored dataset reference plus a ``missing`` marker."""

    # Defaults to False; callers set it when the dataset could not be resolved.
    missing: bool = False
|
||||
|
||||
|
||||
class AgentComposerKnowledgeSetCandidateResponse(ResponseModel):
    """Knowledge-set candidate: set identity plus its dataset candidate rows."""

    id: str
    name: str
    description: str | None = None
    # Per-dataset rows, each carrying its own ``missing`` flag.
    datasets: list[AgentComposerKnowledgeDatasetCandidateResponse] = Field(default_factory=list)
    # Flat list of dataset ids reported as missing for this set.
    missing_dataset_ids: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class AgentComposerSoulCandidatesResponse(ResponseModel):
|
||||
dify_tools: list[AgentComposerDifyToolCandidateResponse] = Field(default_factory=list)
|
||||
cli_tools: list[AgentCliToolConfig] = Field(default_factory=list)
|
||||
knowledge_datasets: list[AgentKnowledgeDatasetConfig] = Field(default_factory=list)
|
||||
knowledge_sets: list[AgentComposerKnowledgeSetCandidateResponse] = Field(default_factory=list)
|
||||
human_contacts: list[AgentHumanContactConfig] = Field(default_factory=list)
|
||||
|
||||
|
||||
|
||||
@ -2,10 +2,11 @@ from __future__ import annotations
|
||||
|
||||
import re
|
||||
from enum import StrEnum
|
||||
from typing import Annotated, Any, Final, Literal
|
||||
from typing import Annotated, Any, Final, Literal, Self
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, WithJsonSchema, field_validator, model_validator
|
||||
|
||||
from core.rag.entities.metadata_entities import ConditionValue, SupportedComparisonOperator
|
||||
from core.workflow.file_reference import is_canonical_file_reference
|
||||
from graphon.file import FileTransferMethod
|
||||
|
||||
@ -236,17 +237,161 @@ class AgentCliToolConfig(AgentFlexibleConfig):
|
||||
inferred_from: str | None = Field(default=None, max_length=255)
|
||||
|
||||
|
||||
class AgentKnowledgeDatasetConfig(AgentFlexibleConfig):
|
||||
class AgentKnowledgeDatasetConfig(BaseModel):
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
id: str | None = Field(default=None, max_length=255)
|
||||
name: str | None = Field(default=None, max_length=255)
|
||||
description: str | None = None
|
||||
|
||||
|
||||
class AgentKnowledgeQueryConfig(AgentFlexibleConfig):
|
||||
query: str | None = None
|
||||
class AgentKnowledgeQueryConfig(BaseModel):
    """Query policy owned by a single Agent v2 knowledge set.

    Knowledge is stored as explicit ``knowledge.sets``; the legacy flat
    ``datasets`` / ``query_mode`` / ``query_config`` shape is gone. Because
    every set carries its own policy, ``user_query`` mode must come with an
    explicit ``value``, while ``generated_query`` leaves ``value`` empty.
    """

    model_config = ConfigDict(extra="forbid")

    mode: AgentKnowledgeQueryMode
    value: str | None = None

    @model_validator(mode="after")
    def validate_query(self) -> Self:
        """Require a non-blank ``value`` when the mode is ``user_query``."""
        if self.mode != AgentKnowledgeQueryMode.USER_QUERY:
            return self
        # None, empty, and whitespace-only values all count as missing.
        if self.value is None or not self.value.strip():
            raise ValueError("knowledge query.value is required for user_query mode")
        return self
|
||||
|
||||
|
||||
class AgentKnowledgeModelConfig(BaseModel):
    """Model reference used by knowledge retrieval or metadata filtering.

    Unknown keys are rejected; ``provider``, ``name`` and ``mode`` must be
    non-empty strings within their length limits.
    """

    model_config = ConfigDict(extra="forbid")

    provider: str = Field(min_length=1, max_length=255)
    name: str = Field(min_length=1, max_length=255)
    mode: str = Field(min_length=1, max_length=64)
    # Free-form completion parameters; defaults to an empty dict.
    completion_params: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class AgentKnowledgeRerankingModelConfig(BaseModel):
    """Provider/model pair naming the reranking model; unknown keys are rejected."""

    model_config = ConfigDict(extra="forbid")

    provider: str = Field(min_length=1, max_length=255)
    model: str = Field(min_length=1, max_length=255)
|
||||
|
||||
|
||||
class AgentKnowledgeWeightedScoreConfig(AgentFlexibleConfig):
    """Optional weighted-score settings attached to a retrieval config.

    All fields are optional; the two ``*_setting`` payloads are free-form dicts.
    """

    weight_type: str | None = Field(default=None, max_length=64)
    vector_setting: dict[str, Any] | None = None
    keyword_setting: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class AgentKnowledgeRetrievalConfig(BaseModel):
|
||||
"""Per-set retrieval policy for Agent v2 knowledge retrieval.
|
||||
|
||||
Retrieval settings now live on each knowledge set instead of one shared
|
||||
flat config. A set may use either ``multiple`` retrieval with ``top_k`` or
|
||||
``single`` retrieval with a required model config.
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
mode: Literal["single", "multiple"]
|
||||
top_k: int | None = Field(default=None, ge=1)
|
||||
score_threshold: float | None = Field(default=None, ge=0, le=1)
|
||||
score_threshold_enabled: bool | None = None
|
||||
reranking_mode: str = "reranking_model"
|
||||
reranking_enable: bool = True
|
||||
reranking_model: AgentKnowledgeRerankingModelConfig | None = None
|
||||
weights: AgentKnowledgeWeightedScoreConfig | None = None
|
||||
model: AgentKnowledgeModelConfig | None = None
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_mode_fields(self) -> Self:
|
||||
if self.mode == "multiple" and self.top_k is None:
|
||||
raise ValueError("knowledge retrieval.top_k is required for multiple mode")
|
||||
if self.mode == "single" and self.model is None:
|
||||
raise ValueError("knowledge retrieval.model is required for single mode")
|
||||
return self
|
||||
|
||||
|
||||
class AgentKnowledgeMetadataCondition(BaseModel):
    """One metadata filter clause: field name, comparison operator, optional value."""

    model_config = ConfigDict(extra="forbid")

    name: str = Field(min_length=1, max_length=255)
    comparison_operator: SupportedComparisonOperator
    # Optional comparison operand; defaults to None.
    value: ConditionValue = None
|
||||
|
||||
|
||||
class AgentKnowledgeMetadataConditions(BaseModel):
    """Group of metadata conditions joined by one logical operator (default ``and``)."""

    model_config = ConfigDict(extra="forbid")

    logical_operator: Literal["and", "or"] = "and"
    conditions: list[AgentKnowledgeMetadataCondition] = Field(default_factory=list)
|
||||
|
||||
|
||||
class AgentKnowledgeMetadataFilteringConfig(BaseModel):
|
||||
"""Per-set metadata filtering policy.
|
||||
|
||||
The Python attribute uses ``metadata_model_config`` for clarity because the
|
||||
model belongs to metadata filtering specifically, while the external API and
|
||||
generated schema keep the historical ``model_config`` field name via alias.
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(extra="forbid", populate_by_name=True)
|
||||
|
||||
mode: Literal["disabled", "automatic", "manual"] = "disabled"
|
||||
# Internal name is explicit; wire format remains ``model_config``.
|
||||
metadata_model_config: AgentKnowledgeModelConfig | None = Field(default=None, alias="model_config")
|
||||
conditions: AgentKnowledgeMetadataConditions | None = None
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_mode_fields(self) -> Self:
|
||||
if self.mode == "automatic" and self.metadata_model_config is None:
|
||||
raise ValueError("metadata_filtering.model_config is required for automatic mode")
|
||||
if self.mode == "manual" and (self.conditions is None or not self.conditions.conditions):
|
||||
raise ValueError("metadata_filtering.conditions is required for manual mode")
|
||||
return self
|
||||
|
||||
|
||||
class AgentKnowledgeSetConfig(BaseModel):
    """One explicit knowledge set in Agent v2.

    ``knowledge.sets`` replaces the old flat knowledge config. Each set owns
    its datasets plus query, retrieval, and metadata policies. An individual
    set must contain at least one dataset id even though the overall knowledge
    section may be empty, which is how callers express "no knowledge layer".

    Identity fields (``id``, ``name``) and every dataset id are stored
    whitespace-stripped, so consumers see the same normalized values that
    validation checked.
    """

    model_config = ConfigDict(extra="forbid")

    id: str = Field(min_length=1, max_length=255)
    name: str = Field(min_length=1, max_length=255)
    description: str | None = None
    datasets: list[AgentKnowledgeDatasetConfig]
    query: AgentKnowledgeQueryConfig
    retrieval: AgentKnowledgeRetrievalConfig
    metadata_filtering: AgentKnowledgeMetadataFilteringConfig = Field(
        default_factory=AgentKnowledgeMetadataFilteringConfig
    )

    @field_validator("id", "name")
    @classmethod
    def validate_non_blank_identity(cls, value: str) -> str:
        """Strip surrounding whitespace and reject values that become empty."""
        normalized = value.strip()
        if not normalized:
            raise ValueError("knowledge set id and name must not be blank")
        return normalized

    @model_validator(mode="after")
    def validate_datasets(self) -> Self:
        """Require non-blank, unique dataset ids and store them stripped.

        Raises:
            ValueError: if the set has no datasets, any dataset id is missing
                or blank, or two datasets share the same stripped id.
        """
        dataset_ids = [(dataset.id or "").strip() for dataset in self.datasets]
        if not dataset_ids or any(not dataset_id for dataset_id in dataset_ids):
            raise ValueError("knowledge set requires at least one dataset id")
        if len(dataset_ids) != len(set(dataset_ids)):
            raise ValueError("knowledge set dataset ids must be unique")
        # Persist the stripped ids that were just validated. Rows are copied
        # rather than mutated so caller-owned dataset objects stay untouched.
        self.datasets = [
            dataset if dataset.id == dataset_id else dataset.model_copy(update={"id": dataset_id})
            for dataset, dataset_id in zip(self.datasets, dataset_ids)
        ]
        return self
|
||||
|
||||
|
||||
class AgentHumanContactConfig(AgentFlexibleConfig):
|
||||
@ -453,9 +598,28 @@ class AgentSoulToolsConfig(BaseModel):
|
||||
|
||||
|
||||
class AgentSoulKnowledgeConfig(BaseModel):
|
||||
datasets: list[AgentKnowledgeDatasetConfig] = Field(default_factory=list)
|
||||
query_mode: AgentKnowledgeQueryMode | None = None
|
||||
query_config: AgentKnowledgeQueryConfig = Field(default_factory=AgentKnowledgeQueryConfig)
|
||||
"""Top-level Agent v2 knowledge config.
|
||||
|
||||
Agent v2 models knowledge as explicit sets instead of one flat
|
||||
``datasets`` / ``query_mode`` / ``query_config`` block. An empty ``sets``
|
||||
list means no knowledge layer should be emitted at runtime, while set-name
|
||||
uniqueness stays case-insensitive because runtime selection addresses sets
|
||||
by name.
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
sets: list[AgentKnowledgeSetConfig] = Field(default_factory=list)
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_unique_sets(self) -> Self:
|
||||
set_ids = [item.id.strip() for item in self.sets]
|
||||
if len(set_ids) != len(set(set_ids)):
|
||||
raise ValueError("knowledge set ids must be unique")
|
||||
set_names = [item.name.strip().lower() for item in self.sets]
|
||||
if len(set_names) != len(set(set_names)):
|
||||
raise ValueError("knowledge set names must be unique")
|
||||
return self
|
||||
|
||||
|
||||
class AgentSoulHumanConfig(BaseModel):
|
||||
|
||||
@ -12433,6 +12433,25 @@ Risk marker for CLI tool bootstrap commands.
|
||||
| current_snapshot_id | string | | No |
|
||||
| workflow_node_count | integer | | Yes |
|
||||
|
||||
#### AgentComposerKnowledgeDatasetCandidateResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| description | string | | No |
|
||||
| id | string | | No |
|
||||
| missing | boolean | | No |
|
||||
| name | string | | No |
|
||||
|
||||
#### AgentComposerKnowledgeSetCandidateResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| datasets | [ [AgentComposerKnowledgeDatasetCandidateResponse](#agentcomposerknowledgedatasetcandidateresponse) ] | | No |
|
||||
| description | string | | No |
|
||||
| id | string | | Yes |
|
||||
| missing_dataset_ids | [ string ] | | No |
|
||||
| name | string | | Yes |
|
||||
|
||||
#### AgentComposerNodeJobCandidatesResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
@ -12448,7 +12467,7 @@ Risk marker for CLI tool bootstrap commands.
|
||||
| cli_tools | [ [AgentCliToolConfig](#agentclitoolconfig) ] | | No |
|
||||
| dify_tools | [ [AgentComposerDifyToolCandidateResponse](#agentcomposerdifytoolcandidateresponse) ] | | No |
|
||||
| human_contacts | [ [AgentHumanContactConfig](#agenthumancontactconfig) ] | | No |
|
||||
| knowledge_datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] | | No |
|
||||
| knowledge_sets | [ [AgentComposerKnowledgeSetCandidateResponse](#agentcomposerknowledgesetcandidateresponse) ] | | No |
|
||||
|
||||
#### AgentComposerSoulLockResponse
|
||||
|
||||
@ -12842,14 +12861,44 @@ the current roster/workflow APIs scoped to Dify Agent.
|
||||
| id | string | | No |
|
||||
| name | string | | No |
|
||||
|
||||
#### AgentKnowledgeMetadataCondition
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| comparison_operator | string, <br>**Available values:** "<", "=", ">", "after", "before", "contains", "empty", "end with", "in", "is", "is not", "not contains", "not empty", "not in", "start with", "≠", "≤", "≥" | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
|
||||
| name | string | | Yes |
|
||||
| value | string<br>[ string ]<br>number | | No |
|
||||
|
||||
#### AgentKnowledgeMetadataConditions
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| conditions | [ [AgentKnowledgeMetadataCondition](#agentknowledgemetadatacondition) ] | | No |
|
||||
| logical_operator | string, <br>**Available values:** "and", "or", <br>**Default:** and | *Enum:* `"and"`, `"or"` | No |
|
||||
|
||||
#### AgentKnowledgeMetadataFilteringConfig
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| conditions | [AgentKnowledgeMetadataConditions](#agentknowledgemetadataconditions) | | No |
|
||||
| mode | string, <br>**Available values:** "automatic", "disabled", "manual", <br>**Default:** disabled | *Enum:* `"automatic"`, `"disabled"`, `"manual"` | No |
|
||||
| model_config | [AgentKnowledgeModelConfig](#agentknowledgemodelconfig) | | No |
|
||||
|
||||
#### AgentKnowledgeModelConfig
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| completion_params | object | | No |
|
||||
| mode | string | | Yes |
|
||||
| name | string | | Yes |
|
||||
| provider | string | | Yes |
|
||||
|
||||
#### AgentKnowledgeQueryConfig
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| query | string | | No |
|
||||
| score_threshold | number | | No |
|
||||
| score_threshold_enabled | boolean | | No |
|
||||
| top_k | integer | | No |
|
||||
| mode | [AgentKnowledgeQueryMode](#agentknowledgequerymode) | | Yes |
|
||||
| value | string | | No |
|
||||
|
||||
#### AgentKnowledgeQueryMode
|
||||
|
||||
@ -12857,6 +12906,46 @@ the current roster/workflow APIs scoped to Dify Agent.
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| AgentKnowledgeQueryMode | string | | |
|
||||
|
||||
#### AgentKnowledgeRerankingModelConfig
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| model | string | | Yes |
|
||||
| provider | string | | Yes |
|
||||
|
||||
#### AgentKnowledgeRetrievalConfig
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| mode | string, <br>**Available values:** "multiple", "single" | *Enum:* `"multiple"`, `"single"` | Yes |
|
||||
| model | [AgentKnowledgeModelConfig](#agentknowledgemodelconfig) | | No |
|
||||
| reranking_enable | boolean, <br>**Default:** true | | No |
|
||||
| reranking_mode | string, <br>**Default:** reranking_model | | No |
|
||||
| reranking_model | [AgentKnowledgeRerankingModelConfig](#agentknowledgererankingmodelconfig) | | No |
|
||||
| score_threshold | number | | No |
| score_threshold_enabled | boolean | | No |
|
||||
| top_k | integer | | No |
|
||||
| weights | [AgentKnowledgeWeightedScoreConfig](#agentknowledgeweightedscoreconfig) | | No |
|
||||
|
||||
#### AgentKnowledgeSetConfig
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] | | Yes |
|
||||
| description | string | | No |
|
||||
| id | string | | Yes |
|
||||
| metadata_filtering | [AgentKnowledgeMetadataFilteringConfig](#agentknowledgemetadatafilteringconfig) | | No |
|
||||
| name | string | | Yes |
|
||||
| query | [AgentKnowledgeQueryConfig](#agentknowledgequeryconfig) | | Yes |
|
||||
| retrieval | [AgentKnowledgeRetrievalConfig](#agentknowledgeretrievalconfig) | | Yes |
|
||||
|
||||
#### AgentKnowledgeWeightedScoreConfig
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| keyword_setting | object | | No |
|
||||
| vector_setting | object | | No |
|
||||
| weight_type | string | | No |
|
||||
|
||||
#### AgentLogConversationItemResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
@ -13258,9 +13347,7 @@ old Agent tool payloads can be read while new payloads stay explicit.
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| datasets | [ [AgentKnowledgeDatasetConfig](#agentknowledgedatasetconfig) ] | | No |
|
||||
| query_config | [AgentKnowledgeQueryConfig](#agentknowledgequeryconfig) | | No |
|
||||
| query_mode | [AgentKnowledgeQueryMode](#agentknowledgequerymode) | | No |
|
||||
| sets | [ [AgentKnowledgeSetConfig](#agentknowledgesetconfig) ] | | No |
|
||||
|
||||
#### AgentSoulMemoryConfig
|
||||
|
||||
|
||||
@ -25,6 +25,7 @@ from models.agent_config_entities import (
|
||||
AgentSoulConfig,
|
||||
DeclaredOutputConfig,
|
||||
)
|
||||
from services.agent.knowledge_datasets import list_agent_soul_knowledge_dataset_ids
|
||||
|
||||
MAX_CANDIDATES_PER_LIST = 200
|
||||
|
||||
@ -139,19 +140,34 @@ def soul_candidates(
|
||||
|
||||
cli_tools = [tool.model_dump(exclude_none=True) for tool in soul.tools.cli_tools if tool.enabled]
|
||||
|
||||
dataset_ids = [dataset.id for dataset in soul.knowledge.datasets if dataset.id]
|
||||
dataset_ids = list_agent_soul_knowledge_dataset_ids(soul)
|
||||
dataset_rows = dataset_lookup(dataset_ids) if dataset_ids else {}
|
||||
knowledge_datasets: list[dict[str, Any]] = []
|
||||
for dataset in soul.knowledge.datasets:
|
||||
if not dataset.id:
|
||||
continue
|
||||
row = dataset_rows.get(dataset.id)
|
||||
knowledge_datasets.append(
|
||||
knowledge_sets: list[dict[str, Any]] = []
|
||||
for knowledge_set in soul.knowledge.sets:
|
||||
missing_dataset_ids: list[str] = []
|
||||
datasets: list[dict[str, Any]] = []
|
||||
for dataset in knowledge_set.datasets:
|
||||
dataset_id = (dataset.id or "").strip()
|
||||
if not dataset_id:
|
||||
continue
|
||||
row = dataset_rows.get(dataset_id)
|
||||
if row is None:
|
||||
missing_dataset_ids.append(dataset_id)
|
||||
datasets.append(
|
||||
{
|
||||
"id": dataset_id,
|
||||
"name": (getattr(row, "name", None) or dataset.name or dataset_id),
|
||||
"description": getattr(row, "description", None) or dataset.description,
|
||||
"missing": row is None,
|
||||
}
|
||||
)
|
||||
knowledge_sets.append(
|
||||
{
|
||||
"id": dataset.id,
|
||||
"name": (getattr(row, "name", None) or dataset.name or dataset.id),
|
||||
"description": getattr(row, "description", None) or dataset.description,
|
||||
"missing": row is None,
|
||||
"id": knowledge_set.id,
|
||||
"name": knowledge_set.name,
|
||||
"description": knowledge_set.description,
|
||||
"datasets": datasets,
|
||||
"missing_dataset_ids": missing_dataset_ids,
|
||||
}
|
||||
)
|
||||
|
||||
@ -161,7 +177,7 @@ def soul_candidates(
|
||||
lists = {
|
||||
"dify_tools": dify_tools,
|
||||
"cli_tools": cli_tools,
|
||||
"knowledge_datasets": knowledge_datasets,
|
||||
"knowledge_sets": knowledge_sets,
|
||||
"human_contacts": human_contacts,
|
||||
}
|
||||
capped: dict[str, list[dict[str, Any]]] = {}
|
||||
@ -192,7 +208,6 @@ def _ref_entry(
|
||||
"inferred": inferred,
|
||||
}
|
||||
|
||||
|
||||
def _capped(values: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], bool]:
|
||||
if len(values) > MAX_CANDIDATES_PER_LIST:
|
||||
return values[:MAX_CANDIDATES_PER_LIST], True
|
||||
|
||||
@ -33,6 +33,11 @@ from services.agent.errors import (
|
||||
AgentNameConflictError,
|
||||
AgentNotFoundError,
|
||||
AgentVersionNotFoundError,
|
||||
InvalidComposerConfigError,
|
||||
)
|
||||
from services.agent.knowledge_datasets import (
|
||||
get_tenant_knowledge_dataset_rows,
|
||||
list_missing_tenant_knowledge_dataset_ids,
|
||||
)
|
||||
from services.entities.agent_entities import (
|
||||
AgentSoulConfig,
|
||||
@ -101,6 +106,7 @@ class AgentComposerService:
|
||||
|
||||
_backfill_cli_tool_ids(payload.agent_soul)
|
||||
ComposerConfigValidator.validate_save_payload(payload)
|
||||
cls.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
|
||||
workflow = cls._get_draft_workflow(tenant_id=tenant_id, app_id=app_id)
|
||||
binding = cls._get_workflow_binding(tenant_id=tenant_id, workflow_id=workflow.id, node_id=node_id)
|
||||
|
||||
@ -195,6 +201,7 @@ class AgentComposerService:
|
||||
raise ValueError("Agent App composer endpoint only accepts agent_app variant")
|
||||
_backfill_cli_tool_ids(payload.agent_soul)
|
||||
ComposerConfigValidator.validate_save_payload(payload)
|
||||
cls.validate_knowledge_datasets(tenant_id=tenant_id, agent_soul=payload.agent_soul)
|
||||
if payload.agent_soul is None:
|
||||
raise ValueError("agent_soul is required")
|
||||
|
||||
@ -273,19 +280,15 @@ class AgentComposerService:
|
||||
agent_id: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""ENG-617 soft findings, with DB-backed dataset and drive mention checks."""
|
||||
from services.agent.prompt_mentions import MentionKind, parse_prompt_mentions
|
||||
|
||||
mentioned_ids: set[str] = set()
|
||||
if payload.agent_soul is not None:
|
||||
mentioned_ids |= {
|
||||
mention.ref_id
|
||||
for mention in parse_prompt_mentions(payload.agent_soul.prompt.system_prompt)
|
||||
if mention.kind == MentionKind.KNOWLEDGE
|
||||
}
|
||||
existing_dataset_ids: set[str] | None = None
|
||||
if mentioned_ids:
|
||||
existing_dataset_ids = set(cls._dataset_rows(tenant_id=tenant_id, dataset_ids=sorted(mentioned_ids)))
|
||||
findings = ComposerConfigValidator.collect_soft_findings(payload, existing_dataset_ids=existing_dataset_ids)
|
||||
existing_knowledge_set_ids = (
|
||||
{knowledge_set.id for knowledge_set in payload.agent_soul.knowledge.sets}
|
||||
if payload.agent_soul is not None
|
||||
else None
|
||||
)
|
||||
findings = ComposerConfigValidator.collect_soft_findings(
|
||||
payload,
|
||||
existing_knowledge_set_ids=existing_knowledge_set_ids,
|
||||
)
|
||||
if agent_id and payload.agent_soul is not None:
|
||||
findings["warnings"].extend(
|
||||
cls._drive_mention_findings(
|
||||
@ -296,6 +299,24 @@ class AgentComposerService:
|
||||
)
|
||||
return findings
|
||||
|
||||
@classmethod
def validate_knowledge_datasets(cls, *, tenant_id: str, agent_soul: AgentSoulConfig | None) -> None:
    """Reject knowledge sets that reference datasets the tenant cannot see.

    Shape concerns (set structure, duplicate set ids/names, duplicate dataset
    ids inside a single set) belong to the DTO validators. This service-level
    hook covers the database-backed part — existence and tenant ownership —
    so invalid or cross-tenant datasets fail before Agent Soul snapshots are
    persisted.

    Args:
        tenant_id: Tenant whose datasets the knowledge sets may reference.
        agent_soul: Agent Soul to check; ``None`` means there is nothing to
            validate.

    Raises:
        InvalidComposerConfigError: If at least one referenced dataset id is
            reported missing for ``tenant_id``. The message lists the ids.
    """
    if agent_soul is None:
        return

    unresolved_ids = list_missing_tenant_knowledge_dataset_ids(tenant_id=tenant_id, agent_soul=agent_soul)
    if not unresolved_ids:
        return

    listed_ids = ", ".join(unresolved_ids)
    raise InvalidComposerConfigError(
        "knowledge_dataset_not_found: knowledge sets reference missing or out-of-scope datasets: " + listed_ids
    )
|
||||
|
||||
@classmethod
|
||||
def resolve_bound_agent_id(cls, *, tenant_id: str, app_id: str) -> str | None:
|
||||
"""The Agent App's bound roster agent id, if any (validate-endpoint context)."""
|
||||
@ -410,7 +431,7 @@ class AgentComposerService:
|
||||
|
||||
soul_lists, soul_truncated = soul_candidates(
|
||||
agent_soul=agent_soul,
|
||||
dataset_lookup=lambda ids: cls._dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
|
||||
dataset_lookup=lambda ids: get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
|
||||
workspace_tools_loader=lambda: cls._workspace_dify_tools(tenant_id=tenant_id, user_id=user_id),
|
||||
)
|
||||
truncated = truncated or soul_truncated
|
||||
@ -437,7 +458,7 @@ class AgentComposerService:
|
||||
agent_soul = cls._load_agent_app_soul(tenant_id=tenant_id, app_id=app_id)
|
||||
soul_lists, truncated = soul_candidates(
|
||||
agent_soul=agent_soul,
|
||||
dataset_lookup=lambda ids: cls._dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
|
||||
dataset_lookup=lambda ids: get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=ids),
|
||||
workspace_tools_loader=lambda: cls._workspace_dify_tools(tenant_id=tenant_id, user_id=user_id),
|
||||
)
|
||||
response = ComposerCandidatesResponse(
|
||||
@ -530,30 +551,6 @@ class AgentComposerService:
|
||||
variables = WorkflowDraftVariableService(session=session).list_system_variables(app_id, user_id)
|
||||
return [(variable.name, variable.value_type.value) for variable in variables.variables]
|
||||
|
||||
@staticmethod
def _dataset_rows(*, tenant_id: str, dataset_ids: list[str]) -> dict[str, Any]:
    """Look up tenant-scoped dataset rows, ignoring ids that are not UUIDs.

    Mention ids originate from user-editable prompt text. An id that does not
    parse as a UUID can never match a dataset row, so it is left out of the
    query (and therefore out of the result, giving missing/placeholder
    semantics) rather than breaking the UUID-typed lookup.

    Args:
        tenant_id: Tenant passed through to the dataset query.
        dataset_ids: Candidate dataset ids, possibly malformed.

    Returns:
        Mapping of ``str(row.id)`` to the row for every dataset returned by
        the query; empty when no candidate id parses as a UUID.
    """
    from uuid import UUID

    from services.dataset_service import DatasetService

    def _parses_as_uuid(candidate: str) -> bool:
        try:
            UUID(candidate)
        except (ValueError, TypeError):
            return False
        return True

    queryable_ids = [candidate for candidate in dataset_ids if _parses_as_uuid(candidate)]
    if not queryable_ids:
        # Skip the query entirely when nothing could possibly match.
        return {}

    found_rows, _ = DatasetService.get_datasets_by_ids(queryable_ids, tenant_id)
    return {str(found_row.id): found_row for found_row in found_rows}
|
||||
|
||||
@staticmethod
|
||||
def _workspace_dify_tools(*, tenant_id: str, user_id: str) -> list[dict[str, Any]]:
|
||||
"""Workspace Dify Plugin tools, same source as the tool selector.
|
||||
|
||||
@ -141,15 +141,15 @@ class ComposerConfigValidator:
|
||||
cls,
|
||||
payload: ComposerSavePayload,
|
||||
*,
|
||||
existing_dataset_ids: set[str] | None = None,
|
||||
existing_knowledge_set_ids: set[str] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""ENG-617 §5.3/§5.4 soft findings — never block save.
|
||||
|
||||
``warnings`` carries ``mention_target_missing`` / ``mention_malformed``
|
||||
entries; ``knowledge_retrieval_placeholder`` keeps dangling knowledge
|
||||
entries; ``knowledge_retrieval_placeholder`` keeps dangling knowledge-set
|
||||
mentions with a placeholder name (0522 consensus) instead of dropping or
|
||||
rejecting them. With ``existing_dataset_ids`` provided, configured-but-
|
||||
deleted datasets surface as placeholders too.
|
||||
rejecting them. With ``existing_knowledge_set_ids`` provided, mentions
|
||||
that no longer exist in the current Agent Soul surface as placeholders too.
|
||||
"""
|
||||
warnings: list[dict[str, Any]] = []
|
||||
placeholders: list[dict[str, str]] = []
|
||||
@ -181,7 +181,7 @@ class ComposerConfigValidator:
|
||||
resolved = resolver(mention)
|
||||
if mention.kind == MentionKind.KNOWLEDGE:
|
||||
dangling = resolved is None or (
|
||||
existing_dataset_ids is not None and mention.ref_id not in existing_dataset_ids
|
||||
existing_knowledge_set_ids is not None and mention.ref_id not in existing_knowledge_set_ids
|
||||
)
|
||||
if dangling:
|
||||
placeholders.append(
|
||||
|
||||
63
api/services/agent/knowledge_datasets.py
Normal file
63
api/services/agent/knowledge_datasets.py
Normal file
@ -0,0 +1,63 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from models.agent_config_entities import AgentSoulConfig
|
||||
|
||||
|
||||
def list_agent_soul_knowledge_dataset_ids(agent_soul: AgentSoulConfig) -> list[str]:
    """Collect the distinct dataset ids referenced by ``knowledge.sets``.

    Agent v2 knowledge dataset selection is owned by ``knowledge.sets``.
    Composer, workflow validation, candidates, and runtime diagnostics share
    this helper so they all normalize ids the same way: surrounding
    whitespace is stripped, blank (or ``None``) ids are dropped, duplicates
    are removed, and the first-seen order across sets is kept.

    Args:
        agent_soul: Agent Soul whose knowledge sets are scanned.

    Returns:
        Normalized, de-duplicated dataset ids in configuration order.
    """
    # A dict keeps insertion order, so it doubles as an ordered set here.
    ordered_unique: dict[str, None] = {}
    for knowledge_set in agent_soul.knowledge.sets:
        for dataset in knowledge_set.datasets:
            normalized_id = (dataset.id or "").strip()
            if normalized_id:
                ordered_unique.setdefault(normalized_id, None)
    return list(ordered_unique)
|
||||
|
||||
|
||||
def get_tenant_knowledge_dataset_rows(*, tenant_id: str, dataset_ids: list[str]) -> dict[str, Any]:
    """Fetch the tenant-scoped dataset rows for the given knowledge dataset ids.

    Knowledge ids come from user-editable config. An id that does not parse
    as a UUID can never match a dataset row, so it is treated as missing
    (simply absent from the result) instead of breaking the UUID-typed
    dataset lookup.

    Args:
        tenant_id: Tenant passed through to the dataset query.
        dataset_ids: Normalized dataset ids to look up.

    Returns:
        Mapping of ``str(row.id)`` to the row for every dataset returned by
        the query; empty when no id parses as a UUID, in which case the
        dataset service is not called at all.
    """
    from services.dataset_service import DatasetService

    def _parses_as_uuid(candidate: str) -> bool:
        try:
            UUID(candidate)
        except (TypeError, ValueError):
            return False
        return True

    lookup_ids = [candidate for candidate in dataset_ids if _parses_as_uuid(candidate)]
    if not lookup_ids:
        return {}

    matched_rows, _ = DatasetService.get_datasets_by_ids(lookup_ids, tenant_id)
    return {str(matched.id): matched for matched in matched_rows}
|
||||
|
||||
|
||||
def list_missing_tenant_knowledge_dataset_ids(*, tenant_id: str, agent_soul: AgentSoulConfig | None) -> list[str]:
    """List the knowledge dataset ids that do not resolve within the tenant.

    Args:
        tenant_id: Tenant whose datasets are consulted.
        agent_soul: Agent Soul to inspect; ``None`` yields an empty result.

    Returns:
        The normalized dataset ids referenced by the Agent Soul's knowledge
        sets for which the tenant-scoped lookup returned no row, in
        configuration order. Empty when nothing is referenced.
    """
    referenced_ids = [] if agent_soul is None else list_agent_soul_knowledge_dataset_ids(agent_soul)
    if not referenced_ids:
        # No ids to check, so skip the dataset lookup altogether.
        return []

    known_rows = get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=referenced_ids)
    return [dataset_id for dataset_id in referenced_ids if dataset_id not in known_rows]
|
||||
@ -6,7 +6,7 @@ Slash-menu insertions are stored inline in the plain-string prompt as tokens:
|
||||
|
||||
``kind`` is a fixed lowercase word; ``id`` points at an item in the Agent
|
||||
runtime context. For prompt-owned entities that means Agent Soul lists such as
|
||||
``tools`` / ``knowledge.datasets`` / ``human.contacts`` and workflow job lists
|
||||
``tools`` / ``knowledge.sets`` / ``human.contacts`` and workflow job lists
|
||||
such as ``previous_node_output_refs`` / ``declared_outputs``. For drive-backed
|
||||
``skill`` / ``file`` mentions the field stores a URL-encoded drive key and is
|
||||
resolved against ``agent_drive_files`` at runtime. ``label`` is an optional
|
||||
@ -211,9 +211,9 @@ def build_soul_mention_resolver(agent_soul: AgentSoulConfig) -> MentionResolver:
|
||||
if mention.ref_id in (cli_tool.id, cli_tool.name):
|
||||
return cli_tool.name or cli_tool.id
|
||||
case MentionKind.KNOWLEDGE:
|
||||
for dataset in agent_soul.knowledge.datasets:
|
||||
if mention.ref_id == dataset.id:
|
||||
return dataset.name or dataset.id
|
||||
for knowledge_set in agent_soul.knowledge.sets:
|
||||
if mention.ref_id == knowledge_set.id:
|
||||
return knowledge_set.name or knowledge_set.id
|
||||
case MentionKind.HUMAN:
|
||||
return _resolve_human_contact(agent_soul.human.contacts, mention.ref_id)
|
||||
case _:
|
||||
|
||||
@ -162,8 +162,15 @@ def test_request_builder_adds_knowledge_layer_when_configured():
|
||||
run_input = _run_input()
|
||||
run_input.knowledge = DifyKnowledgeBaseLayerConfig.model_validate(
|
||||
{
|
||||
"dataset_ids": ["dataset-1"],
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
}
|
||||
],
|
||||
}
|
||||
)
|
||||
|
||||
@ -174,7 +181,7 @@ def test_request_builder_adds_knowledge_layer_when_configured():
|
||||
assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].type == DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID
|
||||
assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].deps == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID}
|
||||
knowledge_config = cast(DifyKnowledgeBaseLayerConfig, layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].config)
|
||||
assert knowledge_config.dataset_ids == ["dataset-1"]
|
||||
assert knowledge_config.sets[0].dataset_ids == ["dataset-1"]
|
||||
|
||||
|
||||
def test_request_builder_can_delete_on_exit_for_cleanup_paths():
|
||||
@ -386,8 +393,15 @@ def test_agent_app_request_builder_adds_knowledge_layer_when_configured():
|
||||
run_input = _agent_app_input()
|
||||
run_input.knowledge = DifyKnowledgeBaseLayerConfig.model_validate(
|
||||
{
|
||||
"dataset_ids": ["dataset-1", "dataset-2"],
|
||||
"retrieval": {"mode": "multiple", "top_k": 2},
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 2},
|
||||
}
|
||||
],
|
||||
}
|
||||
)
|
||||
|
||||
@ -398,7 +412,7 @@ def test_agent_app_request_builder_adds_knowledge_layer_when_configured():
|
||||
assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].type == DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID
|
||||
assert layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].deps == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID}
|
||||
knowledge_config = cast(DifyKnowledgeBaseLayerConfig, layers[DIFY_KNOWLEDGE_BASE_LAYER_ID].config)
|
||||
assert knowledge_config.dataset_ids == ["dataset-1", "dataset-2"]
|
||||
assert knowledge_config.sets[0].dataset_ids == ["dataset-1", "dataset-2"]
|
||||
|
||||
|
||||
# ── ENG-635 / ENG-638: ask_human layer injection + deferred_tool_results ─────
|
||||
|
||||
@ -149,3 +149,55 @@ def test_generate_specs_is_idempotent(tmp_path):
|
||||
assert [path.name for path in first_paths] == [path.name for path in second_paths]
|
||||
for first_path, second_path in zip(first_paths, second_paths):
|
||||
assert first_path.read_text(encoding="utf-8") == second_path.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def test_generate_specs_include_agent_v2_knowledge_set_schema_and_query_enums(tmp_path):
    """The generated console OpenAPI spec exposes the knowledge-set schema and query-mode enum."""
    generated_paths = _load_generate_swagger_specs_module().generate_specs(tmp_path)
    console_spec = next(spec for spec in generated_paths if spec.name == "console-openapi.json")
    component_schemas = json.loads(console_spec.read_text(encoding="utf-8"))["components"]["schemas"]

    assert "AgentKnowledgeSetConfig" in component_schemas

    # ``knowledge.sets`` must be an array of the dedicated set schema.
    sets_items = component_schemas["AgentSoulKnowledgeConfig"]["properties"]["sets"]["items"]
    assert sets_items["$ref"] == "#/components/schemas/AgentKnowledgeSetConfig"

    assert component_schemas["AgentKnowledgeQueryMode"]["enum"] == ["generated_query", "user_query"]
|
||||
|
||||
|
||||
def test_checked_in_agent_v2_knowledge_openapi_and_generated_contracts_are_in_sync():
    """Checked-in OpenAPI markdown and generated TS/zod contracts mention the knowledge-set types.

    Reads the committed artifacts relative to this test file and asserts that
    each one contains the expected knowledge-set markers.
    """
    api_dir = Path(__file__).resolve().parents[3]
    console_contracts = api_dir.parent / "packages" / "contracts" / "generated" / "api" / "console"
    contract_areas = ("agent", "apps")

    def _read(path: Path) -> str:
        return path.read_text(encoding="utf-8")

    markdown = _read(api_dir / "openapi" / "markdown" / "console-openapi.md")
    type_sources = [_read(console_contracts / area / "types.gen.ts") for area in contract_areas]
    zod_sources = [_read(console_contracts / area / "zod.gen.ts") for area in contract_areas]

    markdown_headings = (
        "#### AgentKnowledgeSetConfig",
        "#### AgentSoulKnowledgeConfig",
        "#### AgentKnowledgeQueryMode",
    )
    for heading in markdown_headings:
        assert heading in markdown

    type_markers = (
        "export type AgentKnowledgeSetConfig = {",
        "export type AgentSoulKnowledgeConfig = {",
        "AgentKnowledgeQueryMode",
        "generated_query",
        "user_query",
    )
    for content in type_sources:
        for marker in type_markers:
            assert marker in content

    zod_markers = (
        "export const zAgentKnowledgeSetConfig = z.object({",
        "export const zAgentSoulKnowledgeConfig = z.object({",
        "zAgentKnowledgeQueryMode = z.enum([",
        "generated_query",
        "user_query",
    )
    for content in zod_sources:
        for marker in zod_markers:
            assert marker in content
|
||||
|
||||
@ -153,12 +153,19 @@ class TestAgentAppRuntimeRequestBuilder:
|
||||
"model": "gpt-4o-mini",
|
||||
},
|
||||
"knowledge": {
|
||||
"datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
|
||||
"query_config": {
|
||||
"top_k": 3,
|
||||
"score_threshold": 0.5,
|
||||
"score_threshold_enabled": False,
|
||||
},
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {
|
||||
"mode": "multiple",
|
||||
"top_k": 3,
|
||||
"score_threshold": None,
|
||||
},
|
||||
}
|
||||
],
|
||||
},
|
||||
}
|
||||
)
|
||||
@ -173,10 +180,12 @@ class TestAgentAppRuntimeRequestBuilder:
|
||||
assert knowledge.type == "dify.knowledge_base"
|
||||
assert knowledge.deps == {"execution_context": "execution_context"}
|
||||
dumped_config = knowledge.config.model_dump(mode="json", by_alias=True)
|
||||
assert dumped_config["dataset_ids"] == ["dataset-1", "dataset-2"]
|
||||
assert dumped_config["retrieval"]["mode"] == "multiple"
|
||||
assert dumped_config["retrieval"]["top_k"] == 3
|
||||
assert dumped_config["retrieval"]["score_threshold"] == 0.0
|
||||
knowledge_set = dumped_config["sets"][0]
|
||||
assert [dataset["id"] for dataset in knowledge_set["datasets"]] == ["dataset-1", "dataset-2"]
|
||||
assert knowledge_set["query"] == {"mode": "generated_query", "value": None}
|
||||
assert knowledge_set["retrieval"]["mode"] == "multiple"
|
||||
assert knowledge_set["retrieval"]["top_k"] == 3
|
||||
assert knowledge_set["retrieval"]["score_threshold"] == 0.0
|
||||
|
||||
def test_build_raises_when_model_missing(self):
|
||||
builder = AgentAppRuntimeRequestBuilder(
|
||||
|
||||
@ -512,12 +512,55 @@ def test_build_maps_agent_soul_knowledge_to_knowledge_layer_config():
|
||||
"model": "gpt-test",
|
||||
},
|
||||
"knowledge": {
|
||||
"datasets": [{"id": "dataset-1"}, {"id": " "}, {"id": "dataset-2"}],
|
||||
"query_config": {
|
||||
"top_k": 6,
|
||||
"score_threshold": 0.4,
|
||||
"score_threshold_enabled": True,
|
||||
},
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"description": "Support content",
|
||||
"datasets": [{"id": "dataset-1"}, {"id": "dataset-2"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {
|
||||
"mode": "multiple",
|
||||
"top_k": 6,
|
||||
"score_threshold": 0.4,
|
||||
"reranking_model": {"provider": "cohere", "model": "rerank-v3"},
|
||||
"weights": {"weight_type": "weighted_score", "vector_setting": {"vector_weight": 0.7}},
|
||||
},
|
||||
"metadata_filtering": {
|
||||
"mode": "manual",
|
||||
"conditions": {
|
||||
"logical_operator": "and",
|
||||
"conditions": [
|
||||
{"name": "category", "comparison_operator": "contains", "value": "auth"}
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"id": "release",
|
||||
"name": "Release Notes",
|
||||
"datasets": [{"id": "dataset-3"}],
|
||||
"query": {"mode": "user_query", "value": "release notes"},
|
||||
"retrieval": {
|
||||
"mode": "single",
|
||||
"model": {
|
||||
"provider": "openai",
|
||||
"name": "gpt-4o-mini",
|
||||
"mode": "chat",
|
||||
"completion_params": {"temperature": 0.2},
|
||||
},
|
||||
},
|
||||
"metadata_filtering": {
|
||||
"mode": "automatic",
|
||||
"model_config": {
|
||||
"provider": "openai",
|
||||
"name": "gpt-4o-mini",
|
||||
"mode": "chat",
|
||||
"completion_params": {},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
||||
),
|
||||
@ -531,25 +574,75 @@ def test_build_maps_agent_soul_knowledge_to_knowledge_layer_config():
|
||||
knowledge_layer = layers["knowledge"]
|
||||
assert knowledge_layer["type"] == "dify.knowledge_base"
|
||||
assert knowledge_layer["deps"] == {"execution_context": DIFY_EXECUTION_CONTEXT_LAYER_ID}
|
||||
assert knowledge_layer["config"] == {
|
||||
"dataset_ids": ["dataset-1", "dataset-2"],
|
||||
"retrieval": {
|
||||
"mode": "multiple",
|
||||
"top_k": 6,
|
||||
"score_threshold": 0.4,
|
||||
"reranking_mode": "reranking_model",
|
||||
"reranking_enable": True,
|
||||
"reranking_model": None,
|
||||
"weights": None,
|
||||
"model": None,
|
||||
assert knowledge_layer["config"]["sets"] == [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"description": "Support content",
|
||||
"datasets": [
|
||||
{"id": "dataset-1", "name": None, "description": None},
|
||||
{"id": "dataset-2", "name": None, "description": None},
|
||||
],
|
||||
"query": {"mode": "generated_query", "value": None},
|
||||
"retrieval": {
|
||||
"mode": "multiple",
|
||||
"top_k": 6,
|
||||
"score_threshold": 0.4,
|
||||
"reranking_mode": "reranking_model",
|
||||
"reranking_enable": True,
|
||||
"reranking_model": {"provider": "cohere", "model": "rerank-v3"},
|
||||
"weights": {"weight_type": "weighted_score", "vector_setting": {"vector_weight": 0.7}},
|
||||
"model": None,
|
||||
},
|
||||
"metadata_filtering": {
|
||||
"mode": "manual",
|
||||
"metadata_model_config": None,
|
||||
"conditions": {
|
||||
"logical_operator": "and",
|
||||
"conditions": [
|
||||
{"name": "category", "comparison_operator": "contains", "value": "auth"}
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
"metadata_filtering": {"mode": "disabled", "metadata_model_config": None, "conditions": None},
|
||||
"max_result_content_chars": 2000,
|
||||
"max_observation_chars": 12000,
|
||||
}
|
||||
{
|
||||
"id": "release",
|
||||
"name": "Release Notes",
|
||||
"description": None,
|
||||
"datasets": [{"id": "dataset-3", "name": None, "description": None}],
|
||||
"query": {"mode": "user_query", "value": "release notes"},
|
||||
"retrieval": {
|
||||
"mode": "single",
|
||||
"top_k": None,
|
||||
"score_threshold": 0.0,
|
||||
"reranking_mode": "reranking_model",
|
||||
"reranking_enable": True,
|
||||
"reranking_model": None,
|
||||
"weights": None,
|
||||
"model": {
|
||||
"provider": "openai",
|
||||
"name": "gpt-4o-mini",
|
||||
"mode": "chat",
|
||||
"completion_params": {"temperature": 0.2},
|
||||
},
|
||||
},
|
||||
"metadata_filtering": {
|
||||
"mode": "automatic",
|
||||
"metadata_model_config": {
|
||||
"provider": "openai",
|
||||
"name": "gpt-4o-mini",
|
||||
"mode": "chat",
|
||||
"completion_params": {},
|
||||
},
|
||||
"conditions": None,
|
||||
},
|
||||
},
|
||||
]
|
||||
assert knowledge_layer["config"]["max_result_content_chars"] == 2000
|
||||
assert knowledge_layer["config"]["max_observation_chars"] == 12000
|
||||
|
||||
|
||||
def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits_it():
|
||||
def test_build_knowledge_layer_maps_disabled_score_threshold_to_zero():
|
||||
context = _context()
|
||||
snapshot = AgentConfigSnapshot(
|
||||
id="snapshot-1",
|
||||
@ -565,8 +658,19 @@ def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits
|
||||
"model": "gpt-test",
|
||||
},
|
||||
"knowledge": {
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query_config": {},
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {
|
||||
"mode": "multiple",
|
||||
"top_k": 4,
|
||||
"score_threshold": None,
|
||||
},
|
||||
}
|
||||
],
|
||||
},
|
||||
}
|
||||
),
|
||||
@ -577,10 +681,10 @@ def test_build_knowledge_layer_uses_stable_default_top_k_when_query_config_omits
|
||||
|
||||
dumped = result.request.model_dump(mode="json")
|
||||
knowledge_layer = next(layer for layer in dumped["composition"]["layers"] if layer["name"] == "knowledge")
|
||||
assert knowledge_layer["config"]["retrieval"]["top_k"] == 4
|
||||
assert knowledge_layer["config"]["sets"][0]["retrieval"]["score_threshold"] == 0.0
|
||||
|
||||
|
||||
def test_build_skips_knowledge_layer_when_agent_soul_has_no_valid_dataset_ids():
|
||||
def test_build_skips_knowledge_layer_when_agent_soul_has_no_sets():
|
||||
context = _context()
|
||||
snapshot = AgentConfigSnapshot(
|
||||
id="snapshot-1",
|
||||
@ -595,9 +699,7 @@ def test_build_skips_knowledge_layer_when_agent_soul_has_no_valid_dataset_ids():
|
||||
"model_provider": "openai",
|
||||
"model": "gpt-test",
|
||||
},
|
||||
"knowledge": {
|
||||
"datasets": [{"id": " "}, {}],
|
||||
},
|
||||
"knowledge": {"sets": []},
|
||||
}
|
||||
),
|
||||
)
|
||||
@ -1094,7 +1196,15 @@ def test_feature_manifest_marks_knowledge_supported_without_warning_when_configu
|
||||
soul = AgentSoulConfig.model_validate(
|
||||
{
|
||||
"knowledge": {
|
||||
"datasets": [{"id": "dataset-1", "name": "Product Docs"}],
|
||||
"sets": [
|
||||
{
|
||||
"id": "product",
|
||||
"name": "Product Docs",
|
||||
"datasets": [{"id": "dataset-1", "name": "Product Docs"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
}
|
||||
],
|
||||
}
|
||||
}
|
||||
)
|
||||
@ -1106,13 +1216,13 @@ def test_feature_manifest_marks_knowledge_supported_without_warning_when_configu
|
||||
assert all("knowledge" not in w["section"] for w in manifest["unsupported_runtime_warnings"])
|
||||
|
||||
|
||||
def test_feature_manifest_treats_blank_knowledge_dataset_ids_as_not_configured():
|
||||
def test_feature_manifest_treats_empty_knowledge_sets_as_not_configured():
|
||||
from core.workflow.nodes.agent_v2.runtime_feature_manifest import build_runtime_feature_manifest
|
||||
|
||||
soul = AgentSoulConfig.model_validate(
|
||||
{
|
||||
"knowledge": {
|
||||
"datasets": [{"id": " "}, {}],
|
||||
"sets": [],
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
@ -55,6 +55,33 @@ def _snapshot() -> AgentConfigSnapshot:
|
||||
)
|
||||
|
||||
|
||||
def _snapshot_with_knowledge_dataset(dataset_id: str) -> AgentConfigSnapshot:
    """Build a snapshot whose Agent Soul has a single knowledge set referencing ``dataset_id``."""
    support_set = {
        "id": "support",
        "name": "Support KB",
        "datasets": [{"id": dataset_id}],
        "query": {"mode": "generated_query"},
        "retrieval": {"mode": "multiple", "top_k": 4},
    }
    soul_model = AgentSoulModelConfig(
        plugin_id="langgenius/openai",
        model_provider="openai",
        model="gpt-test",
    )
    soul = AgentSoulConfig(model=soul_model, knowledge={"sets": [support_set]})
    return AgentConfigSnapshot(
        id="snapshot-1",
        tenant_id="tenant-1",
        agent_id="agent-1",
        version=1,
        config_snapshot=soul,
    )
|
||||
|
||||
|
||||
def _graph(edges: list[dict]) -> dict:
|
||||
return {
|
||||
"nodes": [
|
||||
@ -515,6 +542,35 @@ def test_publish_validation_rejects_missing_file_ref():
|
||||
)
|
||||
|
||||
|
||||
def test_publish_validation_rejects_missing_or_out_of_scope_knowledge_datasets(
    monkeypatch: pytest.MonkeyPatch,
):
    """Publishing fails when a knowledge-set dataset is not returned by the tenant-scoped lookup."""
    from services.dataset_service import DatasetService

    dataset_id = "550e8400-e29b-41d4-a716-446655440000"
    node_job = WorkflowNodeJobConfig.model_validate({})
    snapshot = _snapshot_with_knowledge_dataset(dataset_id)

    session = Mock()
    session.scalar.side_effect = [_binding(node_job), _agent(), snapshot]

    lookup_call = {}

    def fake_get_datasets_by_ids(ids, tenant_id):
        # Record the arguments and report that no dataset exists.
        lookup_call.update(ids=ids, tenant_id=tenant_id)
        return [], 0

    monkeypatch.setattr(DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids)

    workflow = _workflow(_graph([{"source": "start", "target": "agent-node"}]))
    with pytest.raises(WorkflowAgentNodeValidationError, match=dataset_id):
        WorkflowAgentNodeValidator.validate_published_workflow(session=session, workflow=workflow)

    assert lookup_call == {"ids": [dataset_id], "tenant_id": "tenant-1"}
|
||||
|
||||
|
||||
def test_publish_validation_accepts_tool_node_agentic_manual_mode():
|
||||
session = Mock()
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from models.agent_config_entities import AgentKnowledgeQueryMode, AgentSoulModelConfig, DeclaredOutputType
|
||||
from services.agent.composer_service import AgentComposerService
|
||||
@ -91,14 +92,144 @@ def test_knowledge_query_mode_uses_stable_backend_enums():
|
||||
config = AgentSoulConfig.model_validate(
|
||||
{
|
||||
"knowledge": {
|
||||
"datasets": [{"dataset_id": "dataset-1"}],
|
||||
"query_mode": "generated_query",
|
||||
"query_config": {"generation_prompt": "Create a retrieval query."},
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
}
|
||||
],
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
assert config.knowledge.query_mode == AgentKnowledgeQueryMode.GENERATED_QUERY
|
||||
assert config.knowledge.sets[0].query.mode == AgentKnowledgeQueryMode.GENERATED_QUERY
|
||||
|
||||
|
||||
def _baseline_knowledge_set_payload(**overrides):
    """Return a fresh, otherwise-valid knowledge set payload with ``overrides`` applied on top."""
    payload = {
        "id": "support",
        "name": "Support KB",
        "datasets": [{"id": "dataset-1"}],
        "query": {"mode": "generated_query"},
        "retrieval": {"mode": "multiple", "top_k": 4},
    }
    payload.update(overrides)
    return payload


@pytest.mark.parametrize(
    ("knowledge_payload", "match"),
    [
        # Two sets sharing the same id.
        (
            {
                "sets": [
                    _baseline_knowledge_set_payload(),
                    _baseline_knowledge_set_payload(name="Billing KB", datasets=[{"id": "dataset-2"}]),
                ]
            },
            "knowledge set ids must be unique",
        ),
        # Two sets sharing the same name.
        (
            {
                "sets": [
                    _baseline_knowledge_set_payload(name="Shared KB"),
                    _baseline_knowledge_set_payload(
                        id="billing",
                        name="Shared KB",
                        datasets=[{"id": "dataset-2"}],
                    ),
                ]
            },
            "knowledge set names must be unique",
        ),
        # The same dataset id twice in one set, differing only by whitespace.
        (
            {
                "sets": [
                    _baseline_knowledge_set_payload(datasets=[{"id": "dataset-1"}, {"id": " dataset-1 "}]),
                ]
            },
            "knowledge set dataset ids must be unique",
        ),
        # user_query mode without a query value.
        (
            {"sets": [_baseline_knowledge_set_payload(query={"mode": "user_query"})]},
            "knowledge query.value is required for user_query mode",
        ),
        # single retrieval mode without a model.
        (
            {"sets": [_baseline_knowledge_set_payload(retrieval={"mode": "single"})]},
            "knowledge retrieval.model is required for single mode",
        ),
        # automatic metadata filtering without a model config.
        (
            {"sets": [_baseline_knowledge_set_payload(metadata_filtering={"mode": "automatic"})]},
            "metadata_filtering.model_config is required for automatic mode",
        ),
        # manual metadata filtering without conditions.
        (
            {"sets": [_baseline_knowledge_set_payload(metadata_filtering={"mode": "manual"})]},
            "metadata_filtering.conditions is required for manual mode",
        ),
    ],
)
def test_knowledge_sets_contract_rejects_invalid_configs(knowledge_payload, match: str):
    """Each invalid ``knowledge`` payload is rejected with the expected validation message."""
    with pytest.raises(ValidationError, match=match):
        AgentSoulConfig.model_validate({"knowledge": knowledge_payload})
|
||||
|
||||
|
||||
def test_agent_soul_model_config_is_first_class_without_credentials():
|
||||
|
||||
@ -2594,20 +2594,151 @@ def test_dataset_rows_filters_malformed_ids(monkeypatch: pytest.MonkeyPatch):
|
||||
return [], 0
|
||||
|
||||
import services.dataset_service as dataset_service_module
|
||||
from services.agent.knowledge_datasets import get_tenant_knowledge_dataset_rows
|
||||
|
||||
monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", fake_get_datasets_by_ids)
|
||||
|
||||
valid = "550e8400-e29b-41d4-a716-446655440000"
|
||||
rows = AgentComposerService._dataset_rows(tenant_id="tenant-1", dataset_ids=["9999dead-beef", valid])
|
||||
rows = get_tenant_knowledge_dataset_rows(tenant_id="tenant-1", dataset_ids=["9999dead-beef", valid])
|
||||
assert rows == {}
|
||||
assert captured["ids"] == [valid]
|
||||
|
||||
# all-malformed input never touches the DB
|
||||
captured.clear()
|
||||
assert AgentComposerService._dataset_rows(tenant_id="tenant-1", dataset_ids=["nope"]) == {}
|
||||
assert get_tenant_knowledge_dataset_rows(tenant_id="tenant-1", dataset_ids=["nope"]) == {}
|
||||
assert captured == {}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("variant", "save_call"),
    [
        (
            ComposerVariant.AGENT_APP,
            lambda payload: AgentComposerService.save_agent_app_composer(
                tenant_id="tenant-1",
                app_id="app-1",
                account_id="account-1",
                payload=payload,
            ),
        ),
        (
            ComposerVariant.WORKFLOW,
            lambda payload: AgentComposerService.save_workflow_composer(
                tenant_id="tenant-1",
                app_id="app-1",
                node_id="node-1",
                account_id="account-1",
                payload=payload,
            ),
        ),
    ],
)
def test_composer_save_rejects_malformed_knowledge_dataset_ids(monkeypatch: pytest.MonkeyPatch, variant, save_call):
    """A non-UUID dataset id fails the save before any dataset lookup happens.

    Both save entry points must raise ``InvalidComposerConfigError`` naming the
    offending id, and the stubbed ``DatasetService.get_datasets_by_ids`` must
    never be invoked.
    """
    import services.dataset_service as dataset_service_module

    lookups = {"calls": 0}

    def record_lookup(ids, tenant_id):
        # Records every call so the final assertion can prove none was made.
        lookups["calls"] += 1
        lookups.update(ids=ids, tenant_id=tenant_id)
        return [], 0

    monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", record_lookup)

    malformed_set = {
        "id": "support",
        "name": "Support KB",
        "datasets": [{"id": "not-a-uuid"}],
        "query": {"mode": "generated_query"},
        "retrieval": {"mode": "multiple", "top_k": 4},
    }
    payload = ComposerSavePayload.model_validate(
        {
            "variant": variant.value,
            "save_strategy": ComposerSaveStrategy.SAVE_TO_CURRENT_VERSION.value,
            "soul_lock": {"locked": False},
            "agent_soul": {"knowledge": {"sets": [malformed_set]}},
        }
    )

    with pytest.raises(InvalidComposerConfigError, match="not-a-uuid"):
        save_call(payload)

    # Only the initial counter remains: the lookup stub was never reached.
    assert lookups == {"calls": 0}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("variant", "save_call"),
    [
        (
            ComposerVariant.AGENT_APP,
            lambda payload: AgentComposerService.save_agent_app_composer(
                tenant_id="tenant-1",
                app_id="app-1",
                account_id="account-1",
                payload=payload,
            ),
        ),
        (
            ComposerVariant.WORKFLOW,
            lambda payload: AgentComposerService.save_workflow_composer(
                tenant_id="tenant-1",
                app_id="app-1",
                node_id="node-1",
                account_id="account-1",
                payload=payload,
            ),
        ),
    ],
)
def test_composer_save_rejects_missing_or_out_of_scope_knowledge_datasets(
    monkeypatch: pytest.MonkeyPatch, variant, save_call
):
    """A well-formed dataset id that the lookup does not return fails the save.

    The stubbed ``DatasetService.get_datasets_by_ids`` returns no rows, so both
    save entry points must raise ``InvalidComposerConfigError`` naming the id,
    after querying exactly that id for ``tenant-1``.
    """
    import services.dataset_service as dataset_service_module

    missing_dataset_id = "550e8400-e29b-41d4-a716-446655440000"
    seen = {}

    def empty_lookup(ids, tenant_id):
        # Remember the arguments of the lookup, then report "nothing found".
        seen.update(ids=ids, tenant_id=tenant_id)
        return [], 0

    monkeypatch.setattr(dataset_service_module.DatasetService, "get_datasets_by_ids", empty_lookup)

    dangling_set = {
        "id": "support",
        "name": "Support KB",
        "datasets": [{"id": missing_dataset_id}],
        "query": {"mode": "generated_query"},
        "retrieval": {"mode": "multiple", "top_k": 4},
    }
    payload = ComposerSavePayload.model_validate(
        {
            "variant": variant.value,
            "save_strategy": ComposerSaveStrategy.SAVE_TO_CURRENT_VERSION.value,
            "soul_lock": {"locked": False},
            "agent_soul": {"knowledge": {"sets": [dangling_set]}},
        }
    )

    with pytest.raises(InvalidComposerConfigError, match=missing_dataset_id):
        save_call(payload)

    assert seen == {"ids": [missing_dataset_id], "tenant_id": "tenant-1"}
|
||||
|
||||
|
||||
def test_workspace_dify_tools_returns_provider_and_tool_granularities(monkeypatch: pytest.MonkeyPatch):
|
||||
"""The slash-menu Tools tab needs both selection granularities: a provider
|
||||
hosts many tools (like an MCP server), so candidates return one
|
||||
|
||||
@ -124,7 +124,18 @@ def _soul() -> AgentSoulConfig:
|
||||
{"id": "ct-2", "name": "disabled-one", "enabled": False},
|
||||
],
|
||||
},
|
||||
"knowledge": {"datasets": [{"id": "ds-1", "name": "旧名"}, {"id": "ds-gone", "name": "已删"}]},
|
||||
"knowledge": {
|
||||
"sets": [
|
||||
{
|
||||
"id": "kb-1",
|
||||
"name": "产品知识",
|
||||
"description": "knowledge set",
|
||||
"datasets": [{"id": "ds-1", "name": "旧名"}, {"id": "ds-gone", "name": "已删"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
}
|
||||
]
|
||||
},
|
||||
"human": {"contacts": [{"id": "c-1", "name": "David Hayes", "channel": "email"}]},
|
||||
}
|
||||
)
|
||||
@ -143,12 +154,16 @@ def test_soul_candidates_lists_configured_items_only():
|
||||
assert [item["name"] for item in lists["cli_tools"]] == ["ffmpeg"]
|
||||
# the stable mention id flows through so the frontend can mint [§cli_tool:<id>§]
|
||||
assert [item["id"] for item in lists["cli_tools"]] == ["ct-1"]
|
||||
# enriched from DB; dangling dataset kept with missing flag (placeholder, 0522)
|
||||
knowledge = {item["id"]: item for item in lists["knowledge_datasets"]}
|
||||
assert knowledge["ds-1"]["name"] == "产品手册"
|
||||
assert knowledge["ds-1"]["missing"] is False
|
||||
assert knowledge["ds-gone"]["missing"] is True
|
||||
assert knowledge["ds-gone"]["name"] == "已删"
|
||||
# Knowledge mentions point at set ids; nested datasets are hydrated for context.
|
||||
knowledge_set = lists["knowledge_sets"][0]
|
||||
assert knowledge_set["id"] == "kb-1"
|
||||
assert knowledge_set["name"] == "产品知识"
|
||||
assert knowledge_set["missing_dataset_ids"] == ["ds-gone"]
|
||||
datasets = {item["id"]: item for item in knowledge_set["datasets"]}
|
||||
assert datasets["ds-1"]["name"] == "产品手册"
|
||||
assert datasets["ds-1"]["missing"] is False
|
||||
assert datasets["ds-gone"]["missing"] is True
|
||||
assert datasets["ds-gone"]["name"] == "已删"
|
||||
assert lists["human_contacts"][0]["id"] == "c-1"
|
||||
assert lists["dify_tools"][0]["id"] == "tavily/tavily_search"
|
||||
|
||||
|
||||
@ -149,22 +149,32 @@ def test_dangling_knowledge_without_label_gets_fallback_name():
|
||||
]
|
||||
|
||||
|
||||
def test_configured_but_deleted_knowledge_set_surfaces_as_placeholder():
    """A knowledge mention is clean only while its set id is still known.

    The prompt mentions knowledge set ``kb-1``. When ``_findings`` is told that
    set id exists, no placeholder is reported; when it is told no set ids exist,
    the mention is reported as a placeholder carrying the mention's label.
    """
    payload = ComposerSavePayload.model_validate(
        {
            "variant": "agent_app",
            "agent_soul": {
                "prompt": {"system_prompt": "see [§knowledge:kb-1:产品手册§]"},
                "knowledge": {
                    "sets": [
                        {
                            "id": "kb-1",
                            "name": "产品手册",
                            "datasets": [{"id": "ds-1", "name": "产品手册"}],
                            "query": {"mode": "generated_query"},
                            "retrieval": {"mode": "multiple", "top_k": 4},
                        }
                    ]
                },
            },
            "save_strategy": "save_to_current_version",
        }
    )
    # configured + current Agent Soul row exists -> clean
    assert _findings(payload, existing_knowledge_set_ids={"kb-1"})["knowledge_retrieval_placeholder"] == []
    # configured but removed from the current Agent Soul surface -> placeholder
    assert _findings(payload, existing_knowledge_set_ids=set())["knowledge_retrieval_placeholder"] == [
        {"id": "kb-1", "placeholder_name": "产品手册"}
    ]
|
||||
|
||||
|
||||
|
||||
@ -107,7 +107,17 @@ def soul() -> AgentSoulConfig:
|
||||
],
|
||||
"cli_tools": [{"id": "ct-1", "name": "ffmpeg"}],
|
||||
},
|
||||
"knowledge": {"datasets": [{"id": "ds-1", "name": "产品手册"}]},
|
||||
"knowledge": {
|
||||
"sets": [
|
||||
{
|
||||
"id": "kb-1",
|
||||
"name": "产品手册",
|
||||
"datasets": [{"id": "ds-1", "name": "产品手册"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
}
|
||||
]
|
||||
},
|
||||
"human": {"contacts": [{"id": "c-1", "name": "David Hayes", "channel": "email"}]},
|
||||
}
|
||||
)
|
||||
@ -117,7 +127,7 @@ def test_soul_resolver_resolves_each_kind(soul: AgentSoulConfig):
|
||||
resolver = build_soul_mention_resolver(soul)
|
||||
prompt = (
|
||||
"Use [§tool:tavily/tavily_search:tavily§], run [§cli_tool:ct-1:ffmpeg§], "
|
||||
"ground in [§knowledge:ds-1§], ask [§human:c-1§]."
|
||||
"ground in [§knowledge:kb-1§], ask [§human:c-1§]."
|
||||
)
|
||||
|
||||
expanded = expand_prompt_mentions(prompt, resolver)
|
||||
|
||||
@ -7,21 +7,31 @@ root stays import-safe for callers that only need to construct run requests.
|
||||
from dify_agent.layers.knowledge.configs import (
|
||||
DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID,
|
||||
DifyKnowledgeBaseLayerConfig,
|
||||
DifyKnowledgeDatasetConfig,
|
||||
DifyKnowledgeEagerResult,
|
||||
DifyKnowledgeMetadataCondition,
|
||||
DifyKnowledgeMetadataConditions,
|
||||
DifyKnowledgeMetadataFilteringConfig,
|
||||
DifyKnowledgeModelConfig,
|
||||
DifyKnowledgeQueryConfig,
|
||||
DifyKnowledgeRerankingModelConfig,
|
||||
DifyKnowledgeRetrievalConfig,
|
||||
DifyKnowledgeRuntimeState,
|
||||
DifyKnowledgeSetConfig,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID",
|
||||
"DifyKnowledgeBaseLayerConfig",
|
||||
"DifyKnowledgeDatasetConfig",
|
||||
"DifyKnowledgeEagerResult",
|
||||
"DifyKnowledgeMetadataCondition",
|
||||
"DifyKnowledgeMetadataConditions",
|
||||
"DifyKnowledgeMetadataFilteringConfig",
|
||||
"DifyKnowledgeModelConfig",
|
||||
"DifyKnowledgeQueryConfig",
|
||||
"DifyKnowledgeRerankingModelConfig",
|
||||
"DifyKnowledgeRetrievalConfig",
|
||||
"DifyKnowledgeRuntimeState",
|
||||
"DifyKnowledgeSetConfig",
|
||||
]
|
||||
|
||||
@ -1,12 +1,11 @@
|
||||
"""Client-safe DTOs for the Dify knowledge-base Agenton layer.
|
||||
|
||||
The public layer config exposes only static retrieval controls: dataset ids,
|
||||
retrieval strategy, metadata filtering, and observation-size limits. The agent
|
||||
model itself should only ever see a single ``query`` tool argument; tenant/
|
||||
app/user context comes from the execution-context layer and the actual
|
||||
retrieval is delegated to the Dify API inner endpoint. Tool naming is not
|
||||
caller-configurable: the runtime always exposes the same stable knowledge-base
|
||||
search tool.
|
||||
The public layer config carries one or more named knowledge sets. Each set owns
|
||||
its dataset ids plus query, retrieval, and metadata-filtering policy. Generated-
|
||||
query sets are exposed through one stable model-visible search tool whose
|
||||
schema lets the model pick ``set_name`` and ``query``; user-query sets are
|
||||
retrieved eagerly when the layer enters a run and their formatted observations
|
||||
are kept only in JSON-safe ``runtime_state`` for session snapshots.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@ -61,6 +60,44 @@ class DifyKnowledgeRerankingModelConfig(BaseModel):
|
||||
model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
|
||||
|
||||
|
||||
class DifyKnowledgeDatasetConfig(BaseModel):
    """A single dataset reference held by a knowledge set.

    Retrieval only needs ``id``. ``name`` and ``description`` are optional
    labels that callers may pass along; they are kept for runtime/debug
    snapshots and do not alter the inner retrieval request contract.
    """

    id: str
    name: str | None = None
    description: str | None = None

    # Unknown keys are rejected rather than silently ignored.
    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")

    @field_validator("id")
    @classmethod
    def validate_id(cls, value: str) -> str:
        """Return ``value`` without surrounding whitespace.

        Raises:
            ValueError: if nothing is left after stripping.
        """
        stripped = value.strip()
        if stripped:
            return stripped
        raise ValueError("dataset id must not be blank")
|
||||
|
||||
|
||||
class DifyKnowledgeQueryConfig(BaseModel):
    """How the search query for one knowledge set is obtained.

    ``mode`` is either ``"user_query"`` or ``"generated_query"``; ``value`` is
    mandatory (non-blank) only for ``"user_query"``.
    """

    mode: Literal["user_query", "generated_query"]
    value: str | None = None

    # Unknown keys are rejected rather than silently ignored.
    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")

    @model_validator(mode="after")
    def validate_mode_specific_fields(self) -> DifyKnowledgeQueryConfig:
        """Require a non-blank ``value`` when ``mode`` is ``"user_query"``.

        Raises:
            ValueError: if ``mode`` is ``"user_query"`` and ``value`` is
                missing, empty, or whitespace only.
        """
        if self.mode != "user_query":
            return self
        has_text = bool((self.value or "").strip())
        if has_text:
            return self
        raise ValueError("query.value is required for user_query mode")
|
||||
|
||||
|
||||
class DifyKnowledgeRetrievalConfig(BaseModel):
|
||||
"""Static retrieval controls mirrored into the inner API request."""
|
||||
|
||||
@ -151,38 +188,90 @@ class DifyKnowledgeMetadataFilteringConfig(BaseModel):
|
||||
return payload
|
||||
|
||||
|
||||
class DifyKnowledgeSetConfig(BaseModel):
    """One independently searchable or eagerly-preloaded knowledge set.

    A set carries its own identity (``id``, ``name``), the datasets it covers,
    and the query, retrieval, and metadata-filtering policy applied to them.
    """

    id: str
    name: str
    description: str | None = None
    datasets: list[DifyKnowledgeDatasetConfig]
    query: DifyKnowledgeQueryConfig
    retrieval: DifyKnowledgeRetrievalConfig
    metadata_filtering: DifyKnowledgeMetadataFilteringConfig = Field(
        default_factory=DifyKnowledgeMetadataFilteringConfig
    )

    # Unknown keys are rejected rather than silently ignored.
    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")

    @field_validator("id", "name")
    @classmethod
    def validate_non_blank_identity(cls, value: str) -> str:
        """Return ``value`` stripped of surrounding whitespace.

        Raises:
            ValueError: if nothing is left after stripping.
        """
        normalized = value.strip()
        if not normalized:
            raise ValueError("knowledge set id and name must not be blank")
        return normalized

    @model_validator(mode="after")
    def validate_dataset_ids(self) -> DifyKnowledgeSetConfig:
        """Require at least one dataset and no repeated dataset id.

        Raises:
            ValueError: if ``datasets`` is empty or two entries share an id.
        """
        if not self.datasets:
            raise ValueError("knowledge set requires at least one dataset")
        dataset_ids = self.dataset_ids
        if len(dataset_ids) != len(set(dataset_ids)):
            raise ValueError("knowledge set dataset ids must be unique")
        return self

    @property
    def dataset_ids(self) -> list[str]:
        """Return the selected dataset ids for the inner retrieval request."""
        return [dataset.id for dataset in self.datasets]
|
||||
|
||||
|
||||
class DifyKnowledgeEagerResult(BaseModel):
    """Outcome of one eager user-query retrieval, kept in layer runtime state.

    All fields are plain strings so the result stays JSON-safe.
    """

    # Identity of the knowledge set the retrieval was run for.
    set_id: str
    set_name: str
    # Query text that was sent and the formatted observation that came back.
    query: str
    observation: str
    # Coarse outcome of the retrieval attempt.
    status: Literal["success", "empty", "temporarily_unavailable"]

    # Unknown keys are rejected rather than silently ignored.
    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")
|
||||
|
||||
|
||||
class DifyKnowledgeRuntimeState(BaseModel):
    """Serializable eager-retrieval state carried in Agenton session snapshots."""

    # Fingerprint of the config that produced ``eager_results``; ``None`` when unset.
    eager_config_fingerprint: str | None = None
    # One entry per eagerly retrieved knowledge set; empty by default.
    eager_results: list[DifyKnowledgeEagerResult] = Field(default_factory=list)

    # Unknown keys are rejected, and attribute assignments are validated too.
    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid", validate_assignment=True)
|
||||
|
||||
|
||||
class DifyKnowledgeBaseLayerConfig(LayerConfig):
    """Public config for one knowledge-base layer.

    The model-visible surface stays fixed to ``knowledge_base_search``. Set
    names are the only model-visible selection labels; dataset ids, retrieval
    controls, metadata filtering, and caller identity remain config/runtime
    concerns outside the tool schema.
    """

    sets: list[DifyKnowledgeSetConfig]
    max_result_content_chars: int = Field(default=2000, ge=1)
    max_observation_chars: int = Field(default=12000, ge=1)

    # Unknown keys are rejected rather than silently ignored.
    model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid")

    @model_validator(mode="after")
    def validate_sets_and_observation_limits(self) -> DifyKnowledgeBaseLayerConfig:
        """Check set uniqueness and that the size limits are consistent.

        Raises:
            ValueError: if ``sets`` is empty, two sets share an id, two set
                names are equal after stripping and lower-casing, or
                ``max_observation_chars`` is below ``max_result_content_chars``.
        """
        if not self.sets:
            raise ValueError("sets must contain at least one knowledge set")
        set_ids = [knowledge_set.id for knowledge_set in self.sets]
        if len(set_ids) != len(set(set_ids)):
            raise ValueError("knowledge set ids must be unique")
        # Names are compared case-insensitively so "Docs" and "docs" collide.
        normalized_names = [knowledge_set.name.strip().lower() for knowledge_set in self.sets]
        if len(normalized_names) != len(set(normalized_names)):
            raise ValueError("knowledge set names must be unique")
        if self.max_observation_chars < self.max_result_content_chars:
            raise ValueError("max_observation_chars must be greater than or equal to max_result_content_chars")
        return self
|
||||
@ -191,10 +280,15 @@ class DifyKnowledgeBaseLayerConfig(LayerConfig):
|
||||
__all__ = [
|
||||
"DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID",
|
||||
"DifyKnowledgeBaseLayerConfig",
|
||||
"DifyKnowledgeDatasetConfig",
|
||||
"DifyKnowledgeEagerResult",
|
||||
"DifyKnowledgeMetadataCondition",
|
||||
"DifyKnowledgeMetadataConditions",
|
||||
"DifyKnowledgeMetadataFilteringConfig",
|
||||
"DifyKnowledgeModelConfig",
|
||||
"DifyKnowledgeQueryConfig",
|
||||
"DifyKnowledgeRerankingModelConfig",
|
||||
"DifyKnowledgeRetrievalConfig",
|
||||
"DifyKnowledgeRuntimeState",
|
||||
"DifyKnowledgeSetConfig",
|
||||
]
|
||||
|
||||
@ -1,17 +1,18 @@
|
||||
"""Dify knowledge-base layer exposing one model-visible search tool.
|
||||
"""Dify knowledge-base layer exposing set-aware retrieval.
|
||||
|
||||
The layer depends on ``DifyExecutionContextLayer`` for tenant/app/user/invoke
|
||||
identity, keeps retrieval controls in config only, and borrows a lifespan-owned
|
||||
HTTP client for each tool invocation. It never owns live clients or stores
|
||||
retrieved source content in layer state. Tool identity is intentionally fixed at
|
||||
runtime: callers cannot rename the knowledge tool or override its description
|
||||
through public layer config because the model-visible surface must stay stable
|
||||
across API-side Agent Soul mappings.
|
||||
identity. Generated-query sets become one stable model-visible
|
||||
``knowledge_base_search(set_name, query)`` tool, while user-query sets are
|
||||
retrieved eagerly during context entry and exposed as additional user prompt
|
||||
content. Eager observations are persisted only as JSON-safe runtime state so
|
||||
Agenton session snapshots can resume without repeating unchanged retrievals.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
from typing import ClassVar, cast
|
||||
|
||||
@ -27,7 +28,13 @@ from dify_agent.layers.knowledge.client import (
|
||||
DifyKnowledgeBaseClientError,
|
||||
DifyKnowledgeRetrieveResponse,
|
||||
)
|
||||
from dify_agent.layers.knowledge.configs import DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID, DifyKnowledgeBaseLayerConfig
|
||||
from dify_agent.layers.knowledge.configs import (
|
||||
DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID,
|
||||
DifyKnowledgeBaseLayerConfig,
|
||||
DifyKnowledgeEagerResult,
|
||||
DifyKnowledgeRuntimeState,
|
||||
DifyKnowledgeSetConfig,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -35,23 +42,14 @@ logger = logging.getLogger(__name__)
|
||||
# public DTO cannot grow a parallel naming contract that diverges from the
|
||||
# runtime knowledge-search surface.
|
||||
_KNOWLEDGE_BASE_TOOL_NAME = "knowledge_base_search"
|
||||
_KNOWLEDGE_BASE_TOOL_DESCRIPTION = "Search configured knowledge bases for information relevant to the query."
|
||||
_KNOWLEDGE_BASE_TOOL_DESCRIPTION = (
|
||||
"Search a configured knowledge set. Pick one configured set_name and provide a focused search query."
|
||||
)
|
||||
BLANK_QUERY_OBSERVATION = "knowledge base search requires a non-empty query"
|
||||
NO_RESULTS_OBSERVATION = "No relevant knowledge base results were found."
|
||||
TEMPORARY_UNAVAILABLE_OBSERVATION = (
|
||||
"Knowledge base search is temporarily unavailable. Please continue without it if possible."
|
||||
)
|
||||
QUERY_TOOL_SCHEMA = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Search query for the configured knowledge bases.",
|
||||
}
|
||||
},
|
||||
"required": ["query"],
|
||||
"additionalProperties": False,
|
||||
}
|
||||
|
||||
|
||||
class DifyKnowledgeBaseDeps(LayerDeps):
|
||||
@ -61,8 +59,10 @@ class DifyKnowledgeBaseDeps(LayerDeps):
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBaseLayerConfig]):
|
||||
"""Layer that resolves one config-scoped knowledge search tool."""
|
||||
class DifyKnowledgeBaseLayer(
|
||||
PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBaseLayerConfig, DifyKnowledgeRuntimeState]
|
||||
):
|
||||
"""Layer that resolves set-scoped knowledge tools and eager user prompts."""
|
||||
|
||||
type_id: ClassVar[str | None] = DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID
|
||||
|
||||
@ -95,7 +95,7 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
|
||||
)
|
||||
|
||||
async def get_tools(self, *, http_client: httpx.AsyncClient) -> list[Tool[object]]:
|
||||
"""Build one Pydantic AI tool that exposes only ``query`` to the model.
|
||||
"""Build the unified generated-query Pydantic AI tool, when needed.
|
||||
|
||||
Knowledge tools depend on execution-context identity that is optional for
|
||||
other run types but mandatory here: ``tenant_id``, ``user_id``,
|
||||
@ -103,11 +103,15 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
|
||||
any HTTP request is attempted. Tool execution then follows a strict
|
||||
observation policy:
|
||||
|
||||
- unknown ``set_name`` returns a local validation observation;
|
||||
- blank ``query`` returns a local validation observation;
|
||||
- retryable client failures (timeouts, connection failures, HTTP
|
||||
``429``/``502``) become a temporary-unavailable observation;
|
||||
- non-retryable client failures are raised so the run fails fast.
|
||||
"""
|
||||
generated_sets = self._generated_query_sets()
|
||||
if not generated_sets:
|
||||
return []
|
||||
if http_client.is_closed:
|
||||
raise RuntimeError("DifyKnowledgeBaseLayer.get_tools() requires an open shared HTTP client.")
|
||||
|
||||
@ -118,54 +122,28 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
|
||||
api_key=self.inner_api_key,
|
||||
http_client=http_client,
|
||||
)
|
||||
set_by_name = {knowledge_set.name: knowledge_set for knowledge_set in generated_sets}
|
||||
|
||||
async def knowledge_base_search(_ctx: RunContext[object], query: str) -> str:
|
||||
async def knowledge_base_search(_ctx: RunContext[object], set_name: str, query: str) -> str:
|
||||
knowledge_set = set_by_name.get(set_name)
|
||||
if knowledge_set is None:
|
||||
return f"unknown knowledge set: {set_name}"
|
||||
normalized_query = query.strip()
|
||||
if not normalized_query:
|
||||
return BLANK_QUERY_OBSERVATION
|
||||
try:
|
||||
response = await client.retrieve(
|
||||
tenant_id=caller["tenant_id"],
|
||||
user_id=caller["user_id"],
|
||||
app_id=caller["app_id"],
|
||||
user_from=caller["user_from"],
|
||||
invoke_from=caller["invoke_from"],
|
||||
dataset_ids=list(self.config.dataset_ids),
|
||||
query=normalized_query,
|
||||
retrieval=self.config.retrieval,
|
||||
metadata_filtering=self.config.metadata_filtering,
|
||||
)
|
||||
except DifyKnowledgeBaseClientError as exc:
|
||||
if exc.retryable:
|
||||
logger.warning(
|
||||
"knowledge base search temporarily unavailable",
|
||||
extra={
|
||||
"tenant_id": caller["tenant_id"],
|
||||
"app_id": caller["app_id"],
|
||||
"invoke_from": caller["invoke_from"],
|
||||
"error_code": exc.error_code,
|
||||
"status_code": exc.status_code,
|
||||
},
|
||||
)
|
||||
return TEMPORARY_UNAVAILABLE_OBSERVATION
|
||||
logger.error(
|
||||
"knowledge base search failed",
|
||||
extra={
|
||||
"tenant_id": caller["tenant_id"],
|
||||
"app_id": caller["app_id"],
|
||||
"invoke_from": caller["invoke_from"],
|
||||
"error_code": exc.error_code,
|
||||
"status_code": exc.status_code,
|
||||
},
|
||||
)
|
||||
raise
|
||||
return _format_observation(response, self.config)
|
||||
return await self._retrieve_for_set(
|
||||
client=client,
|
||||
caller=caller,
|
||||
knowledge_set=knowledge_set,
|
||||
query=normalized_query,
|
||||
retryable_observation=True,
|
||||
)
|
||||
|
||||
async def prepare_tool_definition(_ctx: RunContext[object], tool_def: ToolDefinition) -> ToolDefinition:
|
||||
return ToolDefinition(
|
||||
name=tool_def.name,
|
||||
description=tool_def.description,
|
||||
parameters_json_schema=QUERY_TOOL_SCHEMA,
|
||||
parameters_json_schema=_tool_schema(generated_sets),
|
||||
strict=tool_def.strict,
|
||||
sequential=tool_def.sequential,
|
||||
metadata=tool_def.metadata,
|
||||
@ -181,11 +159,177 @@ class DifyKnowledgeBaseLayer(PlainLayer[DifyKnowledgeBaseDeps, DifyKnowledgeBase
|
||||
knowledge_base_search,
|
||||
takes_ctx=True,
|
||||
name=_KNOWLEDGE_BASE_TOOL_NAME,
|
||||
description=_KNOWLEDGE_BASE_TOOL_DESCRIPTION,
|
||||
description=_tool_description(generated_sets),
|
||||
prepare=prepare_tool_definition,
|
||||
)
|
||||
]
|
||||
|
||||
@property
@override
def user_prompts(self) -> list[str]:
    """Render stored eager results as one extra user prompt.

    Returns an empty list when ``runtime_state.eager_results`` is empty;
    otherwise a single-item list whose text has one ``Set``/``Query``/
    ``Results`` section per stored result, separated by blank lines.
    """
    stored_results = self.runtime_state.eager_results
    if not stored_results:
        return []

    rendered = "\n\n".join(
        f"Set: {result.set_name}\nQuery: {result.query}\nResults:\n{result.observation}"
        for result in stored_results
    )
    return ["Knowledge retrieval results:\n\n" + rendered]
|
||||
|
||||
@override
async def on_context_create(self) -> None:
    """Run the eager-retrieval refresh when the context is created."""
    await self._refresh_eager_results_if_needed()
|
||||
|
||||
@override
async def on_context_resume(self) -> None:
    """Run the eager-retrieval refresh when the context is resumed."""
    await self._refresh_eager_results_if_needed()
|
||||
|
||||
def _generated_query_sets(self) -> list[DifyKnowledgeSetConfig]:
    """Return the configured sets whose query mode is ``"generated_query"``, in config order."""
    selected: list[DifyKnowledgeSetConfig] = []
    for candidate in self.config.sets:
        if candidate.query.mode == "generated_query":
            selected.append(candidate)
    return selected
|
||||
|
||||
def _user_query_sets(self) -> list[DifyKnowledgeSetConfig]:
    """Return the configured sets whose query mode is ``"user_query"``, in config order."""
    selected: list[DifyKnowledgeSetConfig] = []
    for candidate in self.config.sets:
        if candidate.query.mode == "user_query":
            selected.append(candidate)
    return selected
|
||||
|
||||
async def _refresh_eager_results_if_needed(self) -> None:
    """Bring ``runtime_state.eager_results`` in line with the user-query sets.

    - With no user-query sets, the stored fingerprint and results are cleared.
    - When the stored fingerprint matches the current user-query sets and no
      stored result is ``temporarily_unavailable``, nothing is retrieved.
    - Otherwise every user-query set is retrieved again and the results and
      fingerprint are replaced together.

    A stored ``temporarily_unavailable`` result is treated as stale so that a
    transient outage recorded in one run is retried on the next entry instead
    of being replayed for as long as the config stays unchanged.

    Raises:
        DifyKnowledgeBaseClientError: re-raised when a retrieval fails with a
            non-retryable error; runtime state is left untouched in that case.
    """
    user_query_sets = self._user_query_sets()
    if not user_query_sets:
        self.runtime_state.eager_config_fingerprint = None
        self.runtime_state.eager_results = []
        return

    fingerprint = _eager_config_fingerprint(user_query_sets)
    has_cached_outage = any(
        result.status == "temporarily_unavailable" for result in self.runtime_state.eager_results
    )
    if self.runtime_state.eager_config_fingerprint == fingerprint and not has_cached_outage:
        return

    caller = _build_caller_context(self.deps.execution_context.config)
    eager_results: list[DifyKnowledgeEagerResult] = []
    # This path opens its own short-lived HTTP client for the eager calls.
    async with httpx.AsyncClient() as http_client:
        client = DifyKnowledgeBaseClient(
            base_url=self.inner_api_url,
            api_key=self.inner_api_key,
            http_client=http_client,
        )
        for knowledge_set in user_query_sets:
            query = (knowledge_set.query.value or "").strip()
            try:
                response = await client.retrieve(
                    tenant_id=caller["tenant_id"],
                    user_id=caller["user_id"],
                    app_id=caller["app_id"],
                    user_from=caller["user_from"],
                    invoke_from=caller["invoke_from"],
                    dataset_ids=knowledge_set.dataset_ids,
                    query=query,
                    retrieval=knowledge_set.retrieval,
                    metadata_filtering=knowledge_set.metadata_filtering,
                )
            except DifyKnowledgeBaseClientError as exc:
                log_extra = {
                    "tenant_id": caller["tenant_id"],
                    "app_id": caller["app_id"],
                    "invoke_from": caller["invoke_from"],
                    "knowledge_set_id": knowledge_set.id,
                    "error_code": exc.error_code,
                    "status_code": exc.status_code,
                }
                if not exc.retryable:
                    logger.error("eager knowledge retrieval failed", extra=log_extra)
                    raise
                # Retryable failures degrade to a placeholder result for this set.
                logger.warning("eager knowledge retrieval temporarily unavailable", extra=log_extra)
                observation = TEMPORARY_UNAVAILABLE_OBSERVATION
                status = "temporarily_unavailable"
            else:
                observation = _format_observation(response, self.config, include_heading=False)
                status = "success" if response.results else "empty"

            eager_results.append(
                DifyKnowledgeEagerResult(
                    set_id=knowledge_set.id,
                    set_name=knowledge_set.name,
                    query=query,
                    observation=observation,
                    status=status,
                )
            )

    self.runtime_state.eager_results = eager_results
    self.runtime_state.eager_config_fingerprint = fingerprint
|
||||
|
||||
async def _retrieve_for_set(
    self,
    *,
    client: DifyKnowledgeBaseClient,
    caller: dict[str, str],
    knowledge_set: DifyKnowledgeSetConfig,
    query: str,
    retryable_observation: bool,
) -> str:
    """Run one retrieval against a single knowledge set and render the result.

    Args:
        client: Knowledge-base client whose ``retrieve`` coroutine is awaited.
        caller: Caller identity mapping; the keys ``tenant_id``, ``user_id``,
            ``app_id``, ``user_from`` and ``invoke_from`` are read from it.
        knowledge_set: Set whose ``dataset_ids``, ``retrieval`` and
            ``metadata_filtering`` are forwarded to the client.
        query: Search text forwarded to the client unchanged.
        retryable_observation: When true, a retryable client error is logged
            as a warning and turned into ``TEMPORARY_UNAVAILABLE_OBSERVATION``
            instead of being raised.

    Returns:
        The formatted observation for the response, or
        ``TEMPORARY_UNAVAILABLE_OBSERVATION`` for a tolerated retryable error.

    Raises:
        DifyKnowledgeBaseClientError: Re-raised (after an error log) when the
            failure is not retryable or ``retryable_observation`` is false.
    """
    request_kwargs = {
        "tenant_id": caller["tenant_id"],
        "user_id": caller["user_id"],
        "app_id": caller["app_id"],
        "user_from": caller["user_from"],
        "invoke_from": caller["invoke_from"],
        "dataset_ids": knowledge_set.dataset_ids,
        "query": query,
        "retrieval": knowledge_set.retrieval,
        "metadata_filtering": knowledge_set.metadata_filtering,
    }
    try:
        response = await client.retrieve(**request_kwargs)
    except DifyKnowledgeBaseClientError as exc:
        degrade_to_observation = exc.retryable and retryable_observation
        # Both log records carry the same structured context.
        log_context = {
            "tenant_id": caller["tenant_id"],
            "app_id": caller["app_id"],
            "invoke_from": caller["invoke_from"],
            "knowledge_set_id": knowledge_set.id,
            "error_code": exc.error_code,
            "status_code": exc.status_code,
        }
        if not degrade_to_observation:
            logger.error("knowledge base search failed", extra=log_context)
            raise
        logger.warning("knowledge base search temporarily unavailable", extra=log_context)
        return TEMPORARY_UNAVAILABLE_OBSERVATION
    return _format_observation(response, self.config)
|
||||
|
||||
|
||||
def _build_caller_context(execution_context: object) -> dict[str, str]:
|
||||
"""Extract the inner-API caller identity from execution-context config.
|
||||
@ -232,7 +376,56 @@ def _build_caller_context(execution_context: object) -> dict[str, str]:
|
||||
}
|
||||
|
||||
|
||||
def _format_observation(response: DifyKnowledgeRetrieveResponse, config: DifyKnowledgeBaseLayerConfig) -> str:
|
||||
def _tool_schema(generated_sets: list[DifyKnowledgeSetConfig]) -> dict[str, object]:
    """Build the JSON-schema parameters object for the knowledge search tool.

    The schema requires two string arguments: ``set_name``, restricted to the
    names of ``generated_sets`` (in the given order), and ``query``. Extra
    properties are disallowed.
    """
    allowed_names = [entry.name for entry in generated_sets]
    set_name_property = {
        "type": "string",
        "enum": allowed_names,
        "description": "Knowledge set to search.",
    }
    query_property = {
        "type": "string",
        "description": "Search query for the selected knowledge set.",
    }
    schema: dict[str, object] = {
        "type": "object",
        "properties": {
            "set_name": set_name_property,
            "query": query_property,
        },
        "required": ["set_name", "query"],
        "additionalProperties": False,
    }
    return schema
|
||||
|
||||
|
||||
def _tool_description(generated_sets: list[DifyKnowledgeSetConfig]) -> str:
    """Compose the tool description listing every configured knowledge set.

    Each set is rendered as ``name: description`` when its description is
    truthy, otherwise as the bare name; the entries are comma-separated and
    appended to the base tool description.
    """
    labels = [
        f"{entry.name}: {entry.description}" if entry.description else entry.name
        for entry in generated_sets
    ]
    return f"{_KNOWLEDGE_BASE_TOOL_DESCRIPTION} Configured sets: {', '.join(labels)}."
|
||||
|
||||
|
||||
def _eager_config_fingerprint(user_query_sets: list[DifyKnowledgeSetConfig]) -> str:
    """Return a SHA-256 hex digest over the given sets' retrieval configuration.

    For each set, the id, query, dataset ids, retrieval settings and metadata
    filtering (dumped by alias) are serialized as compact JSON with sorted
    keys, so equal configurations in the same order hash identically. Set
    names and descriptions are not part of the digest.
    """
    entries = []
    for entry in user_query_sets:
        entries.append(
            {
                "id": entry.id,
                "query": entry.query.model_dump(mode="json"),
                "dataset_ids": entry.dataset_ids,
                "retrieval": entry.retrieval.model_dump(mode="json"),
                "metadata_filtering": entry.metadata_filtering.model_dump(mode="json", by_alias=True),
            }
        )
    canonical = json.dumps(entries, sort_keys=True, separators=(",", ":"))
    digest = hashlib.sha256(canonical.encode("utf-8"))
    return digest.hexdigest()
|
||||
|
||||
|
||||
def _format_observation(
|
||||
response: DifyKnowledgeRetrieveResponse,
|
||||
config: DifyKnowledgeBaseLayerConfig,
|
||||
*,
|
||||
include_heading: bool = True,
|
||||
) -> str:
|
||||
"""Render inner-API retrieval results into the model-visible tool response.
|
||||
|
||||
The formatting contract is intentionally simple and stable for the model:
|
||||
@ -248,7 +441,7 @@ def _format_observation(response: DifyKnowledgeRetrieveResponse, config: DifyKno
|
||||
if not response.results:
|
||||
return NO_RESULTS_OBSERVATION
|
||||
|
||||
lines = ["Knowledge base search results:"]
|
||||
lines = ["Knowledge base search results:"] if include_heading else []
|
||||
for index, result in enumerate(response.results, start=1):
|
||||
metadata = result.metadata
|
||||
title = result.title or metadata.document_name or "Untitled"
|
||||
@ -280,6 +473,5 @@ __all__ = [
|
||||
"DifyKnowledgeBaseDeps",
|
||||
"DifyKnowledgeBaseLayer",
|
||||
"NO_RESULTS_OBSERVATION",
|
||||
"QUERY_TOOL_SCHEMA",
|
||||
"TEMPORARY_UNAVAILABLE_OBSERVATION",
|
||||
]
|
||||
|
||||
@ -6,46 +6,142 @@ from dify_agent.layers.knowledge import DifyKnowledgeBaseLayerConfig
|
||||
|
||||
def _valid_config() -> dict[str, object]:
|
||||
return {
|
||||
"dataset_ids": ["dataset-1"],
|
||||
"retrieval": {
|
||||
"mode": "multiple",
|
||||
"top_k": 4,
|
||||
},
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {
|
||||
"mode": "multiple",
|
||||
"top_k": 4,
|
||||
},
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def test_knowledge_base_config_accepts_valid_multiple_mode() -> None:
|
||||
config = DifyKnowledgeBaseLayerConfig.model_validate(_valid_config())
|
||||
|
||||
assert config.dataset_ids == ["dataset-1"]
|
||||
assert config.retrieval.top_k == 4
|
||||
assert config.metadata_filtering.mode == "disabled"
|
||||
assert config.sets[0].dataset_ids == ["dataset-1"]
|
||||
assert config.sets[0].retrieval.top_k == 4
|
||||
assert config.sets[0].metadata_filtering.mode == "disabled"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"payload, expected_message",
|
||||
[
|
||||
({"dataset_ids": [], "retrieval": {"mode": "multiple", "top_k": 4}}, "dataset_ids"),
|
||||
({"sets": []}, "sets"),
|
||||
({"tool_name": "knowledge_base_search", **_valid_config()}, "Extra inputs are not permitted"),
|
||||
({"tool_description": "Search knowledge", **_valid_config()}, "Extra inputs are not permitted"),
|
||||
({"dataset_ids": ["dataset-1"], "retrieval": {"mode": "multiple"}}, "top_k"),
|
||||
({"dataset_ids": ["dataset-1"], "retrieval": {"mode": "single"}}, "retrieval.model"),
|
||||
(
|
||||
{
|
||||
"dataset_ids": ["dataset-1"],
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
"metadata_filtering": {"mode": "automatic"},
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": ""}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
}
|
||||
]
|
||||
},
|
||||
"dataset id",
|
||||
),
|
||||
(
|
||||
{
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query": {"mode": "user_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
}
|
||||
]
|
||||
},
|
||||
"query.value",
|
||||
),
|
||||
(
|
||||
{
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple"},
|
||||
}
|
||||
]
|
||||
},
|
||||
"top_k",
|
||||
),
|
||||
(
|
||||
{
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "single"},
|
||||
}
|
||||
]
|
||||
},
|
||||
"retrieval.model",
|
||||
),
|
||||
(
|
||||
{
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
"metadata_filtering": {"mode": "automatic"},
|
||||
}
|
||||
],
|
||||
},
|
||||
"metadata_filtering.model_config",
|
||||
),
|
||||
(
|
||||
{
|
||||
"dataset_ids": ["dataset-1"],
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
"metadata_filtering": {"mode": "manual"},
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
"metadata_filtering": {"mode": "manual"},
|
||||
}
|
||||
],
|
||||
},
|
||||
"metadata_filtering.conditions",
|
||||
),
|
||||
(
|
||||
{
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
},
|
||||
{
|
||||
"id": "docs",
|
||||
"name": "support kb",
|
||||
"datasets": [{"id": "dataset-2"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
},
|
||||
]
|
||||
},
|
||||
"names must be unique",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_knowledge_base_config_rejects_invalid_inputs(payload: dict[str, object], expected_message: str) -> None:
|
||||
@ -57,8 +153,7 @@ def test_knowledge_base_config_rejects_observation_limit_smaller_than_result_lim
|
||||
with pytest.raises(ValidationError, match="max_observation_chars"):
|
||||
_ = DifyKnowledgeBaseLayerConfig.model_validate(
|
||||
{
|
||||
"dataset_ids": ["dataset-1"],
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
**_valid_config(),
|
||||
"max_result_content_chars": 50,
|
||||
"max_observation_chars": 20,
|
||||
}
|
||||
|
||||
@ -8,7 +8,11 @@ from pydantic_ai import Tool
|
||||
from agenton.compositor import Compositor, LayerNode, LayerProvider
|
||||
from dify_agent.layers.execution_context import DifyExecutionContextLayerConfig
|
||||
from dify_agent.layers.execution_context.layer import DifyExecutionContextLayer
|
||||
from dify_agent.layers.knowledge.client import DifyKnowledgeBaseClientError
|
||||
from dify_agent.layers.knowledge.client import (
|
||||
DifyKnowledgeBaseClient,
|
||||
DifyKnowledgeBaseClientError,
|
||||
DifyKnowledgeRetrieveResponse,
|
||||
)
|
||||
from dify_agent.layers.knowledge.configs import DifyKnowledgeBaseLayerConfig
|
||||
from dify_agent.layers.knowledge.layer import (
|
||||
BLANK_QUERY_OBSERVATION,
|
||||
@ -32,10 +36,23 @@ def _execution_context_config(**overrides: object) -> DifyExecutionContextLayerC
|
||||
|
||||
|
||||
def _knowledge_config(**overrides: object) -> DifyKnowledgeBaseLayerConfig:
|
||||
payload: dict[str, object] = {
|
||||
"dataset_ids": ["dataset-1"],
|
||||
set_payload: dict[str, object] = {
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
}
|
||||
for key in ("id", "name", "description", "datasets", "query", "retrieval", "metadata_filtering"):
|
||||
if key in overrides:
|
||||
set_payload[key] = overrides.pop(key)
|
||||
if "dataset_ids" in overrides:
|
||||
dataset_ids = overrides.pop("dataset_ids")
|
||||
assert isinstance(dataset_ids, list)
|
||||
set_payload["datasets"] = [{"id": dataset_id} for dataset_id in dataset_ids]
|
||||
payload: dict[str, object] = {
|
||||
"sets": [set_payload],
|
||||
}
|
||||
payload.update(overrides)
|
||||
return DifyKnowledgeBaseLayerConfig.model_validate(payload)
|
||||
|
||||
@ -62,7 +79,7 @@ def _knowledge_provider() -> LayerProvider[DifyKnowledgeBaseLayer]:
|
||||
)
|
||||
|
||||
|
||||
def test_knowledge_layer_exposes_one_query_only_tool_definition() -> None:
|
||||
def test_knowledge_layer_exposes_one_set_scoped_tool_definition() -> None:
|
||||
async def scenario() -> None:
|
||||
compositor = Compositor(
|
||||
[
|
||||
@ -82,20 +99,23 @@ def test_knowledge_layer_exposes_one_query_only_tool_definition() -> None:
|
||||
tool_def = await tool.prepare_tool_def(None) # pyright: ignore[reportArgumentType]
|
||||
assert isinstance(tool, Tool)
|
||||
assert tool.name == "knowledge_base_search"
|
||||
assert tool.description == "Search configured knowledge bases for information relevant to the query."
|
||||
assert "Pick one configured set_name" in tool.description
|
||||
assert tool_def is not None
|
||||
assert (
|
||||
tool_def.description == "Search configured knowledge bases for information relevant to the query."
|
||||
)
|
||||
assert "Pick one configured set_name" in tool_def.description
|
||||
assert tool_def.parameters_json_schema == {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"set_name": {
|
||||
"type": "string",
|
||||
"enum": ["Support KB"],
|
||||
"description": "Knowledge set to search.",
|
||||
},
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Search query for the configured knowledge bases.",
|
||||
}
|
||||
"description": "Search query for the selected knowledge set.",
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
"required": ["set_name", "query"],
|
||||
"additionalProperties": False,
|
||||
}
|
||||
|
||||
@ -119,12 +139,105 @@ def test_knowledge_layer_rejects_blank_query_locally() -> None:
|
||||
) as run:
|
||||
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
|
||||
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
|
||||
result = await tool.function_schema.call({"query": " "}, None) # pyright: ignore[reportArgumentType]
|
||||
result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
|
||||
{"set_name": "Support KB", "query": " "}, None
|
||||
)
|
||||
assert result == BLANK_QUERY_OBSERVATION
|
||||
|
||||
asyncio.run(scenario())
|
||||
|
||||
|
||||
def test_knowledge_layer_exposes_no_tool_when_all_sets_are_user_query(monkeypatch: pytest.MonkeyPatch) -> None:
    """A config whose only set uses ``user_query`` mode yields an empty tool list."""

    async def fake_retrieve(self: DifyKnowledgeBaseClient, **_kwargs: object) -> DifyKnowledgeRetrieveResponse:
        # Stand-in for the client call: always answers with an empty result set.
        del self
        empty_payload = {"results": [], "usage": {}}
        return DifyKnowledgeRetrieveResponse.model_validate(empty_payload)

    monkeypatch.setattr(DifyKnowledgeBaseClient, "retrieve", fake_retrieve)

    async def scenario() -> None:
        nodes = [
            LayerNode("execution_context", _execution_context_provider()),
            LayerNode("knowledge", _knowledge_provider(), deps={"execution_context": "execution_context"}),
        ]
        compositor = Compositor(nodes)
        async with httpx.AsyncClient() as http_client:
            layer_configs = {
                "execution_context": _execution_context_config(),
                "knowledge": _knowledge_config(query={"mode": "user_query", "value": "release notes"}),
            }
            async with compositor.enter(configs=layer_configs) as run:
                layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
                tools = await layer.get_tools(http_client=http_client)
                assert tools == []

    asyncio.run(scenario())
|
||||
|
||||
|
||||
def test_knowledge_layer_fetches_user_query_sets_on_context_entry(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
seen_requests: list[dict[str, object]] = []
|
||||
|
||||
async def fake_retrieve(self: DifyKnowledgeBaseClient, **kwargs: object) -> DifyKnowledgeRetrieveResponse:
|
||||
del self
|
||||
seen_requests.append(kwargs)
|
||||
return DifyKnowledgeRetrieveResponse.model_validate(
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"metadata": {
|
||||
"_source": "knowledge",
|
||||
"dataset_name": "Docs",
|
||||
"document_name": "Release.md",
|
||||
"score": 0.8,
|
||||
},
|
||||
"title": "Release",
|
||||
"files": [],
|
||||
"content": "Version notes",
|
||||
"summary": None,
|
||||
}
|
||||
],
|
||||
"usage": {},
|
||||
}
|
||||
)
|
||||
|
||||
monkeypatch.setattr(DifyKnowledgeBaseClient, "retrieve", fake_retrieve)
|
||||
|
||||
async def scenario() -> None:
|
||||
compositor = Compositor(
|
||||
[
|
||||
LayerNode("execution_context", _execution_context_provider()),
|
||||
LayerNode("knowledge", _knowledge_provider(), deps={"execution_context": "execution_context"}),
|
||||
]
|
||||
)
|
||||
async with compositor.enter(
|
||||
configs={
|
||||
"execution_context": _execution_context_config(),
|
||||
"knowledge": _knowledge_config(query={"mode": "user_query", "value": "release notes"}),
|
||||
}
|
||||
) as run:
|
||||
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
|
||||
assert len(seen_requests) == 1
|
||||
assert seen_requests[0]["query"] == "release notes"
|
||||
assert seen_requests[0]["dataset_ids"] == ["dataset-1"]
|
||||
assert knowledge_layer.runtime_state.eager_config_fingerprint
|
||||
assert knowledge_layer.runtime_state.eager_results[0].status == "success"
|
||||
assert knowledge_layer.user_prompts == [
|
||||
"Knowledge retrieval results:\n\n"
|
||||
"Set: Support KB\n"
|
||||
"Query: release notes\n"
|
||||
"Results:\n"
|
||||
"1. Title: Release\n"
|
||||
" Dataset: Docs\n"
|
||||
" Document: Release.md\n"
|
||||
" Score: 0.8\n"
|
||||
" Content: Version notes"
|
||||
]
|
||||
await knowledge_layer.on_context_resume()
|
||||
assert len(seen_requests) == 1
|
||||
|
||||
asyncio.run(scenario())
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("field_name", "field_value"),
|
||||
[
|
||||
@ -199,7 +312,9 @@ def test_knowledge_layer_formats_results_and_truncates_observation() -> None:
|
||||
) as run:
|
||||
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
|
||||
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
|
||||
result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
|
||||
result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
|
||||
{"set_name": "Support KB", "query": "reset"}, None
|
||||
)
|
||||
assert result.startswith("Knowledge base search results:\n1. Title: Guide")
|
||||
assert "Dataset: Docs" in result
|
||||
assert "Document: Guide.md" in result
|
||||
@ -229,7 +344,9 @@ def test_knowledge_layer_returns_no_results_observation() -> None:
|
||||
) as run:
|
||||
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
|
||||
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
|
||||
result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
|
||||
result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
|
||||
{"set_name": "Support KB", "query": "reset"}, None
|
||||
)
|
||||
assert result == NO_RESULTS_OBSERVATION
|
||||
|
||||
asyncio.run(scenario())
|
||||
@ -256,7 +373,9 @@ def test_knowledge_layer_converts_retryable_failures_into_observation() -> None:
|
||||
) as run:
|
||||
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
|
||||
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
|
||||
result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
|
||||
result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
|
||||
{"set_name": "Support KB", "query": "reset"}, None
|
||||
)
|
||||
assert result == TEMPORARY_UNAVAILABLE_OBSERVATION
|
||||
|
||||
asyncio.run(scenario())
|
||||
@ -289,7 +408,9 @@ def test_knowledge_layer_converts_retryable_transport_failures_into_observation(
|
||||
) as run:
|
||||
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
|
||||
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
|
||||
result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
|
||||
result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
|
||||
{"set_name": "Support KB", "query": "reset"}, None
|
||||
)
|
||||
assert result == TEMPORARY_UNAVAILABLE_OBSERVATION
|
||||
|
||||
asyncio.run(scenario())
|
||||
@ -317,7 +438,9 @@ def test_knowledge_layer_raises_non_retryable_client_errors() -> None:
|
||||
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
|
||||
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
|
||||
with pytest.raises(DifyKnowledgeBaseClientError) as exc_info:
|
||||
await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
|
||||
await tool.function_schema.call( # pyright: ignore[reportArgumentType]
|
||||
{"set_name": "Support KB", "query": "reset"}, None
|
||||
)
|
||||
assert exc_info.value.status_code == 403
|
||||
|
||||
asyncio.run(scenario())
|
||||
@ -343,7 +466,9 @@ def test_knowledge_layer_raises_for_malformed_success_responses() -> None:
|
||||
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
|
||||
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
|
||||
with pytest.raises(DifyKnowledgeBaseClientError) as exc_info:
|
||||
await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
|
||||
await tool.function_schema.call( # pyright: ignore[reportArgumentType]
|
||||
{"set_name": "Support KB", "query": "reset"}, None
|
||||
)
|
||||
assert exc_info.value.error_code == "invalid_response"
|
||||
assert exc_info.value.retryable is False
|
||||
|
||||
@ -411,7 +536,9 @@ def test_knowledge_layer_sends_execution_context_and_static_config_to_inner_api(
|
||||
) as run:
|
||||
knowledge_layer = run.get_layer("knowledge", DifyKnowledgeBaseLayer)
|
||||
tool = (await knowledge_layer.get_tools(http_client=http_client))[0]
|
||||
result = await tool.function_schema.call({"query": "reset"}, None) # pyright: ignore[reportArgumentType]
|
||||
result = await tool.function_schema.call( # pyright: ignore[reportArgumentType]
|
||||
{"set_name": "Support KB", "query": "reset"}, None
|
||||
)
|
||||
assert result == NO_RESULTS_OBSERVATION
|
||||
|
||||
asyncio.run(scenario())
|
||||
|
||||
@ -995,7 +995,7 @@ def test_runner_passes_dynamic_dify_knowledge_tools_to_agent(monkeypatch: pytest
|
||||
return TestModel(custom_output_text="done") # pyright: ignore[reportReturnType]
|
||||
|
||||
async def fake_get_tools(self: DifyKnowledgeBaseLayer, *, http_client: httpx.AsyncClient) -> list[Tool[object]]:
|
||||
assert self.config.dataset_ids == ["dataset-1"]
|
||||
assert self.config.sets[0].dataset_ids == ["dataset-1"]
|
||||
assert http_client.headers.get("X-Test-Client") == "dify-api"
|
||||
return [Tool(knowledge_tool, name="knowledge_base_search")]
|
||||
|
||||
@ -1055,8 +1055,15 @@ def test_runner_passes_dynamic_dify_knowledge_tools_to_agent(monkeypatch: pytest
|
||||
deps={"execution_context": "execution_context"},
|
||||
config=DifyKnowledgeBaseLayerConfig.model_validate(
|
||||
{
|
||||
"dataset_ids": ["dataset-1"],
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 4},
|
||||
}
|
||||
],
|
||||
}
|
||||
),
|
||||
),
|
||||
|
||||
@ -231,8 +231,15 @@ def test_create_app_creates_scheduler_and_closes_after_shutdown(monkeypatch: pyt
|
||||
knowledge_layer = knowledge_provider.create_layer(
|
||||
DifyKnowledgeBaseLayerConfig.model_validate(
|
||||
{
|
||||
"dataset_ids": ["dataset-1"],
|
||||
"retrieval": {"mode": "multiple", "top_k": 2},
|
||||
"sets": [
|
||||
{
|
||||
"id": "support",
|
||||
"name": "Support KB",
|
||||
"datasets": [{"id": "dataset-1"}],
|
||||
"query": {"mode": "generated_query"},
|
||||
"retrieval": {"mode": "multiple", "top_k": 2},
|
||||
}
|
||||
],
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
@ -115,7 +115,7 @@ def test_protocol_and_dify_plugin_exports_do_not_import_server_only_modules() ->
|
||||
"assert dify_agent_layers_execution_context.__all__ == ['DIFY_EXECUTION_CONTEXT_LAYER_TYPE_ID', 'DifyExecutionContextAgentMode', 'DifyExecutionContextInvokeFrom', 'DifyExecutionContextLayerConfig', 'DifyExecutionContextUserFrom']",
|
||||
"assert dify_agent_layers_ask_human.__all__ == ['AskHumanAction', 'AskHumanActionStyle', 'AskHumanField', 'AskHumanFieldType', 'AskHumanFileField', 'AskHumanFileListField', 'AskHumanParagraphField', 'AskHumanResultStatus', 'AskHumanSelectField', 'AskHumanSelectOption', 'AskHumanSelectedAction', 'AskHumanToolArgs', 'AskHumanToolResult', 'AskHumanUrgency', 'DEFAULT_ASK_HUMAN_TOOL_DESCRIPTION', 'DIFY_ASK_HUMAN_LAYER_TYPE_ID', 'DifyAskHumanLayerConfig']",
|
||||
"assert dify_agent_layers_dify_plugin.__all__ == ['DIFY_PLUGIN_LLM_LAYER_TYPE_ID', 'DIFY_PLUGIN_TOOLS_LAYER_TYPE_ID', 'DifyPluginCredentialValue', 'DifyPluginLLMLayerConfig', 'DifyPluginToolCredentialType', 'DifyPluginToolConfig', 'DifyPluginToolOption', 'DifyPluginToolParameter', 'DifyPluginToolParameterForm', 'DifyPluginToolParameterType', 'DifyPluginToolsLayerConfig', 'DifyPluginToolValue']",
|
||||
"assert dify_agent_layers_knowledge.__all__ == ['DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID', 'DifyKnowledgeBaseLayerConfig', 'DifyKnowledgeMetadataCondition', 'DifyKnowledgeMetadataConditions', 'DifyKnowledgeMetadataFilteringConfig', 'DifyKnowledgeModelConfig', 'DifyKnowledgeRerankingModelConfig', 'DifyKnowledgeRetrievalConfig']",
|
||||
"assert dify_agent_layers_knowledge.__all__ == ['DIFY_KNOWLEDGE_BASE_LAYER_TYPE_ID', 'DifyKnowledgeBaseLayerConfig', 'DifyKnowledgeDatasetConfig', 'DifyKnowledgeEagerResult', 'DifyKnowledgeMetadataCondition', 'DifyKnowledgeMetadataConditions', 'DifyKnowledgeMetadataFilteringConfig', 'DifyKnowledgeModelConfig', 'DifyKnowledgeQueryConfig', 'DifyKnowledgeRerankingModelConfig', 'DifyKnowledgeRetrievalConfig', 'DifyKnowledgeRuntimeState', 'DifyKnowledgeSetConfig']",
|
||||
"assert dify_agent_layers_output.__all__ == ['DIFY_OUTPUT_LAYER_TYPE_ID', 'DifyOutputLayerConfig']",
|
||||
"assert dify_agent_layers_shell.__all__ == ['DIFY_SHELL_LAYER_TYPE_ID', 'DifyShellCliToolConfig', 'DifyShellEnvVarConfig', 'DifyShellLayerConfig', 'DifyShellSandboxConfig', 'DifyShellSecretRefConfig']",
|
||||
],
|
||||
|
||||
@ -563,7 +563,7 @@ export type AgentComposerSoulCandidatesResponse = {
|
||||
cli_tools?: Array<AgentCliToolConfig>
|
||||
dify_tools?: Array<AgentComposerDifyToolCandidateResponse>
|
||||
human_contacts?: Array<AgentHumanContactConfig>
|
||||
knowledge_datasets?: Array<AgentKnowledgeDatasetConfig>
|
||||
knowledge_sets?: Array<AgentComposerKnowledgeSetCandidateResponse>
|
||||
}
|
||||
|
||||
export type ComposerCandidateCapabilities = {
|
||||
@ -926,9 +926,7 @@ export type AgentSoulHumanConfig = {
|
||||
}
|
||||
|
||||
export type AgentSoulKnowledgeConfig = {
|
||||
datasets?: Array<AgentKnowledgeDatasetConfig>
|
||||
query_config?: AgentKnowledgeQueryConfig
|
||||
query_mode?: AgentKnowledgeQueryMode | null
|
||||
sets?: Array<AgentKnowledgeSetConfig>
|
||||
}
|
||||
|
||||
export type AgentSoulMemoryConfig = {
|
||||
@ -1069,11 +1067,12 @@ export type AgentComposerDifyToolCandidateResponse = {
|
||||
tools_count?: number | null
|
||||
}
|
||||
|
||||
export type AgentKnowledgeDatasetConfig = {
|
||||
export type AgentComposerKnowledgeSetCandidateResponse = {
|
||||
datasets?: Array<AgentComposerKnowledgeDatasetCandidateResponse>
|
||||
description?: string | null
|
||||
id?: string | null
|
||||
name?: string | null
|
||||
[key: string]: unknown
|
||||
id: string
|
||||
missing_dataset_ids?: Array<string>
|
||||
name: string
|
||||
}
|
||||
|
||||
export type AgentModerationProviderConfig = {
|
||||
@ -1228,16 +1227,16 @@ export type AgentHumanToolConfig = {
|
||||
[key: string]: unknown
|
||||
}
|
||||
|
||||
export type AgentKnowledgeQueryConfig = {
|
||||
query?: string | null
|
||||
score_threshold?: number | null
|
||||
score_threshold_enabled?: boolean | null
|
||||
top_k?: number | null
|
||||
[key: string]: unknown
|
||||
export type AgentKnowledgeSetConfig = {
|
||||
datasets: Array<AgentKnowledgeDatasetConfig>
|
||||
description?: string | null
|
||||
id: string
|
||||
metadata_filtering?: AgentKnowledgeMetadataFilteringConfig
|
||||
name: string
|
||||
query: AgentKnowledgeQueryConfig
|
||||
retrieval: AgentKnowledgeRetrievalConfig
|
||||
}
|
||||
|
||||
export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
|
||||
|
||||
export type AgentMemoryArtifactConfig = {
|
||||
id?: string | null
|
||||
name?: string | null
|
||||
@ -1376,6 +1375,13 @@ export type AgentPermissionConfig = {
|
||||
|
||||
export type AgentCliToolRiskLevel = 'dangerous' | 'safe' | 'unknown'
|
||||
|
||||
export type AgentComposerKnowledgeDatasetCandidateResponse = {
|
||||
description?: string | null
|
||||
id?: string | null
|
||||
missing?: boolean
|
||||
name?: string | null
|
||||
}
|
||||
|
||||
export type AgentModerationIoConfig = {
|
||||
enabled?: boolean
|
||||
preset_response?: string | null
|
||||
@ -1404,6 +1410,34 @@ export type FormInputConfig
|
||||
|
||||
export type JsonValue2 = unknown
|
||||
|
||||
export type AgentKnowledgeDatasetConfig = {
|
||||
description?: string | null
|
||||
id?: string | null
|
||||
name?: string | null
|
||||
}
|
||||
|
||||
export type AgentKnowledgeMetadataFilteringConfig = {
|
||||
conditions?: AgentKnowledgeMetadataConditions | null
|
||||
mode?: 'automatic' | 'disabled' | 'manual'
|
||||
model_config?: AgentKnowledgeModelConfig | null
|
||||
}
|
||||
|
||||
export type AgentKnowledgeQueryConfig = {
|
||||
mode: AgentKnowledgeQueryMode
|
||||
value?: string | null
|
||||
}
|
||||
|
||||
export type AgentKnowledgeRetrievalConfig = {
|
||||
mode: 'multiple' | 'single'
|
||||
model?: AgentKnowledgeModelConfig | null
|
||||
reranking_enable?: boolean
|
||||
reranking_mode?: string
|
||||
reranking_model?: AgentKnowledgeRerankingModelConfig | null
|
||||
score_threshold?: number | null
|
||||
top_k?: number | null
|
||||
weights?: AgentKnowledgeWeightedScoreConfig | null
|
||||
}
|
||||
|
||||
export type AgentModelResponseFormatConfig = {
|
||||
type?: string | null
|
||||
[key: string]: unknown
|
||||
@ -1454,6 +1488,38 @@ export type FileListInputConfig = {
|
||||
type?: 'file-list'
|
||||
}
|
||||
|
||||
export type AgentKnowledgeMetadataConditions = {
|
||||
conditions?: Array<AgentKnowledgeMetadataCondition>
|
||||
logical_operator?: 'and' | 'or'
|
||||
}
|
||||
|
||||
export type AgentKnowledgeModelConfig = {
|
||||
completion_params?: {
|
||||
[key: string]: unknown
|
||||
}
|
||||
mode: string
|
||||
name: string
|
||||
provider: string
|
||||
}
|
||||
|
||||
export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
|
||||
|
||||
export type AgentKnowledgeRerankingModelConfig = {
|
||||
model: string
|
||||
provider: string
|
||||
}
|
||||
|
||||
export type AgentKnowledgeWeightedScoreConfig = {
|
||||
keyword_setting?: {
|
||||
[key: string]: unknown
|
||||
} | null
|
||||
vector_setting?: {
|
||||
[key: string]: unknown
|
||||
} | null
|
||||
weight_type?: string | null
|
||||
[key: string]: unknown
|
||||
}
|
||||
|
||||
export type StringSource = {
|
||||
selector?: Array<string>
|
||||
type: ValueSourceType
|
||||
@ -1470,6 +1536,30 @@ export type FileType = 'audio' | 'custom' | 'document' | 'image' | 'video'
|
||||
|
||||
export type FileTransferMethod = 'datasource_file' | 'local_file' | 'remote_url' | 'tool_file'
|
||||
|
||||
export type AgentKnowledgeMetadataCondition = {
|
||||
comparison_operator:
|
||||
| '<'
|
||||
| '='
|
||||
| '>'
|
||||
| 'after'
|
||||
| 'before'
|
||||
| 'contains'
|
||||
| 'empty'
|
||||
| 'end with'
|
||||
| 'in'
|
||||
| 'is'
|
||||
| 'is not'
|
||||
| 'not contains'
|
||||
| 'not empty'
|
||||
| 'not in'
|
||||
| 'start with'
|
||||
| '≠'
|
||||
| '≤'
|
||||
| '≥'
|
||||
name: string
|
||||
value?: string | Array<string> | number | null
|
||||
}
|
||||
|
||||
export type ValueSourceType = 'constant' | 'variable'
|
||||
|
||||
export type AgentAppPaginationWritable = {
|
||||
|
||||
@ -1022,15 +1022,6 @@ export const zAgentComposerDifyToolCandidateResponse = z.object({
|
||||
tools_count: z.int().nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeDatasetConfig
|
||||
*/
|
||||
export const zAgentKnowledgeDatasetConfig = z.object({
|
||||
description: z.string().nullish(),
|
||||
id: z.string().max(255).nullish(),
|
||||
name: z.string().max(255).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* SimpleAccount
|
||||
*/
|
||||
@ -1279,30 +1270,6 @@ export const zAgentSoulHumanConfig = z.object({
|
||||
tools: z.array(zAgentHumanToolConfig).optional(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeQueryConfig
|
||||
*/
|
||||
export const zAgentKnowledgeQueryConfig = z.object({
|
||||
query: z.string().nullish(),
|
||||
score_threshold: z.number().gte(0).lte(1).nullish(),
|
||||
score_threshold_enabled: z.boolean().nullish(),
|
||||
top_k: z.int().gte(1).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeQueryMode
|
||||
*/
|
||||
export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
|
||||
|
||||
/**
|
||||
* AgentSoulKnowledgeConfig
|
||||
*/
|
||||
export const zAgentSoulKnowledgeConfig = z.object({
|
||||
datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
|
||||
query_config: zAgentKnowledgeQueryConfig.optional(),
|
||||
query_mode: zAgentKnowledgeQueryMode.nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentMemoryArtifactConfig
|
||||
*/
|
||||
@ -1521,6 +1488,27 @@ export const zAgentCliToolConfig = z.object({
|
||||
tool_name: z.string().max(255).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentComposerKnowledgeDatasetCandidateResponse
|
||||
*/
|
||||
export const zAgentComposerKnowledgeDatasetCandidateResponse = z.object({
|
||||
description: z.string().nullish(),
|
||||
id: z.string().max(255).nullish(),
|
||||
missing: z.boolean().optional().default(false),
|
||||
name: z.string().max(255).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentComposerKnowledgeSetCandidateResponse
|
||||
*/
|
||||
export const zAgentComposerKnowledgeSetCandidateResponse = z.object({
|
||||
datasets: z.array(zAgentComposerKnowledgeDatasetCandidateResponse).optional(),
|
||||
description: z.string().nullish(),
|
||||
id: z.string(),
|
||||
missing_dataset_ids: z.array(z.string()).optional(),
|
||||
name: z.string(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentComposerSoulCandidatesResponse
|
||||
*/
|
||||
@ -1528,7 +1516,7 @@ export const zAgentComposerSoulCandidatesResponse = z.object({
|
||||
cli_tools: z.array(zAgentCliToolConfig).optional(),
|
||||
dify_tools: z.array(zAgentComposerDifyToolCandidateResponse).optional(),
|
||||
human_contacts: z.array(zAgentHumanContactConfig).optional(),
|
||||
knowledge_datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
|
||||
knowledge_sets: z.array(zAgentComposerKnowledgeSetCandidateResponse).optional(),
|
||||
})
|
||||
|
||||
/**
|
||||
@ -1583,6 +1571,15 @@ export const zHumanInputFormSubmissionData = z.object({
|
||||
submitted_data: z.record(z.string(), zJsonValue2).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeDatasetConfig
|
||||
*/
|
||||
export const zAgentKnowledgeDatasetConfig = z.object({
|
||||
description: z.string().nullish(),
|
||||
id: z.string().max(255).nullish(),
|
||||
name: z.string().max(255).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentModelResponseFormatConfig
|
||||
*/
|
||||
@ -1733,53 +1730,6 @@ export const zAgentSoulToolsConfig = z.object({
|
||||
dify_tools: z.array(zAgentSoulDifyToolConfig).optional(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentSoulConfig
|
||||
*/
|
||||
export const zAgentSoulConfig = z.object({
|
||||
app_features: zAgentSoulAppFeaturesConfig.optional(),
|
||||
app_variables: z.array(zAppVariableConfig).optional(),
|
||||
env: zAgentSoulEnvConfig.optional(),
|
||||
human: zAgentSoulHumanConfig.optional(),
|
||||
knowledge: zAgentSoulKnowledgeConfig.optional(),
|
||||
memory: zAgentSoulMemoryConfig.optional(),
|
||||
misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
|
||||
model: zAgentSoulModelConfig.nullish(),
|
||||
prompt: zAgentSoulPromptConfig.optional(),
|
||||
sandbox: zAgentSoulSandboxConfig.optional(),
|
||||
schema_version: z.int().optional().default(1),
|
||||
tools: zAgentSoulToolsConfig.optional(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentAppComposerResponse
|
||||
*/
|
||||
export const zAgentAppComposerResponse = z.object({
|
||||
active_config_snapshot: zAgentConfigSnapshotSummaryResponse,
|
||||
agent: zAgentComposerAgentResponse,
|
||||
agent_soul: zAgentSoulConfig,
|
||||
save_options: z.array(zComposerSaveStrategy),
|
||||
validation: zComposerValidationFindingsResponse.nullish(),
|
||||
variant: z.literal('agent_app'),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentConfigSnapshotDetailResponse
|
||||
*/
|
||||
export const zAgentConfigSnapshotDetailResponse = z.object({
|
||||
agent_id: z.string().nullish(),
|
||||
config_snapshot: zAgentSoulConfig,
|
||||
created_at: z.int().nullish(),
|
||||
created_by: z.string().nullish(),
|
||||
display_version: z.int().nullish(),
|
||||
id: z.string(),
|
||||
revisions: z.array(zAgentConfigRevisionResponse).optional(),
|
||||
snapshot_version: z.int().nullish(),
|
||||
summary: z.string().nullish(),
|
||||
version: z.int(),
|
||||
version_note: z.string().nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* OutputErrorStrategy
|
||||
*
|
||||
@ -1869,22 +1819,6 @@ export const zWorkflowNodeJobConfig = z.object({
|
||||
workflow_prompt: z.string().optional().default(''),
|
||||
})
|
||||
|
||||
/**
|
||||
* ComposerSavePayload
|
||||
*/
|
||||
export const zComposerSavePayload = z.object({
|
||||
agent_soul: zAgentSoulConfig.nullish(),
|
||||
binding: zComposerBindingPayload.nullish(),
|
||||
client_revision_id: z.string().nullish(),
|
||||
idempotency_key: z.string().nullish(),
|
||||
new_agent_name: z.string().min(1).max(255).nullish(),
|
||||
node_job: zWorkflowNodeJobConfig.nullish(),
|
||||
save_strategy: zComposerSaveStrategy,
|
||||
soul_lock: zComposerSoulLockPayload.optional(),
|
||||
variant: zComposerVariant,
|
||||
version_note: z.string().nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* ButtonStyle
|
||||
*
|
||||
@ -1903,6 +1837,60 @@ export const zUserActionConfig = z.object({
|
||||
title: z.string().max(100),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeModelConfig
|
||||
*/
|
||||
export const zAgentKnowledgeModelConfig = z.object({
|
||||
completion_params: z.record(z.string(), z.unknown()).optional(),
|
||||
mode: z.string().min(1).max(64),
|
||||
name: z.string().min(1).max(255),
|
||||
provider: z.string().min(1).max(255),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeQueryMode
|
||||
*/
|
||||
export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
|
||||
|
||||
/**
|
||||
* AgentKnowledgeQueryConfig
|
||||
*/
|
||||
export const zAgentKnowledgeQueryConfig = z.object({
|
||||
mode: zAgentKnowledgeQueryMode,
|
||||
value: z.string().nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeRerankingModelConfig
|
||||
*/
|
||||
export const zAgentKnowledgeRerankingModelConfig = z.object({
|
||||
model: z.string().min(1).max(255),
|
||||
provider: z.string().min(1).max(255),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeWeightedScoreConfig
|
||||
*/
|
||||
export const zAgentKnowledgeWeightedScoreConfig = z.object({
|
||||
keyword_setting: z.record(z.string(), z.unknown()).nullish(),
|
||||
vector_setting: z.record(z.string(), z.unknown()).nullish(),
|
||||
weight_type: z.string().max(64).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeRetrievalConfig
|
||||
*/
|
||||
export const zAgentKnowledgeRetrievalConfig = z.object({
|
||||
mode: z.enum(['multiple', 'single']),
|
||||
model: zAgentKnowledgeModelConfig.nullish(),
|
||||
reranking_enable: z.boolean().optional().default(true),
|
||||
reranking_mode: z.string().optional().default('reranking_model'),
|
||||
reranking_model: zAgentKnowledgeRerankingModelConfig.nullish(),
|
||||
score_threshold: z.number().gte(0).lte(1).nullish(),
|
||||
top_k: z.int().gte(1).nullish(),
|
||||
weights: zAgentKnowledgeWeightedScoreConfig.nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* FileType
|
||||
*/
|
||||
@ -1941,6 +1929,134 @@ export const zFileListInputConfig = z.object({
|
||||
type: z.literal('file-list').optional().default('file-list'),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeMetadataCondition
|
||||
*/
|
||||
export const zAgentKnowledgeMetadataCondition = z.object({
|
||||
comparison_operator: z.enum([
|
||||
'<',
|
||||
'=',
|
||||
'>',
|
||||
'after',
|
||||
'before',
|
||||
'contains',
|
||||
'empty',
|
||||
'end with',
|
||||
'in',
|
||||
'is',
|
||||
'is not',
|
||||
'not contains',
|
||||
'not empty',
|
||||
'not in',
|
||||
'start with',
|
||||
'≠',
|
||||
'≤',
|
||||
'≥',
|
||||
]),
|
||||
name: z.string().min(1).max(255),
|
||||
value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeMetadataConditions
|
||||
*/
|
||||
export const zAgentKnowledgeMetadataConditions = z.object({
|
||||
conditions: z.array(zAgentKnowledgeMetadataCondition).optional(),
|
||||
logical_operator: z.enum(['and', 'or']).optional().default('and'),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeMetadataFilteringConfig
|
||||
*/
|
||||
export const zAgentKnowledgeMetadataFilteringConfig = z.object({
|
||||
conditions: zAgentKnowledgeMetadataConditions.nullish(),
|
||||
mode: z.enum(['automatic', 'disabled', 'manual']).optional().default('disabled'),
|
||||
model_config: zAgentKnowledgeModelConfig.nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeSetConfig
|
||||
*/
|
||||
export const zAgentKnowledgeSetConfig = z.object({
|
||||
datasets: z.array(zAgentKnowledgeDatasetConfig),
|
||||
description: z.string().nullish(),
|
||||
id: z.string().min(1).max(255),
|
||||
metadata_filtering: zAgentKnowledgeMetadataFilteringConfig.optional(),
|
||||
name: z.string().min(1).max(255),
|
||||
query: zAgentKnowledgeQueryConfig,
|
||||
retrieval: zAgentKnowledgeRetrievalConfig,
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentSoulKnowledgeConfig
|
||||
*/
|
||||
export const zAgentSoulKnowledgeConfig = z.object({
|
||||
sets: z.array(zAgentKnowledgeSetConfig).optional(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentSoulConfig
|
||||
*/
|
||||
export const zAgentSoulConfig = z.object({
|
||||
app_features: zAgentSoulAppFeaturesConfig.optional(),
|
||||
app_variables: z.array(zAppVariableConfig).optional(),
|
||||
env: zAgentSoulEnvConfig.optional(),
|
||||
human: zAgentSoulHumanConfig.optional(),
|
||||
knowledge: zAgentSoulKnowledgeConfig.optional(),
|
||||
memory: zAgentSoulMemoryConfig.optional(),
|
||||
misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
|
||||
model: zAgentSoulModelConfig.nullish(),
|
||||
prompt: zAgentSoulPromptConfig.optional(),
|
||||
sandbox: zAgentSoulSandboxConfig.optional(),
|
||||
schema_version: z.int().optional().default(1),
|
||||
tools: zAgentSoulToolsConfig.optional(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentAppComposerResponse
|
||||
*/
|
||||
export const zAgentAppComposerResponse = z.object({
|
||||
active_config_snapshot: zAgentConfigSnapshotSummaryResponse,
|
||||
agent: zAgentComposerAgentResponse,
|
||||
agent_soul: zAgentSoulConfig,
|
||||
save_options: z.array(zComposerSaveStrategy),
|
||||
validation: zComposerValidationFindingsResponse.nullish(),
|
||||
variant: z.literal('agent_app'),
|
||||
})
|
||||
|
||||
/**
|
||||
* ComposerSavePayload
|
||||
*/
|
||||
export const zComposerSavePayload = z.object({
|
||||
agent_soul: zAgentSoulConfig.nullish(),
|
||||
binding: zComposerBindingPayload.nullish(),
|
||||
client_revision_id: z.string().nullish(),
|
||||
idempotency_key: z.string().nullish(),
|
||||
new_agent_name: z.string().min(1).max(255).nullish(),
|
||||
node_job: zWorkflowNodeJobConfig.nullish(),
|
||||
save_strategy: zComposerSaveStrategy,
|
||||
soul_lock: zComposerSoulLockPayload.optional(),
|
||||
variant: zComposerVariant,
|
||||
version_note: z.string().nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentConfigSnapshotDetailResponse
|
||||
*/
|
||||
export const zAgentConfigSnapshotDetailResponse = z.object({
|
||||
agent_id: z.string().nullish(),
|
||||
config_snapshot: zAgentSoulConfig,
|
||||
created_at: z.int().nullish(),
|
||||
created_by: z.string().nullish(),
|
||||
display_version: z.int().nullish(),
|
||||
id: z.string(),
|
||||
revisions: z.array(zAgentConfigRevisionResponse).optional(),
|
||||
snapshot_version: z.int().nullish(),
|
||||
summary: z.string().nullish(),
|
||||
version: z.int(),
|
||||
version_note: z.string().nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* ValueSourceType
|
||||
*
|
||||
|
||||
@ -1890,7 +1890,7 @@ export type AgentComposerSoulCandidatesResponse = {
|
||||
cli_tools?: Array<AgentCliToolConfig>
|
||||
dify_tools?: Array<AgentComposerDifyToolCandidateResponse>
|
||||
human_contacts?: Array<AgentHumanContactConfig>
|
||||
knowledge_datasets?: Array<AgentKnowledgeDatasetConfig>
|
||||
knowledge_sets?: Array<AgentComposerKnowledgeSetCandidateResponse>
|
||||
}
|
||||
|
||||
export type ComposerCandidateCapabilities = {
|
||||
@ -2124,9 +2124,7 @@ export type AgentSoulHumanConfig = {
|
||||
}
|
||||
|
||||
export type AgentSoulKnowledgeConfig = {
|
||||
datasets?: Array<AgentKnowledgeDatasetConfig>
|
||||
query_config?: AgentKnowledgeQueryConfig
|
||||
query_mode?: AgentKnowledgeQueryMode | null
|
||||
sets?: Array<AgentKnowledgeSetConfig>
|
||||
}
|
||||
|
||||
export type AgentSoulMemoryConfig = {
|
||||
@ -2278,11 +2276,12 @@ export type AgentComposerDifyToolCandidateResponse = {
|
||||
tools_count?: number | null
|
||||
}
|
||||
|
||||
export type AgentKnowledgeDatasetConfig = {
|
||||
export type AgentComposerKnowledgeSetCandidateResponse = {
|
||||
datasets?: Array<AgentComposerKnowledgeDatasetCandidateResponse>
|
||||
description?: string | null
|
||||
id?: string | null
|
||||
name?: string | null
|
||||
[key: string]: unknown
|
||||
id: string
|
||||
missing_dataset_ids?: Array<string>
|
||||
name: string
|
||||
}
|
||||
|
||||
export type CheckResultView = {
|
||||
@ -2393,16 +2392,16 @@ export type AgentHumanToolConfig = {
|
||||
[key: string]: unknown
|
||||
}
|
||||
|
||||
export type AgentKnowledgeQueryConfig = {
|
||||
query?: string | null
|
||||
score_threshold?: number | null
|
||||
score_threshold_enabled?: boolean | null
|
||||
top_k?: number | null
|
||||
[key: string]: unknown
|
||||
export type AgentKnowledgeSetConfig = {
|
||||
datasets: Array<AgentKnowledgeDatasetConfig>
|
||||
description?: string | null
|
||||
id: string
|
||||
metadata_filtering?: AgentKnowledgeMetadataFilteringConfig
|
||||
name: string
|
||||
query: AgentKnowledgeQueryConfig
|
||||
retrieval: AgentKnowledgeRetrievalConfig
|
||||
}
|
||||
|
||||
export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
|
||||
|
||||
export type AgentMemoryArtifactConfig = {
|
||||
id?: string | null
|
||||
name?: string | null
|
||||
@ -2506,6 +2505,13 @@ export type AgentPermissionConfig = {
|
||||
|
||||
export type AgentCliToolRiskLevel = 'dangerous' | 'safe' | 'unknown'
|
||||
|
||||
export type AgentComposerKnowledgeDatasetCandidateResponse = {
|
||||
description?: string | null
|
||||
id?: string | null
|
||||
missing?: boolean
|
||||
name?: string | null
|
||||
}
|
||||
|
||||
export type ButtonStyle = 'accent' | 'default' | 'ghost' | 'primary'
|
||||
|
||||
export type ParagraphInputConfig = {
|
||||
@ -2545,6 +2551,34 @@ export type AgentModerationProviderConfig = {
|
||||
[key: string]: unknown
|
||||
}
|
||||
|
||||
export type AgentKnowledgeDatasetConfig = {
|
||||
description?: string | null
|
||||
id?: string | null
|
||||
name?: string | null
|
||||
}
|
||||
|
||||
export type AgentKnowledgeMetadataFilteringConfig = {
|
||||
conditions?: AgentKnowledgeMetadataConditions | null
|
||||
mode?: 'automatic' | 'disabled' | 'manual'
|
||||
model_config?: AgentKnowledgeModelConfig | null
|
||||
}
|
||||
|
||||
export type AgentKnowledgeQueryConfig = {
|
||||
mode: AgentKnowledgeQueryMode
|
||||
value?: string | null
|
||||
}
|
||||
|
||||
export type AgentKnowledgeRetrievalConfig = {
|
||||
mode: 'multiple' | 'single'
|
||||
model?: AgentKnowledgeModelConfig | null
|
||||
reranking_enable?: boolean
|
||||
reranking_mode?: string
|
||||
reranking_model?: AgentKnowledgeRerankingModelConfig | null
|
||||
score_threshold?: number | null
|
||||
top_k?: number | null
|
||||
weights?: AgentKnowledgeWeightedScoreConfig | null
|
||||
}
|
||||
|
||||
export type AgentModelResponseFormatConfig = {
|
||||
type?: string | null
|
||||
[key: string]: unknown
|
||||
@ -2578,8 +2612,64 @@ export type AgentModerationIoConfig = {
|
||||
[key: string]: unknown
|
||||
}
|
||||
|
||||
export type AgentKnowledgeMetadataConditions = {
|
||||
conditions?: Array<AgentKnowledgeMetadataCondition>
|
||||
logical_operator?: 'and' | 'or'
|
||||
}
|
||||
|
||||
export type AgentKnowledgeModelConfig = {
|
||||
completion_params?: {
|
||||
[key: string]: unknown
|
||||
}
|
||||
mode: string
|
||||
name: string
|
||||
provider: string
|
||||
}
|
||||
|
||||
export type AgentKnowledgeQueryMode = 'generated_query' | 'user_query'
|
||||
|
||||
export type AgentKnowledgeRerankingModelConfig = {
|
||||
model: string
|
||||
provider: string
|
||||
}
|
||||
|
||||
export type AgentKnowledgeWeightedScoreConfig = {
|
||||
keyword_setting?: {
|
||||
[key: string]: unknown
|
||||
} | null
|
||||
vector_setting?: {
|
||||
[key: string]: unknown
|
||||
} | null
|
||||
weight_type?: string | null
|
||||
[key: string]: unknown
|
||||
}
|
||||
|
||||
export type ValueSourceType = 'constant' | 'variable'
|
||||
|
||||
export type AgentKnowledgeMetadataCondition = {
|
||||
comparison_operator:
|
||||
| '<'
|
||||
| '='
|
||||
| '>'
|
||||
| 'after'
|
||||
| 'before'
|
||||
| 'contains'
|
||||
| 'empty'
|
||||
| 'end with'
|
||||
| 'in'
|
||||
| 'is'
|
||||
| 'is not'
|
||||
| 'not contains'
|
||||
| 'not empty'
|
||||
| 'not in'
|
||||
| 'start with'
|
||||
| '≠'
|
||||
| '≤'
|
||||
| '≥'
|
||||
name: string
|
||||
value?: string | Array<string> | number | null
|
||||
}
|
||||
|
||||
export type AppPaginationWritable = {
|
||||
data: Array<AppPartialWritable>
|
||||
has_more: boolean
|
||||
|
||||
@ -2629,15 +2629,6 @@ export const zAgentComposerDifyToolCandidateResponse = z.object({
|
||||
tools_count: z.int().nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeDatasetConfig
|
||||
*/
|
||||
export const zAgentKnowledgeDatasetConfig = z.object({
|
||||
description: z.string().nullish(),
|
||||
id: z.string().max(255).nullish(),
|
||||
name: z.string().max(255).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* CheckResultView
|
||||
*
|
||||
@ -2767,30 +2758,6 @@ export const zAgentSoulHumanConfig = z.object({
|
||||
tools: z.array(zAgentHumanToolConfig).optional(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeQueryConfig
|
||||
*/
|
||||
export const zAgentKnowledgeQueryConfig = z.object({
|
||||
query: z.string().nullish(),
|
||||
score_threshold: z.number().gte(0).lte(1).nullish(),
|
||||
score_threshold_enabled: z.boolean().nullish(),
|
||||
top_k: z.int().gte(1).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeQueryMode
|
||||
*/
|
||||
export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
|
||||
|
||||
/**
|
||||
* AgentSoulKnowledgeConfig
|
||||
*/
|
||||
export const zAgentSoulKnowledgeConfig = z.object({
|
||||
datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
|
||||
query_config: zAgentKnowledgeQueryConfig.optional(),
|
||||
query_mode: zAgentKnowledgeQueryMode.nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentMemoryArtifactConfig
|
||||
*/
|
||||
@ -3002,6 +2969,27 @@ export const zAgentCliToolConfig = z.object({
|
||||
tool_name: z.string().max(255).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentComposerKnowledgeDatasetCandidateResponse
|
||||
*/
|
||||
export const zAgentComposerKnowledgeDatasetCandidateResponse = z.object({
|
||||
description: z.string().nullish(),
|
||||
id: z.string().max(255).nullish(),
|
||||
missing: z.boolean().optional().default(false),
|
||||
name: z.string().max(255).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentComposerKnowledgeSetCandidateResponse
|
||||
*/
|
||||
export const zAgentComposerKnowledgeSetCandidateResponse = z.object({
|
||||
datasets: z.array(zAgentComposerKnowledgeDatasetCandidateResponse).optional(),
|
||||
description: z.string().nullish(),
|
||||
id: z.string(),
|
||||
missing_dataset_ids: z.array(z.string()).optional(),
|
||||
name: z.string(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentComposerSoulCandidatesResponse
|
||||
*/
|
||||
@ -3009,7 +2997,7 @@ export const zAgentComposerSoulCandidatesResponse = z.object({
|
||||
cli_tools: z.array(zAgentCliToolConfig).optional(),
|
||||
dify_tools: z.array(zAgentComposerDifyToolCandidateResponse).optional(),
|
||||
human_contacts: z.array(zAgentHumanContactConfig).optional(),
|
||||
knowledge_datasets: z.array(zAgentKnowledgeDatasetConfig).optional(),
|
||||
knowledge_sets: z.array(zAgentComposerKnowledgeSetCandidateResponse).optional(),
|
||||
})
|
||||
|
||||
/**
|
||||
@ -3041,6 +3029,15 @@ export const zUserActionConfig = z.object({
|
||||
title: z.string().max(100),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeDatasetConfig
|
||||
*/
|
||||
export const zAgentKnowledgeDatasetConfig = z.object({
|
||||
description: z.string().nullish(),
|
||||
id: z.string().max(255).nullish(),
|
||||
name: z.string().max(255).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentModelResponseFormatConfig
|
||||
*/
|
||||
@ -3292,57 +3289,57 @@ export const zAgentSoulAppFeaturesConfig = z.object({
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentSoulConfig
|
||||
* AgentKnowledgeModelConfig
|
||||
*/
|
||||
export const zAgentSoulConfig = z.object({
|
||||
app_features: zAgentSoulAppFeaturesConfig.optional(),
|
||||
app_variables: z.array(zAppVariableConfig).optional(),
|
||||
env: zAgentSoulEnvConfig.optional(),
|
||||
human: zAgentSoulHumanConfig.optional(),
|
||||
knowledge: zAgentSoulKnowledgeConfig.optional(),
|
||||
memory: zAgentSoulMemoryConfig.optional(),
|
||||
misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
|
||||
model: zAgentSoulModelConfig.nullish(),
|
||||
prompt: zAgentSoulPromptConfig.optional(),
|
||||
sandbox: zAgentSoulSandboxConfig.optional(),
|
||||
schema_version: z.int().optional().default(1),
|
||||
tools: zAgentSoulToolsConfig.optional(),
|
||||
export const zAgentKnowledgeModelConfig = z.object({
|
||||
completion_params: z.record(z.string(), z.unknown()).optional(),
|
||||
mode: z.string().min(1).max(64),
|
||||
name: z.string().min(1).max(255),
|
||||
provider: z.string().min(1).max(255),
|
||||
})
|
||||
|
||||
/**
|
||||
* WorkflowAgentComposerResponse
|
||||
* AgentKnowledgeQueryMode
|
||||
*/
|
||||
export const zWorkflowAgentComposerResponse = z.object({
|
||||
active_config_snapshot: zAgentConfigSnapshotSummaryResponse.nullish(),
|
||||
agent: zAgentComposerAgentResponse.nullish(),
|
||||
agent_soul: zAgentSoulConfig,
|
||||
app_id: z.string().nullish(),
|
||||
binding: zAgentComposerBindingResponse.nullish(),
|
||||
effective_declared_outputs: z.array(zDeclaredOutputConfig).optional(),
|
||||
impact_summary: zAgentComposerImpactResponse.nullish(),
|
||||
node_id: z.string().nullish(),
|
||||
node_job: zWorkflowNodeJobConfig,
|
||||
save_options: z.array(zComposerSaveStrategy),
|
||||
soul_lock: zAgentComposerSoulLockResponse,
|
||||
validation: zComposerValidationFindingsResponse.nullish(),
|
||||
variant: z.literal('workflow'),
|
||||
workflow_id: z.string().nullish(),
|
||||
export const zAgentKnowledgeQueryMode = z.enum(['generated_query', 'user_query'])
|
||||
|
||||
/**
|
||||
* AgentKnowledgeQueryConfig
|
||||
*/
|
||||
export const zAgentKnowledgeQueryConfig = z.object({
|
||||
mode: zAgentKnowledgeQueryMode,
|
||||
value: z.string().nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* ComposerSavePayload
|
||||
* AgentKnowledgeRerankingModelConfig
|
||||
*/
|
||||
export const zComposerSavePayload = z.object({
|
||||
agent_soul: zAgentSoulConfig.nullish(),
|
||||
binding: zComposerBindingPayload.nullish(),
|
||||
client_revision_id: z.string().nullish(),
|
||||
idempotency_key: z.string().nullish(),
|
||||
new_agent_name: z.string().min(1).max(255).nullish(),
|
||||
node_job: zWorkflowNodeJobConfig.nullish(),
|
||||
save_strategy: zComposerSaveStrategy,
|
||||
soul_lock: zComposerSoulLockPayload.optional(),
|
||||
variant: zComposerVariant,
|
||||
version_note: z.string().nullish(),
|
||||
export const zAgentKnowledgeRerankingModelConfig = z.object({
|
||||
model: z.string().min(1).max(255),
|
||||
provider: z.string().min(1).max(255),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeWeightedScoreConfig
|
||||
*/
|
||||
export const zAgentKnowledgeWeightedScoreConfig = z.object({
|
||||
keyword_setting: z.record(z.string(), z.unknown()).nullish(),
|
||||
vector_setting: z.record(z.string(), z.unknown()).nullish(),
|
||||
weight_type: z.string().max(64).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeRetrievalConfig
|
||||
*/
|
||||
export const zAgentKnowledgeRetrievalConfig = z.object({
|
||||
mode: z.enum(['multiple', 'single']),
|
||||
model: zAgentKnowledgeModelConfig.nullish(),
|
||||
reranking_enable: z.boolean().optional().default(true),
|
||||
reranking_mode: z.string().optional().default('reranking_model'),
|
||||
reranking_model: zAgentKnowledgeRerankingModelConfig.nullish(),
|
||||
score_threshold: z.number().gte(0).lte(1).nullish(),
|
||||
top_k: z.int().gte(1).nullish(),
|
||||
weights: zAgentKnowledgeWeightedScoreConfig.nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
@ -3466,6 +3463,125 @@ export const zMessageInfiniteScrollPaginationResponse = z.object({
|
||||
limit: z.int(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeMetadataCondition
|
||||
*/
|
||||
export const zAgentKnowledgeMetadataCondition = z.object({
|
||||
comparison_operator: z.enum([
|
||||
'<',
|
||||
'=',
|
||||
'>',
|
||||
'after',
|
||||
'before',
|
||||
'contains',
|
||||
'empty',
|
||||
'end with',
|
||||
'in',
|
||||
'is',
|
||||
'is not',
|
||||
'not contains',
|
||||
'not empty',
|
||||
'not in',
|
||||
'start with',
|
||||
'≠',
|
||||
'≤',
|
||||
'≥',
|
||||
]),
|
||||
name: z.string().min(1).max(255),
|
||||
value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeMetadataConditions
|
||||
*/
|
||||
export const zAgentKnowledgeMetadataConditions = z.object({
|
||||
conditions: z.array(zAgentKnowledgeMetadataCondition).optional(),
|
||||
logical_operator: z.enum(['and', 'or']).optional().default('and'),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeMetadataFilteringConfig
|
||||
*/
|
||||
export const zAgentKnowledgeMetadataFilteringConfig = z.object({
|
||||
conditions: zAgentKnowledgeMetadataConditions.nullish(),
|
||||
mode: z.enum(['automatic', 'disabled', 'manual']).optional().default('disabled'),
|
||||
model_config: zAgentKnowledgeModelConfig.nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentKnowledgeSetConfig
|
||||
*/
|
||||
export const zAgentKnowledgeSetConfig = z.object({
|
||||
datasets: z.array(zAgentKnowledgeDatasetConfig),
|
||||
description: z.string().nullish(),
|
||||
id: z.string().min(1).max(255),
|
||||
metadata_filtering: zAgentKnowledgeMetadataFilteringConfig.optional(),
|
||||
name: z.string().min(1).max(255),
|
||||
query: zAgentKnowledgeQueryConfig,
|
||||
retrieval: zAgentKnowledgeRetrievalConfig,
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentSoulKnowledgeConfig
|
||||
*/
|
||||
export const zAgentSoulKnowledgeConfig = z.object({
|
||||
sets: z.array(zAgentKnowledgeSetConfig).optional(),
|
||||
})
|
||||
|
||||
/**
|
||||
* AgentSoulConfig
|
||||
*/
|
||||
export const zAgentSoulConfig = z.object({
|
||||
app_features: zAgentSoulAppFeaturesConfig.optional(),
|
||||
app_variables: z.array(zAppVariableConfig).optional(),
|
||||
env: zAgentSoulEnvConfig.optional(),
|
||||
human: zAgentSoulHumanConfig.optional(),
|
||||
knowledge: zAgentSoulKnowledgeConfig.optional(),
|
||||
memory: zAgentSoulMemoryConfig.optional(),
|
||||
misc_legacy: zAgentSoulAppFeaturesConfig.optional(),
|
||||
model: zAgentSoulModelConfig.nullish(),
|
||||
prompt: zAgentSoulPromptConfig.optional(),
|
||||
sandbox: zAgentSoulSandboxConfig.optional(),
|
||||
schema_version: z.int().optional().default(1),
|
||||
tools: zAgentSoulToolsConfig.optional(),
|
||||
})
|
||||
|
||||
/**
|
||||
* WorkflowAgentComposerResponse
|
||||
*/
|
||||
export const zWorkflowAgentComposerResponse = z.object({
|
||||
active_config_snapshot: zAgentConfigSnapshotSummaryResponse.nullish(),
|
||||
agent: zAgentComposerAgentResponse.nullish(),
|
||||
agent_soul: zAgentSoulConfig,
|
||||
app_id: z.string().nullish(),
|
||||
binding: zAgentComposerBindingResponse.nullish(),
|
||||
effective_declared_outputs: z.array(zDeclaredOutputConfig).optional(),
|
||||
impact_summary: zAgentComposerImpactResponse.nullish(),
|
||||
node_id: z.string().nullish(),
|
||||
node_job: zWorkflowNodeJobConfig,
|
||||
save_options: z.array(zComposerSaveStrategy),
|
||||
soul_lock: zAgentComposerSoulLockResponse,
|
||||
validation: zComposerValidationFindingsResponse.nullish(),
|
||||
variant: z.literal('workflow'),
|
||||
workflow_id: z.string().nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* ComposerSavePayload
|
||||
*/
|
||||
export const zComposerSavePayload = z.object({
|
||||
agent_soul: zAgentSoulConfig.nullish(),
|
||||
binding: zComposerBindingPayload.nullish(),
|
||||
client_revision_id: z.string().nullish(),
|
||||
idempotency_key: z.string().nullish(),
|
||||
new_agent_name: z.string().min(1).max(255).nullish(),
|
||||
node_job: zWorkflowNodeJobConfig.nullish(),
|
||||
save_strategy: zComposerSaveStrategy,
|
||||
soul_lock: zComposerSoulLockPayload.optional(),
|
||||
variant: zComposerVariant,
|
||||
version_note: z.string().nullish(),
|
||||
})
|
||||
|
||||
/**
|
||||
* GeneratedAppResponse
|
||||
*/
|
||||
|
||||
Loading…
Reference in New Issue
Block a user