mirror of
https://github.com/langgenius/dify.git
synced 2026-06-26 23:01:11 +08:00
64 lines
2.2 KiB
Python
64 lines
2.2 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
from uuid import UUID
|
|
|
|
from models.agent_config_entities import AgentSoulConfig
|
|
|
|
|
|
def list_agent_soul_knowledge_dataset_ids(agent_soul: AgentSoulConfig) -> list[str]:
    """Collect the knowledge dataset ids referenced by an agent soul config.

    The ids are read from every dataset of every entry in
    ``agent_soul.knowledge.sets`` (the owner of Agent v2 knowledge dataset
    selection) and normalized with one shared rule set, so that composer,
    workflow validation, candidates, and runtime diagnostics all agree:

    * surrounding whitespace is stripped;
    * ``None``/empty/whitespace-only ids are dropped;
    * duplicates are removed, keeping the first occurrence;
    * the remaining ids keep the order in which they appear in the config.
    """
    # Flatten sets -> datasets lazily; a falsy id (e.g. ``None``) is coerced to
    # "" so that ``strip`` is always called on a string.
    stripped_ids = (
        (entry.id or "").strip()
        for group in agent_soul.knowledge.sets
        for entry in group.datasets
    )
    # ``dict`` preserves insertion order, so ``fromkeys`` yields a first-seen
    # de-duplication of the non-blank ids.
    return list(dict.fromkeys(candidate for candidate in stripped_ids if candidate))
|
|
|
|
|
|
def get_tenant_knowledge_dataset_rows(*, tenant_id: str, dataset_ids: list[str]) -> dict[str, Any]:
    """Look up dataset rows for the given ids within one tenant.

    The ids originate from user-editable config, so some of them may not be
    parseable as UUIDs. Such ids can never match a dataset row; they are
    filtered out up front (and therefore simply absent from the result)
    rather than being passed to the UUID-typed dataset lookup.

    Args:
        tenant_id: Tenant the lookup is scoped to.
        dataset_ids: Normalized knowledge dataset ids to resolve.

    Returns:
        A mapping of ``str(row.id)`` to the row returned by
        ``DatasetService.get_datasets_by_ids``. Empty when no id parses as a
        UUID (the service is not called in that case).
    """
    # Imported at call time rather than at module level, as in the original.
    from services.dataset_service import DatasetService

    def _parses_as_uuid(candidate: str) -> bool:
        # ``UUID`` raises ValueError for malformed text and TypeError when no
        # usable value is supplied (e.g. ``None``).
        try:
            UUID(candidate)
        except (TypeError, ValueError):
            return False
        return True

    lookup_ids = [candidate for candidate in dataset_ids if _parses_as_uuid(candidate)]
    if not lookup_ids:
        return {}

    # The service returns a 2-tuple; only the first element (the rows) is used.
    matched_rows, _ignored = DatasetService.get_datasets_by_ids(lookup_ids, tenant_id)
    return {str(dataset_row.id): dataset_row for dataset_row in matched_rows}
|
|
|
|
|
|
def list_missing_tenant_knowledge_dataset_ids(*, tenant_id: str, agent_soul: AgentSoulConfig | None) -> list[str]:
    """Report which configured knowledge dataset ids the tenant lookup did not return.

    Args:
        tenant_id: Tenant the dataset lookup is scoped to.
        agent_soul: Agent soul config to inspect; ``None`` means nothing is
            configured.

    Returns:
        The normalized dataset ids from ``agent_soul`` (in config order) that
        are not keys of the tenant-scoped row mapping. Empty when
        ``agent_soul`` is ``None`` or references no dataset ids; the dataset
        lookup is skipped in both cases.
    """
    configured_ids = [] if agent_soul is None else list_agent_soul_knowledge_dataset_ids(agent_soul)
    if not configured_ids:
        return []

    found_rows = get_tenant_knowledge_dataset_rows(tenant_id=tenant_id, dataset_ids=configured_ids)
    return [candidate for candidate in configured_ids if candidate not in found_rows]
|