mirror of
https://github.com/langgenius/dify.git
synced 2026-06-18 07:41:09 +08:00
211 lines
7.6 KiB
Python
211 lines
7.6 KiB
Python
"""DTOs for the inner knowledge retrieval API.
|
|
|
|
These models define the stable HTTP contract for trusted internal callers and
|
|
the response shape returned by the workflow knowledge retrieval stack.
|
|
|
|
Key cross-field invariants live here because callers cannot infer them from
|
|
scalar field types alone: ``dataset_ids`` must be non-empty, either ``query``
|
|
or ``attachment_ids`` is required, ``single`` retrieval requires both ``query``
|
|
and ``retrieval.model``, ``multiple`` retrieval requires ``retrieval.top_k``,
``automatic`` metadata filtering requires
|
|
``model_config``, and ``manual`` metadata filtering requires conditions. The
|
|
response reuses workflow ``Source`` items plus serialized ``llm_usage``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Literal
|
|
|
|
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
|
|
|
|
from core.rag.data_post_processor.data_post_processor import WeightsDict
|
|
from core.rag.entities.metadata_entities import SupportedComparisonOperator
|
|
from core.workflow.nodes.knowledge_retrieval.retrieval import Source
|
|
from fields.base import ResponseModel
|
|
|
|
type JsonScalar = str | int | float | bool | None
|
|
type JsonValue = JsonScalar | list[JsonScalar] | dict[str, JsonScalar]
|
|
type MetadataValue = str | list[str] | int | float | None
|
|
|
|
|
|
class InnerKnowledgeRetrieveCaller(BaseModel):
    """Identity and invocation context supplied by the trusted internal caller.

    Every string field must be non-empty and unknown keys are rejected.
    """

    # Reject unexpected keys so contract drift surfaces as a validation error.
    model_config = ConfigDict(extra="forbid")

    tenant_id: str = Field(min_length=1)
    user_id: str = Field(min_length=1)
    app_id: str = Field(min_length=1)
    # Only these two caller kinds are accepted.
    user_from: Literal["account", "end-user"]
    # Free-form here: only non-emptiness is validated by this model.
    invoke_from: str = Field(min_length=1)
|
|
|
|
|
|
class InnerKnowledgeRetrieveModelConfig(BaseModel):
    """LLM settings shared by single-retrieval and metadata filtering.

    ``provider``, ``name`` and ``mode`` must be non-empty strings;
    ``completion_params`` defaults to an empty mapping.
    """

    # Unknown keys are rejected.
    model_config = ConfigDict(extra="forbid")

    provider: str = Field(min_length=1)
    name: str = Field(min_length=1)
    mode: str = Field(min_length=1)
    # ``default_factory`` gives each instance its own dict.
    completion_params: dict[str, JsonValue] = Field(default_factory=dict)
|
|
|
|
|
|
class InnerKnowledgeRetrieveRerankingModelConfig(BaseModel):
    """Provider/model pair naming the reranker used in multiple retrieval mode.

    Both fields must be non-empty strings and unknown keys are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    provider: str = Field(min_length=1)
    model: str = Field(min_length=1)
|
|
|
|
|
|
class InnerKnowledgeRetrieveRetrievalConfig(BaseModel):
|
|
"""Retrieval strategy and its mode-specific configuration."""
|
|
|
|
model_config = ConfigDict(extra="forbid")
|
|
|
|
mode: Literal["multiple", "single"]
|
|
top_k: int | None = Field(default=None, ge=1)
|
|
score_threshold: float = 0.0
|
|
reranking_mode: str = "reranking_model"
|
|
reranking_enable: bool = True
|
|
reranking_model: InnerKnowledgeRetrieveRerankingModelConfig | None = None
|
|
weights: WeightsDict | None = None
|
|
model: InnerKnowledgeRetrieveModelConfig | None = None
|
|
|
|
@model_validator(mode="after")
|
|
def validate_mode_specific_fields(self) -> InnerKnowledgeRetrieveRetrievalConfig:
|
|
if self.mode == "single" and self.model is None:
|
|
raise ValueError("retrieval.model is required for single mode")
|
|
if self.mode == "multiple" and self.top_k is None:
|
|
raise ValueError("retrieval.top_k is required for multiple mode")
|
|
return self
|
|
|
|
|
|
class InnerKnowledgeRetrieveMetadataCondition(BaseModel):
    """One metadata filter clause: a field name, an operator and an operand."""

    # Unknown keys are rejected.
    model_config = ConfigDict(extra="forbid")

    # Must be a non-empty string.
    name: str = Field(min_length=1)
    comparison_operator: SupportedComparisonOperator
    # The operand is optional and defaults to ``None``.
    value: MetadataValue = None
|
|
|
|
|
|
class InnerKnowledgeRetrieveMetadataConditions(BaseModel):
    """A group of metadata conditions joined by a single logical operator."""

    # Unknown keys are rejected.
    model_config = ConfigDict(extra="forbid")

    # Defaults to "and"; an explicit ``None`` is also accepted.
    logical_operator: Literal["and", "or"] | None = "and"
    conditions: list[InnerKnowledgeRetrieveMetadataCondition] | None = None
|
|
|
|
|
|
class InnerKnowledgeRetrieveMetadataFilteringConfig(BaseModel):
    """Metadata filtering settings passed on to workflow retrieval.

    Filtering is ``disabled`` unless requested. ``automatic`` mode must carry
    ``model_config`` so the downstream metadata model planning has LLM
    settings to work with. ``manual`` mode must carry at least one condition,
    because workflow retrieval expects explicit filters rather than a bare
    mode switch.
    """

    # ``populate_by_name`` lets the field be set by its Python name as well as
    # by the ``model_config`` alias.
    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    mode: Literal["disabled", "automatic", "manual"] = "disabled"
    # Exposed under the ``model_config`` alias; the attribute has a different
    # name because ``model_config`` is already the class configuration above.
    metadata_model_config: InnerKnowledgeRetrieveModelConfig | None = Field(default=None, alias="model_config")
    conditions: InnerKnowledgeRetrieveMetadataConditions | None = None

    @model_validator(mode="after")
    def validate_mode_specific_fields(self) -> InnerKnowledgeRetrieveMetadataFilteringConfig:
        """Check that the chosen filtering mode has its required companion data.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: If ``automatic`` mode lacks a model configuration or
                ``manual`` mode lacks a non-empty condition list.
        """
        if self.mode == "automatic":
            if self.metadata_model_config is None:
                raise ValueError("metadata_filtering.model_config is required for automatic mode")
        elif self.mode == "manual":
            group = self.conditions
            # A missing group and a group with no conditions are both rejected.
            if group is None or not group.conditions:
                raise ValueError("metadata_filtering.conditions is required for manual mode")
        return self
|
|
|
|
|
|
class InnerKnowledgeRetrieveRequest(BaseModel):
    """Root request body accepted by the inner knowledge retrieval endpoint.

    Beyond per-field typing, validation enforces:

    * ``dataset_ids`` contains at least one entry;
    * at least one of ``query`` / ``attachment_ids`` is supplied;
    * ``single`` retrieval mode comes with a text ``query``.
    """

    # Unknown keys are rejected.
    model_config = ConfigDict(extra="forbid")

    caller: InnerKnowledgeRetrieveCaller
    dataset_ids: list[str]
    query: str | None = None
    retrieval: InnerKnowledgeRetrieveRetrievalConfig
    # Defaults to a config whose mode is "disabled".
    metadata_filtering: InnerKnowledgeRetrieveMetadataFilteringConfig = Field(
        default_factory=InnerKnowledgeRetrieveMetadataFilteringConfig
    )
    attachment_ids: list[str] = Field(default_factory=list)

    @field_validator("dataset_ids", "attachment_ids")
    @classmethod
    def validate_non_empty_items(cls, value: list[str]) -> list[str]:
        """Reject lists that contain an empty or whitespace-only entry.

        Returns:
            The list exactly as received; entries are checked, not stripped.

        Raises:
            ValueError: If any entry is blank after stripping whitespace.
        """
        for entry in value:
            if not entry.strip():
                raise ValueError("list items must not be empty")
        return value

    @field_validator("query")
    @classmethod
    def normalize_query(cls, value: str | None) -> str | None:
        """Strip surrounding whitespace and turn a blank query into ``None``."""
        if value is None:
            return None
        stripped = value.strip()
        return stripped if stripped else None

    @model_validator(mode="after")
    def validate_request(self) -> InnerKnowledgeRetrieveRequest:
        """Apply the cross-field rules that span the whole request.

        Checks run in a fixed order, so the first failing rule decides the
        error message.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: If there are no dataset IDs, if both ``query`` and
                ``attachment_ids`` are missing, or if ``single`` mode has no
                query.
        """
        has_query = bool(self.query)
        if not self.dataset_ids:
            raise ValueError("dataset_ids must contain at least one item")
        if not (has_query or self.attachment_ids):
            raise ValueError("query or attachment_ids is required")
        if not has_query and self.retrieval.mode == "single":
            raise ValueError("query is required for single mode")
        return self
|
|
|
|
|
|
class InnerKnowledgeRetrieveUsage(ResponseModel):
    """LLM usage figures returned by dataset retrieval, in serialized form.

    Token counts are integers; every price-related field is a string.
    """

    # ``protected_namespaces=()`` turns off Pydantic's ``model_`` prefix check
    # for this class.
    model_config = ConfigDict(
        from_attributes=True,
        extra="forbid",
        populate_by_name=True,
        serialize_by_alias=True,
        protected_namespaces=(),
    )

    # Token counts.
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

    # Pricing, carried as strings.
    prompt_unit_price: str
    completion_unit_price: str
    prompt_price_unit: str
    completion_price_unit: str
    prompt_price: str
    completion_price: str
    total_price: str
    # The only optional field in this model.
    currency: str | None = None

    latency: float | int
|
|
|
|
|
|
class InnerKnowledgeRetrieveResponse(ResponseModel):
    """Response body: workflow ``Source`` items plus the accumulated usage."""

    # ``protected_namespaces=()`` turns off Pydantic's ``model_`` prefix check
    # for this class.
    model_config = ConfigDict(
        from_attributes=True,
        extra="forbid",
        populate_by_name=True,
        serialize_by_alias=True,
        protected_namespaces=(),
    )

    # Retrieved items, reusing the workflow knowledge-retrieval ``Source`` type.
    results: list[Source]
    usage: InnerKnowledgeRetrieveUsage
|