mirror of
https://github.com/langgenius/dify.git
synced 2026-06-18 07:41:09 +08:00
146 lines
7.1 KiB
Python
146 lines
7.1 KiB
Python
"""Service wrapper for the inner knowledge retrieval API.

This service keeps the internal HTTP contract small while reusing the workflow
retrieval stack in ``core.rag.retrieval.dataset_retrieval.DatasetRetrieval``.
The only authorization enforced here is tenant ownership of the caller app and
requested datasets.

It intentionally does not check ``dataset.enable_api`` or user-level dataset
permissions. After the caller app and requested datasets pass tenant-scoped
prechecks, dataset availability and "no usable document" cases are delegated to
``DatasetRetrieval`` and may legitimately produce an empty result list instead
of a separate validation error.
"""
|
|
|
|
from sqlalchemy import select
|
|
|
|
from core.rag.entities.metadata_entities import Condition, MetadataFilteringCondition
|
|
from core.rag.retrieval.dataset_retrieval import DatasetRetrieval
|
|
from core.workflow.nodes.knowledge_retrieval.retrieval import KnowledgeRetrievalRequest
|
|
from extensions.ext_database import db
|
|
from graphon.model_runtime.utils.encoders import jsonable_encoder
|
|
from graphon.nodes.llm.entities import ModelConfig
|
|
from models.dataset import Dataset
|
|
from models.model import App
|
|
from services.entities.knowledge_retrieval_inner import (
|
|
InnerKnowledgeRetrieveRequest,
|
|
InnerKnowledgeRetrieveResponse,
|
|
InnerKnowledgeRetrieveUsage,
|
|
)
|
|
from services.errors.knowledge_retrieval import (
|
|
InnerKnowledgeRetrieveAppNotFoundError,
|
|
InnerKnowledgeRetrieveAppTenantMismatchError,
|
|
InnerKnowledgeRetrieveDatasetNotFoundError,
|
|
InnerKnowledgeRetrieveDatasetTenantMismatchError,
|
|
)
|
|
|
|
|
|
class InnerKnowledgeRetrievalService:
    """Tenant-scope gatekeeper in front of the workflow ``DatasetRetrieval`` stack.

    The service checks that the caller app and every requested dataset belong
    to the caller tenant, translates the inner request into a
    ``KnowledgeRetrievalRequest`` and hands it to ``DatasetRetrieval``.
    """

    def retrieve(self, request: InnerKnowledgeRetrieveRequest) -> InnerKnowledgeRetrieveResponse:
        """Execute a retrieval on behalf of a trusted internal caller.

        Only two things are rejected here: a caller app that is missing or
        owned by another tenant, and requested datasets that are missing or
        owned by another tenant. ``dataset.enable_api``, user-level dataset
        permissions and availability/no-usable-document handling are
        deliberately not checked in this method; they are left to
        ``DatasetRetrieval`` so that the inner API follows workflow retrieval
        semantics, which includes an empty ``results`` list when the datasets
        exist but yield nothing retrievable.

        Args:
            request: The inner retrieval request, carrying the caller identity,
                dataset ids, query and retrieval/metadata-filtering settings.

        Returns:
            The retrieval results together with the LLM usage reported by the
            ``DatasetRetrieval`` instance that served the request.

        Raises:
            InnerKnowledgeRetrieveAppNotFoundError: The caller app does not exist.
            InnerKnowledgeRetrieveAppTenantMismatchError: The caller app is outside the caller tenant.
            InnerKnowledgeRetrieveDatasetNotFoundError: At least one requested dataset does not exist.
            InnerKnowledgeRetrieveDatasetTenantMismatchError:
                At least one requested dataset is outside the caller tenant.
        """
        caller = request.caller

        # Tenant-scoped prechecks: the app first, then the datasets.
        self._validate_caller_app(tenant_id=caller.tenant_id, app_id=caller.app_id)
        self._validate_datasets(tenant_id=caller.tenant_id, dataset_ids=request.dataset_ids)

        retriever = DatasetRetrieval()
        rag_request = self._to_rag_request(request)
        hits = retriever.knowledge_retrieval(request=rag_request)

        # Usage is read from the retriever only after the retrieval call has returned.
        usage = InnerKnowledgeRetrieveUsage.model_validate(jsonable_encoder(retriever.llm_usage))
        return InnerKnowledgeRetrieveResponse(results=hits, usage=usage)

    def _validate_caller_app(self, *, tenant_id: str, app_id: str) -> None:
        """Ensure the caller app exists and is owned by ``tenant_id``.

        Raises:
            InnerKnowledgeRetrieveAppNotFoundError: No app row has id ``app_id``.
            InnerKnowledgeRetrieveAppTenantMismatchError: The app's tenant differs from ``tenant_id``.
        """
        lookup = select(App).where(App.id == app_id).limit(1)
        caller_app = db.session.scalar(lookup)

        if caller_app is None:
            raise InnerKnowledgeRetrieveAppNotFoundError(f"App '{app_id}' not found")

        if caller_app.tenant_id == tenant_id:
            return

        raise InnerKnowledgeRetrieveAppTenantMismatchError(
            f"App '{app_id}' does not belong to tenant '{tenant_id}'"
        )

    def _validate_datasets(self, *, tenant_id: str, dataset_ids: list[str]) -> None:
        """Ensure every requested dataset exists and is owned by ``tenant_id``.

        Missing datasets are reported before tenant mismatches; both error
        messages list the offending ids in sorted order.

        Raises:
            InnerKnowledgeRetrieveDatasetNotFoundError: Some requested ids have no dataset row.
            InnerKnowledgeRetrieveDatasetTenantMismatchError: Some datasets belong to another tenant.
        """
        lookup = select(Dataset).where(Dataset.id.in_(dataset_ids))
        rows = db.session.scalars(lookup).all()

        # Compare as sets so duplicate ids in the request are not reported as missing.
        known_ids = {row.id for row in rows}
        absent_ids = sorted(set(dataset_ids).difference(known_ids))
        if absent_ids:
            raise InnerKnowledgeRetrieveDatasetNotFoundError(f"Datasets not found: {', '.join(absent_ids)}")

        foreign_ids = sorted(row.id for row in rows if row.tenant_id != tenant_id)
        if foreign_ids:
            raise InnerKnowledgeRetrieveDatasetTenantMismatchError(
                f"Datasets do not belong to tenant '{tenant_id}': {', '.join(foreign_ids)}"
            )

    def _to_rag_request(self, request: InnerKnowledgeRetrieveRequest) -> KnowledgeRetrievalRequest:
        """Translate the inner API request into the workflow retrieval request.

        Optional sections (retrieval model, metadata model config, metadata
        conditions, reranking model) map to ``None`` when absent. A falsy
        ``top_k`` becomes ``0`` and falsy ``attachment_ids`` become ``None``.
        """
        filtering = request.metadata_filtering
        source_model_config = filtering.metadata_model_config
        source_filter = filtering.conditions
        caller = request.caller
        retrieval = request.retrieval

        # The retrieval model is optional; flatten it into four independent fields.
        llm = retrieval.model
        model_provider = llm.provider if llm else None
        completion_params = llm.completion_params if llm else None
        model_mode = llm.mode if llm else None
        model_name = llm.name if llm else None

        # Re-validate the metadata model config into the ModelConfig type used by the request.
        rag_model_config = (
            ModelConfig.model_validate(source_model_config.model_dump(mode="python"))
            if source_model_config
            else None
        )

        # Rebuild the metadata filter condition by condition; an absent list stays None.
        rag_filter = None
        if source_filter is not None:
            logical_operator = source_filter.logical_operator
            rag_conditions = None
            if source_filter.conditions is not None:
                rag_conditions = [
                    Condition(
                        name=item.name,
                        comparison_operator=item.comparison_operator,
                        value=item.value,
                    )
                    for item in source_filter.conditions
                ]
            rag_filter = MetadataFilteringCondition(
                logical_operator=logical_operator,
                conditions=rag_conditions,
            )

        # The reranking model is passed on as a plain dict with these two keys.
        reranker = retrieval.reranking_model
        reranking_model = None
        if reranker is not None:
            reranking_model = {
                "reranking_provider_name": reranker.provider,
                "reranking_model_name": reranker.model,
            }

        return KnowledgeRetrievalRequest(
            tenant_id=caller.tenant_id,
            user_id=caller.user_id,
            app_id=caller.app_id,
            user_from=caller.user_from,
            dataset_ids=request.dataset_ids,
            query=request.query,
            retrieval_mode=retrieval.mode,
            model_provider=model_provider,
            completion_params=completion_params,
            model_mode=model_mode,
            model_name=model_name,
            metadata_model_config=rag_model_config,
            metadata_filtering_conditions=rag_filter,
            metadata_filtering_mode=filtering.mode,
            top_k=retrieval.top_k or 0,
            score_threshold=retrieval.score_threshold,
            reranking_mode=retrieval.reranking_mode,
            reranking_model=reranking_model,
            weights=retrieval.weights,
            reranking_enable=retrieval.reranking_enable,
            attachment_ids=request.attachment_ids or None,
        )
|