From 8c4ea5c8980f73bf64ee94f5d29719bd00d9b137 Mon Sep 17 00:00:00 2001 From: -LAN- Date: Mon, 13 Apr 2026 09:47:57 +0800 Subject: [PATCH] fix: external dataset tenant checks for bound knowledge APIs (#34734) --- api/models/dataset.py | 8 +++- api/services/dataset_service.py | 2 + api/services/external_knowledge_service.py | 5 ++- .../test_dataset_service_update_dataset.py | 45 +++++++++++++++++-- .../unit_tests/models/test_dataset_models.py | 21 ++++++++- .../services/test_dataset_service_dataset.py | 29 ++++++++++++ .../services/test_external_dataset_service.py | 11 +++++ 7 files changed, 114 insertions(+), 7 deletions(-) diff --git a/api/models/dataset.py b/api/models/dataset.py index f6aa81f35d..a8ed821c3a 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -353,13 +353,17 @@ class Dataset(Base): if self.provider != "external": return None external_knowledge_binding = db.session.scalar( - select(ExternalKnowledgeBindings).where(ExternalKnowledgeBindings.dataset_id == self.id) + select(ExternalKnowledgeBindings).where( + ExternalKnowledgeBindings.dataset_id == self.id, + ExternalKnowledgeBindings.tenant_id == self.tenant_id, + ) ) if not external_knowledge_binding: return None external_knowledge_api = db.session.scalar( select(ExternalKnowledgeApis).where( - ExternalKnowledgeApis.id == external_knowledge_binding.external_knowledge_api_id + ExternalKnowledgeApis.id == external_knowledge_binding.external_knowledge_api_id, + ExternalKnowledgeApis.tenant_id == self.tenant_id, ) ) if external_knowledge_api is None or external_knowledge_api.settings is None: diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index b2920c1006..e07e01ad42 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -528,6 +528,8 @@ class DatasetService: raise ValueError("External knowledge id is required.") if not external_knowledge_api_id: raise ValueError("External knowledge api id is required.") + # Ensure the referenced external API template exists and belongs to the dataset tenant. + ExternalDatasetService.get_external_knowledge_api(external_knowledge_api_id, dataset.tenant_id) # Update metadata fields dataset.updated_by = user.id if user else None dataset.updated_at = naive_utc_now() diff --git a/api/services/external_knowledge_service.py b/api/services/external_knowledge_service.py index ce5dee4943..96db644d44 100644 --- a/api/services/external_knowledge_service.py +++ b/api/services/external_knowledge_service.py @@ -317,7 +317,10 @@ class ExternalDatasetService: external_knowledge_api = db.session.scalar( select(ExternalKnowledgeApis) - .where(ExternalKnowledgeApis.id == external_knowledge_binding.external_knowledge_api_id) + .where( + ExternalKnowledgeApis.id == external_knowledge_binding.external_knowledge_api_id, + ExternalKnowledgeApis.tenant_id == tenant_id, + ) .limit(1) ) if external_knowledge_api is None or external_knowledge_api.settings is None: diff --git a/api/tests/test_containers_integration_tests/services/test_dataset_service_update_dataset.py b/api/tests/test_containers_integration_tests/services/test_dataset_service_update_dataset.py index a814466e14..2a2d86a8a6 100644 --- a/api/tests/test_containers_integration_tests/services/test_dataset_service_update_dataset.py +++ b/api/tests/test_containers_integration_tests/services/test_dataset_service_update_dataset.py @@ -1,3 +1,4 @@ +import json from unittest.mock import Mock, patch from uuid import uuid4 @@ -7,7 +8,7 @@ from sqlalchemy.orm import Session from core.rag.index_processor.constant.index_type import IndexTechniqueType from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole -from models.dataset import Dataset, ExternalKnowledgeBindings +from models.dataset import Dataset, ExternalKnowledgeApis, ExternalKnowledgeBindings from models.enums import DataSourceType from services.dataset_service import DatasetService from services.errors.account import NoPermissionError @@ -103,6 +104,34 @@ class DatasetUpdateTestDataFactory: db_session_with_containers.commit() return binding + @staticmethod + def create_external_knowledge_api( + db_session_with_containers: Session, + tenant_id: str, + created_by: str, + api_id: str | None = None, + name: str = "test-api", + ) -> ExternalKnowledgeApis: + """Create a real external knowledge API template for tenant-scoped update validation.""" + external_api = ExternalKnowledgeApis( + tenant_id=tenant_id, + created_by=created_by, + updated_by=created_by, + name=name, + description="test description", + settings=json.dumps( + { + "endpoint": "https://example.com", + "api_key": "test-api-key", + } + ), + ) + if api_id is not None: + external_api.id = api_id + db_session_with_containers.add(external_api) + db_session_with_containers.commit() + return external_api + class TestDatasetServiceUpdateDataset: """ @@ -138,6 +167,11 @@ class TestDatasetServiceUpdateDataset: ) binding_id = binding.id db_session_with_containers.expunge(binding) + external_api = DatasetUpdateTestDataFactory.create_external_knowledge_api( + db_session_with_containers, + tenant_id=tenant.id, + created_by=user.id, + ) update_data = { "name": "new_name", @@ -145,7 +179,7 @@ class TestDatasetServiceUpdateDataset: "external_retrieval_model": "new_model", "permission": "only_me", "external_knowledge_id": "new_knowledge_id", - "external_knowledge_api_id": str(uuid4()), + "external_knowledge_api_id": external_api.id, } result = DatasetService.update_dataset(dataset.id, update_data, user) @@ -218,11 +252,16 @@ class TestDatasetServiceUpdateDataset: created_by=user.id, provider="external", ) + external_api = DatasetUpdateTestDataFactory.create_external_knowledge_api( + db_session_with_containers, + tenant_id=tenant.id, + created_by=user.id, + ) update_data = { "name": "new_name", "external_knowledge_id": "knowledge_id", - "external_knowledge_api_id": str(uuid4()), + "external_knowledge_api_id": external_api.id, } with pytest.raises(ValueError) as context: diff --git a/api/tests/unit_tests/models/test_dataset_models.py b/api/tests/unit_tests/models/test_dataset_models.py index 6c8a91129b..51d95c4239 100644 --- a/api/tests/unit_tests/models/test_dataset_models.py +++ b/api/tests/unit_tests/models/test_dataset_models.py @@ -12,7 +12,7 @@ This test suite covers: import json import pickle from datetime import UTC, datetime -from unittest.mock import patch +from unittest.mock import Mock, patch from uuid import uuid4 from core.rag.index_processor.constant.index_type import IndexTechniqueType @@ -25,6 +25,7 @@ from models.dataset import ( Document, DocumentSegment, Embedding, + ExternalKnowledgeBindings, ) from models.enums import ( DataSourceType, @@ -180,6 +181,24 @@ class TestDatasetModelValidation: assert result["top_k"] == 2 assert result["score_threshold"] == 0.0 + def test_dataset_external_knowledge_info_returns_none_for_cross_tenant_template(self): + """Test external datasets fail closed when the bound template is outside the tenant.""" + dataset = Dataset( + tenant_id=str(uuid4()), + name="External Dataset", + data_source_type=DataSourceType.UPLOAD_FILE, + created_by=str(uuid4()), + provider="external", + ) + binding = Mock(spec=ExternalKnowledgeBindings) + binding.external_knowledge_id = "knowledge-1" + binding.external_knowledge_api_id = str(uuid4()) + + with patch("models.dataset.db") as mock_db: + mock_db.session.scalar.side_effect = [binding, None] + + assert dataset.external_knowledge_info is None + def test_dataset_retrieval_model_dict_property(self): """Test retrieval_model_dict property with default values.""" # Arrange diff --git a/api/tests/unit_tests/services/test_dataset_service_dataset.py b/api/tests/unit_tests/services/test_dataset_service_dataset.py index c65ce24b3c..2913ae20fe 100644 --- a/api/tests/unit_tests/services/test_dataset_service_dataset.py +++ b/api/tests/unit_tests/services/test_dataset_service_dataset.py @@ -532,6 +532,9 @@ class TestDatasetServiceCreationAndUpdate: with ( patch.object(DatasetService, "_update_external_knowledge_binding") as update_binding, + patch( + "services.dataset_service.ExternalDatasetService.get_external_knowledge_api", return_value=object() + ) as get_external_knowledge_api, patch("services.dataset_service.naive_utc_now", return_value=now), patch("services.dataset_service.db") as mock_db, ): @@ -557,6 +560,7 @@ class TestDatasetServiceCreationAndUpdate: assert dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM assert dataset.updated_by == "user-1" assert dataset.updated_at is now + get_external_knowledge_api.assert_called_once_with("api-1", dataset.tenant_id) update_binding.assert_called_once_with("dataset-1", "knowledge-1", "api-1") mock_db.session.add.assert_called_once_with(dataset) mock_db.session.commit.assert_called_once() @@ -574,6 +578,31 @@ class TestDatasetServiceCreationAndUpdate: with pytest.raises(ValueError, match=message): DatasetService._update_external_dataset(dataset, payload, SimpleNamespace(id="user-1")) + def test_update_external_dataset_rejects_cross_tenant_external_api_id(self): + dataset = DatasetServiceUnitDataFactory.create_dataset_mock(dataset_id="dataset-1") + + with ( + patch( + "services.dataset_service.ExternalDatasetService.get_external_knowledge_api", + side_effect=ValueError("api template not found"), + ) as get_external_knowledge_api, + patch.object(DatasetService, "_update_external_knowledge_binding") as update_binding, + patch("services.dataset_service.db") as mock_db, + ): + with pytest.raises(ValueError, match="api template not found"): + DatasetService._update_external_dataset( + dataset, + { + "external_knowledge_id": "knowledge-1", + "external_knowledge_api_id": "foreign-api", + }, + SimpleNamespace(id="user-1"), + ) + + get_external_knowledge_api.assert_called_once_with("foreign-api", dataset.tenant_id) + update_binding.assert_not_called() + mock_db.session.commit.assert_not_called() + def test_update_external_knowledge_binding_updates_changed_binding_values(self): binding = SimpleNamespace(external_knowledge_id="old-knowledge", external_knowledge_api_id="old-api") session = MagicMock() diff --git a/api/tests/unit_tests/services/test_external_dataset_service.py b/api/tests/unit_tests/services/test_external_dataset_service.py index 0777e6a8a4..b802f6931f 100644 --- a/api/tests/unit_tests/services/test_external_dataset_service.py +++ b/api/tests/unit_tests/services/test_external_dataset_service.py @@ -1560,6 +1560,17 @@ class TestExternalDatasetServiceFetchRetrieval: with pytest.raises(ValueError, match="external knowledge binding not found"): ExternalDatasetService.fetch_external_knowledge_retrieval("tenant-123", "dataset-123", "query", {}) + @patch("services.external_knowledge_service.db") + def test_fetch_external_knowledge_retrieval_cross_tenant_api_template_error(self, mock_db, factory): + """Test error when a binding points to an API template outside the dataset tenant.""" + # Arrange + binding = factory.create_external_knowledge_binding_mock() + mock_db.session.scalar.side_effect = [binding, None] + + # Act & Assert + with pytest.raises(ValueError, match="external api template not found"): + ExternalDatasetService.fetch_external_knowledge_retrieval("tenant-123", "dataset-123", "query", {}) + @patch("services.external_knowledge_service.ExternalDatasetService.process_external_api") @patch("services.external_knowledge_service.db") def test_fetch_external_knowledge_retrieval_empty_results(self, mock_db, mock_process, factory):