From 693080aa12e4c53d90931442dff192f605a6ead3 Mon Sep 17 00:00:00 2001 From: James <63717587+jamesrayammons@users.noreply.github.com> Date: Tue, 14 Apr 2026 21:52:31 +0200 Subject: [PATCH] test: migrate dataset service dataset mock tests to testcontainers (#35194) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- .../test_dataset_service_permissions.py | 613 ++++++++++++++++++ .../services/test_dataset_service_dataset.py | 499 -------------- 2 files changed, 613 insertions(+), 499 deletions(-) create mode 100644 api/tests/test_containers_integration_tests/services/test_dataset_service_permissions.py diff --git a/api/tests/test_containers_integration_tests/services/test_dataset_service_permissions.py b/api/tests/test_containers_integration_tests/services/test_dataset_service_permissions.py new file mode 100644 index 0000000000..1b4179c9c7 --- /dev/null +++ b/api/tests/test_containers_integration_tests/services/test_dataset_service_permissions.py @@ -0,0 +1,613 @@ +"""Testcontainers integration tests for DatasetService permission and lifecycle SQL paths.""" + +from datetime import datetime +from types import SimpleNamespace +from unittest.mock import patch +from uuid import uuid4 + +import pytest +from sqlalchemy.orm import Session +from werkzeug.exceptions import NotFound + +from core.rag.index_processor.constant.index_type import IndexTechniqueType +from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole +from models.dataset import ( + AppDatasetJoin, + Dataset, + DatasetAutoDisableLog, + DatasetCollectionBinding, + DatasetPermission, + DatasetPermissionEnum, +) +from models.enums import DataSourceType +from services.dataset_service import DatasetCollectionBindingService, DatasetPermissionService, DatasetService +from services.errors.account import NoPermissionError + + +class DatasetPermissionIntegrationFactory: + @staticmethod + def create_account_with_tenant( + db_session_with_containers: Session, + role: TenantAccountRole = TenantAccountRole.OWNER, + ) -> tuple[Account, Tenant]: + account = Account( + email=f"{uuid4()}@example.com", + name=f"user-{uuid4()}", + interface_language="en-US", + status="active", + ) + tenant = Tenant(name=f"tenant-{uuid4()}", status="normal") + db_session_with_containers.add_all([account, tenant]) + db_session_with_containers.flush() + + join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=role, + current=True, + ) + db_session_with_containers.add(join) + db_session_with_containers.commit() + + account.role = role + account._current_tenant = tenant + return account, tenant + + @staticmethod + def create_account_in_tenant( + db_session_with_containers: Session, + tenant: Tenant, + role: TenantAccountRole = TenantAccountRole.EDITOR, + ) -> Account: + account = Account( + email=f"{uuid4()}@example.com", + name=f"user-{uuid4()}", + interface_language="en-US", + status="active", + ) + db_session_with_containers.add(account) + db_session_with_containers.flush() + + join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=role, + current=True, + ) + db_session_with_containers.add(join) + db_session_with_containers.commit() + + account.role = role + account._current_tenant = tenant + return account + + @staticmethod + def create_dataset( + db_session_with_containers: Session, + *, + tenant_id: str, + created_by: str, + name: str | None = None, + permission: DatasetPermissionEnum = DatasetPermissionEnum.ONLY_ME, + indexing_technique: str | None = IndexTechniqueType.HIGH_QUALITY, + enable_api: bool = True, + ) -> Dataset: + dataset = Dataset( + tenant_id=tenant_id, + name=name or f"dataset-{uuid4()}", + description="desc", + data_source_type=DataSourceType.UPLOAD_FILE, + indexing_technique=indexing_technique, + created_by=created_by, + provider="vendor", + permission=permission, + retrieval_model={"top_k": 2}, + ) + dataset.enable_api = enable_api + db_session_with_containers.add(dataset) + db_session_with_containers.commit() + return dataset + + @staticmethod + def create_dataset_permission( + db_session_with_containers: Session, + *, + dataset_id: str, + tenant_id: str, + account_id: str, + ) -> DatasetPermission: + permission = DatasetPermission( + dataset_id=dataset_id, + tenant_id=tenant_id, + account_id=account_id, + has_permission=True, + ) + db_session_with_containers.add(permission) + db_session_with_containers.commit() + return permission + + @staticmethod + def create_app_dataset_join( + db_session_with_containers: Session, + *, + dataset_id: str, + ) -> AppDatasetJoin: + join = AppDatasetJoin( + app_id=str(uuid4()), + dataset_id=dataset_id, + ) + db_session_with_containers.add(join) + db_session_with_containers.commit() + return join + + @staticmethod + def create_collection_binding( + db_session_with_containers: Session, + *, + provider_name: str, + model_name: str, + collection_type: str = "dataset", + ) -> DatasetCollectionBinding: + binding = DatasetCollectionBinding( + provider_name=provider_name, + model_name=model_name, + collection_name=f"collection_{uuid4().hex}", + type=collection_type, + ) + db_session_with_containers.add(binding) + db_session_with_containers.commit() + return binding + + @staticmethod + def create_auto_disable_log( + db_session_with_containers: Session, + *, + tenant_id: str, + dataset_id: str, + document_id: str, + ) -> DatasetAutoDisableLog: + log = DatasetAutoDisableLog( + tenant_id=tenant_id, + dataset_id=dataset_id, + document_id=document_id, + ) + db_session_with_containers.add(log) + db_session_with_containers.commit() + return log + + +class TestDatasetServicePermissionsAndLifecycle: + def test_delete_dataset_returns_false_when_dataset_is_missing(self, db_session_with_containers: Session): + owner, _tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + + result = DatasetService.delete_dataset(str(uuid4()), user=owner) + + assert result is False + + def test_delete_dataset_checks_permission_and_deletes_dataset(self, db_session_with_containers: Session): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + ) + + with patch("services.dataset_service.dataset_was_deleted.send") as send_deleted_signal: + result = DatasetService.delete_dataset(dataset.id, user=owner) + + assert result is True + assert db_session_with_containers.get(Dataset, dataset.id) is None + send_deleted_signal.assert_called_once_with(dataset) + + def test_dataset_use_check_returns_true_when_join_exists(self, db_session_with_containers: Session): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + ) + DatasetPermissionIntegrationFactory.create_app_dataset_join( + db_session_with_containers, + dataset_id=dataset.id, + ) + + assert DatasetService.dataset_use_check(dataset.id) is True + + def test_dataset_use_check_returns_false_when_join_missing(self, db_session_with_containers: Session): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + ) + + assert DatasetService.dataset_use_check(dataset.id) is False + + def test_check_dataset_permission_rejects_cross_tenant_access(self, db_session_with_containers: Session): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + outsider, _other_tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant( + db_session_with_containers + ) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + ) + + with pytest.raises(NoPermissionError, match="do not have permission"): + DatasetService.check_dataset_permission(dataset, outsider) + + def test_check_dataset_permission_rejects_only_me_dataset_for_non_creator( + self, db_session_with_containers: Session + ): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + member = DatasetPermissionIntegrationFactory.create_account_in_tenant(db_session_with_containers, tenant) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + permission=DatasetPermissionEnum.ONLY_ME, + ) + + with pytest.raises(NoPermissionError, match="do not have permission"): + DatasetService.check_dataset_permission(dataset, member) + + def test_check_dataset_permission_rejects_partial_team_user_without_binding( + self, db_session_with_containers: Session + ): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + member = DatasetPermissionIntegrationFactory.create_account_in_tenant(db_session_with_containers, tenant) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + permission=DatasetPermissionEnum.PARTIAL_TEAM, + ) + + with pytest.raises(NoPermissionError, match="do not have permission"): + DatasetService.check_dataset_permission(dataset, member) + + def test_check_dataset_permission_allows_partial_team_creator(self, db_session_with_containers: Session): + creator, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant( + db_session_with_containers, + role=TenantAccountRole.EDITOR, + ) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=creator.id, + permission=DatasetPermissionEnum.PARTIAL_TEAM, + ) + + DatasetService.check_dataset_permission(dataset, creator) + + def test_check_dataset_permission_allows_partial_team_member_with_binding( + self, db_session_with_containers: Session + ): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + member = DatasetPermissionIntegrationFactory.create_account_in_tenant(db_session_with_containers, tenant) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + permission=DatasetPermissionEnum.PARTIAL_TEAM, + ) + DatasetPermissionIntegrationFactory.create_dataset_permission( + db_session_with_containers, + dataset_id=dataset.id, + tenant_id=tenant.id, + account_id=member.id, + ) + + DatasetService.check_dataset_permission(dataset, member) + + def test_check_dataset_operator_permission_rejects_only_me_for_non_creator( + self, db_session_with_containers: Session + ): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + operator = DatasetPermissionIntegrationFactory.create_account_in_tenant( + db_session_with_containers, + tenant, + role=TenantAccountRole.EDITOR, + ) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + permission=DatasetPermissionEnum.ONLY_ME, + ) + + with pytest.raises(NoPermissionError, match="do not have permission"): + DatasetService.check_dataset_operator_permission(user=operator, dataset=dataset) + + def test_check_dataset_operator_permission_rejects_partial_team_without_binding( + self, db_session_with_containers: Session + ): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + operator = DatasetPermissionIntegrationFactory.create_account_in_tenant( + db_session_with_containers, + tenant, + role=TenantAccountRole.EDITOR, + ) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + permission=DatasetPermissionEnum.PARTIAL_TEAM, + ) + + with pytest.raises(NoPermissionError, match="do not have permission"): + DatasetService.check_dataset_operator_permission(user=operator, dataset=dataset) + + def test_check_dataset_operator_permission_allows_partial_team_with_binding( + self, db_session_with_containers: Session + ): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + operator = DatasetPermissionIntegrationFactory.create_account_in_tenant( + db_session_with_containers, + tenant, + role=TenantAccountRole.EDITOR, + ) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + permission=DatasetPermissionEnum.PARTIAL_TEAM, + ) + DatasetPermissionIntegrationFactory.create_dataset_permission( + db_session_with_containers, + dataset_id=dataset.id, + tenant_id=tenant.id, + account_id=operator.id, + ) + + DatasetService.check_dataset_operator_permission(user=operator, dataset=dataset) + + def test_update_dataset_api_status_raises_not_found_for_missing_dataset(self, flask_app_with_containers): + with flask_app_with_containers.app_context(): + with pytest.raises(NotFound, match="Dataset not found"): + DatasetService.update_dataset_api_status(str(uuid4()), True) + + def test_update_dataset_api_status_requires_current_user_id(self, db_session_with_containers: Session): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + enable_api=False, + ) + + with patch("services.dataset_service.current_user", SimpleNamespace(id=None)): + with pytest.raises(ValueError, match="Current user or current user id not found"): + DatasetService.update_dataset_api_status(dataset.id, True) + + def test_update_dataset_api_status_updates_fields_and_commits(self, db_session_with_containers: Session): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + enable_api=False, + ) + now = datetime(2026, 4, 14, 18, 0, 0) + + with ( + patch("services.dataset_service.current_user", owner), + patch("services.dataset_service.naive_utc_now", return_value=now), + ): + DatasetService.update_dataset_api_status(dataset.id, True) + + db_session_with_containers.refresh(dataset) + assert dataset.enable_api is True + assert dataset.updated_by == owner.id + assert dataset.updated_at == now + + def test_get_dataset_auto_disable_logs_returns_empty_when_billing_is_disabled( + self, db_session_with_containers: Session + ): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + features = SimpleNamespace( + billing=SimpleNamespace(enabled=False, subscription=SimpleNamespace(plan="professional")) + ) + + with ( + patch("services.dataset_service.current_user", owner), + patch("services.dataset_service.FeatureService.get_features", return_value=features), + ): + result = DatasetService.get_dataset_auto_disable_logs(str(uuid4())) + + assert result == {"document_ids": [], "count": 0} + + def test_get_dataset_auto_disable_logs_returns_recent_document_ids(self, db_session_with_containers: Session): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + ) + DatasetPermissionIntegrationFactory.create_auto_disable_log( + db_session_with_containers, + tenant_id=tenant.id, + dataset_id=dataset.id, + document_id=str(uuid4()), + ) + DatasetPermissionIntegrationFactory.create_auto_disable_log( + db_session_with_containers, + tenant_id=tenant.id, + dataset_id=dataset.id, + document_id=str(uuid4()), + ) + features = SimpleNamespace( + billing=SimpleNamespace(enabled=True, subscription=SimpleNamespace(plan="professional")) + ) + + with ( + patch("services.dataset_service.current_user", owner), + patch("services.dataset_service.FeatureService.get_features", return_value=features), + ): + result = DatasetService.get_dataset_auto_disable_logs(dataset.id) + + assert result["count"] == 2 + assert len(result["document_ids"]) == 2 + + +class TestDatasetCollectionBindingServiceIntegration: + def test_get_dataset_collection_binding_returns_existing_binding(self, db_session_with_containers: Session): + binding = DatasetPermissionIntegrationFactory.create_collection_binding( + db_session_with_containers, + provider_name="provider", + model_name="model", + ) + + result = DatasetCollectionBindingService.get_dataset_collection_binding("provider", "model") + + assert result.id == binding.id + + def test_get_dataset_collection_binding_creates_binding_when_missing(self, db_session_with_containers: Session): + result = DatasetCollectionBindingService.get_dataset_collection_binding("provider", "missing-model") + + persisted = db_session_with_containers.get(DatasetCollectionBinding, result.id) + assert persisted is not None + assert persisted.provider_name == "provider" + assert persisted.model_name == "missing-model" + assert persisted.type == "dataset" + assert persisted.collection_name + + def test_get_dataset_collection_binding_by_id_and_type_raises_when_missing(self, flask_app_with_containers): + with flask_app_with_containers.app_context(): + with pytest.raises(ValueError, match="Dataset collection binding not found"): + DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(str(uuid4())) + + def test_get_dataset_collection_binding_by_id_and_type_returns_binding(self, db_session_with_containers: Session): + binding = DatasetPermissionIntegrationFactory.create_collection_binding( + db_session_with_containers, + provider_name="provider", + model_name="model", + ) + + result = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(binding.id) + + assert result.id == binding.id + + +class TestDatasetPermissionServiceIntegration: + def test_get_dataset_partial_member_list_returns_scalar_results(self, db_session_with_containers: Session): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + member_a = DatasetPermissionIntegrationFactory.create_account_in_tenant(db_session_with_containers, tenant) + member_b = DatasetPermissionIntegrationFactory.create_account_in_tenant(db_session_with_containers, tenant) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + permission=DatasetPermissionEnum.PARTIAL_TEAM, + ) + DatasetPermissionIntegrationFactory.create_dataset_permission( + db_session_with_containers, + dataset_id=dataset.id, + tenant_id=tenant.id, + account_id=member_a.id, + ) + DatasetPermissionIntegrationFactory.create_dataset_permission( + db_session_with_containers, + dataset_id=dataset.id, + tenant_id=tenant.id, + account_id=member_b.id, + ) + + result = DatasetPermissionService.get_dataset_partial_member_list(dataset.id) + + assert set(result) == {member_a.id, member_b.id} + + def test_update_partial_member_list_replaces_permissions_and_commits(self, db_session_with_containers: Session): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + member_a = DatasetPermissionIntegrationFactory.create_account_in_tenant(db_session_with_containers, tenant) + member_b = DatasetPermissionIntegrationFactory.create_account_in_tenant(db_session_with_containers, tenant) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + permission=DatasetPermissionEnum.PARTIAL_TEAM, + ) + stale_member = DatasetPermissionIntegrationFactory.create_account_in_tenant(db_session_with_containers, tenant) + DatasetPermissionIntegrationFactory.create_dataset_permission( + db_session_with_containers, + dataset_id=dataset.id, + tenant_id=tenant.id, + account_id=stale_member.id, + ) + + DatasetPermissionService.update_partial_member_list( + tenant.id, + dataset.id, + [{"user_id": member_a.id}, {"user_id": member_b.id}], + ) + + permissions = db_session_with_containers.query(DatasetPermission).filter_by(dataset_id=dataset.id).all() + assert {permission.account_id for permission in permissions} == {member_a.id, member_b.id} + + def test_check_permission_requires_dataset_editor(self): + user = SimpleNamespace(is_dataset_editor=False, is_dataset_operator=False) + dataset = SimpleNamespace(id="dataset-1", permission=DatasetPermissionEnum.ALL_TEAM) + + with pytest.raises(NoPermissionError, match="does not have permission"): + DatasetPermissionService.check_permission(user, dataset, DatasetPermissionEnum.ALL_TEAM, []) + + def test_check_permission_prevents_dataset_operator_from_changing_permission_mode(self): + user = SimpleNamespace(is_dataset_editor=True, is_dataset_operator=True) + dataset = SimpleNamespace(id="dataset-1", permission=DatasetPermissionEnum.ALL_TEAM) + + with pytest.raises(NoPermissionError, match="cannot change the dataset permissions"): + DatasetPermissionService.check_permission(user, dataset, DatasetPermissionEnum.ONLY_ME, []) + + def test_check_permission_requires_partial_member_list_for_partial_members_mode(self): + user = SimpleNamespace(is_dataset_editor=True, is_dataset_operator=True) + dataset = SimpleNamespace(id="dataset-1", permission=DatasetPermissionEnum.PARTIAL_TEAM) + + with pytest.raises(ValueError, match="Partial member list is required"): + DatasetPermissionService.check_permission(user, dataset, DatasetPermissionEnum.PARTIAL_TEAM, []) + + def test_check_permission_rejects_dataset_operator_member_list_changes(self): + user = SimpleNamespace(is_dataset_editor=True, is_dataset_operator=True) + dataset = SimpleNamespace(id="dataset-1", permission=DatasetPermissionEnum.PARTIAL_TEAM) + + with patch.object(DatasetPermissionService, "get_dataset_partial_member_list", return_value=["user-1"]): + with pytest.raises(ValueError, match="cannot change the dataset permissions"): + DatasetPermissionService.check_permission( + user, + dataset, + DatasetPermissionEnum.PARTIAL_TEAM, + [{"user_id": "user-2"}], + ) + + def test_check_permission_allows_dataset_operator_when_member_list_is_unchanged(self): + user = SimpleNamespace(is_dataset_editor=True, is_dataset_operator=True) + dataset = SimpleNamespace(id="dataset-1", permission=DatasetPermissionEnum.PARTIAL_TEAM) + + with patch.object(DatasetPermissionService, "get_dataset_partial_member_list", return_value=["user-1"]): + DatasetPermissionService.check_permission( + user, + dataset, + DatasetPermissionEnum.PARTIAL_TEAM, + [{"user_id": "user-1"}], + ) + + def test_clear_partial_member_list_deletes_permissions_and_commits(self, db_session_with_containers: Session): + owner, tenant = DatasetPermissionIntegrationFactory.create_account_with_tenant(db_session_with_containers) + member = DatasetPermissionIntegrationFactory.create_account_in_tenant(db_session_with_containers, tenant) + dataset = DatasetPermissionIntegrationFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + permission=DatasetPermissionEnum.PARTIAL_TEAM, + ) + DatasetPermissionIntegrationFactory.create_dataset_permission( + db_session_with_containers, + dataset_id=dataset.id, + tenant_id=tenant.id, + account_id=member.id, + ) + + DatasetPermissionService.clear_partial_member_list(dataset.id) + + remaining = db_session_with_containers.query(DatasetPermission).filter_by(dataset_id=dataset.id).all() + assert remaining == [] diff --git a/api/tests/unit_tests/services/test_dataset_service_dataset.py b/api/tests/unit_tests/services/test_dataset_service_dataset.py index 2913ae20fe..3d08b6fd09 100644 --- a/api/tests/unit_tests/services/test_dataset_service_dataset.py +++ b/api/tests/unit_tests/services/test_dataset_service_dataset.py @@ -1,29 +1,20 @@ """Unit tests for DatasetService and dataset-related collaborators.""" from .dataset_service_test_helpers import ( - CloudPlan, - Dataset, - DatasetCollectionBindingService, DatasetNameDuplicateError, DatasetPermissionEnum, DatasetPermissionService, - DatasetProcessRule, DatasetService, DatasetServiceUnitDataFactory, - DocumentIndexingError, - DocumentService, LLMBadRequestError, MagicMock, - Mock, ModelFeature, ModelType, NoPermissionError, - NotFound, PipelineIconInfo, ProviderTokenNotInitError, RagPipelineDatasetCreateEntity, SimpleNamespace, - TenantAccountRole, _make_knowledge_configuration, _make_retrieval_model, _make_session_context, @@ -33,127 +24,6 @@ from .dataset_service_test_helpers import ( ) -class TestDatasetServiceQueries: - """Unit tests for DatasetService query composition and fallback branches.""" - - @pytest.fixture - def mock_dataset_query_dependencies(self): - with ( - patch("services.dataset_service.db") as mock_db, - patch("services.dataset_service.helper.escape_like_pattern", return_value="escaped-search") as escape_like, - patch("services.dataset_service.TagService.get_target_ids_by_tag_ids") as get_target_ids, - ): - mock_db.paginate.return_value = SimpleNamespace(items=["dataset"], total=1) - yield { - "db": mock_db, - "escape_like_pattern": escape_like, - "get_target_ids": get_target_ids, - } - - def test_get_datasets_returns_paginated_results_for_public_view(self, mock_dataset_query_dependencies): - items, total = DatasetService.get_datasets(page=1, per_page=20, tenant_id="tenant-1") - - assert items == ["dataset"] - assert total == 1 - mock_dataset_query_dependencies["db"].paginate.assert_called_once() - mock_dataset_query_dependencies["escape_like_pattern"].assert_not_called() - - def test_get_datasets_short_circuits_for_dataset_operator_without_permissions( - self, mock_dataset_query_dependencies - ): - user = DatasetServiceUnitDataFactory.create_user_mock(role=TenantAccountRole.DATASET_OPERATOR) - mock_dataset_query_dependencies["db"].session.scalars.return_value.all.return_value = [] - - items, total = DatasetService.get_datasets(page=1, per_page=20, tenant_id="tenant-1", user=user) - - assert items == [] - assert total == 0 - mock_dataset_query_dependencies["db"].paginate.assert_not_called() - - def test_get_datasets_short_circuits_when_tag_lookup_returns_no_target_ids(self, mock_dataset_query_dependencies): - mock_dataset_query_dependencies["get_target_ids"].return_value = [] - - items, total = DatasetService.get_datasets( - page=1, - per_page=20, - tenant_id="tenant-1", - tag_ids=["tag-1"], - ) - - assert items == [] - assert total == 0 - mock_dataset_query_dependencies["get_target_ids"].assert_called_once_with("knowledge", "tenant-1", ["tag-1"]) - mock_dataset_query_dependencies["db"].paginate.assert_not_called() - - def test_get_datasets_search_and_tag_filters_call_collaborators(self, mock_dataset_query_dependencies): - mock_dataset_query_dependencies["get_target_ids"].return_value = ["dataset-1"] - - items, total = DatasetService.get_datasets( - page=2, - per_page=10, - tenant_id="tenant-1", - search="report", - tag_ids=["tag-1"], - ) - - assert items == ["dataset"] - assert total == 1 - mock_dataset_query_dependencies["escape_like_pattern"].assert_called_once_with("report") - mock_dataset_query_dependencies["get_target_ids"].assert_called_once_with("knowledge", "tenant-1", ["tag-1"]) - mock_dataset_query_dependencies["db"].paginate.assert_called_once() - - def test_get_process_rules_returns_latest_rule_when_present(self): - dataset_process_rule = Mock(spec=DatasetProcessRule) - dataset_process_rule.mode = "automatic" - dataset_process_rule.rules_dict = {"delimiter": "\n"} - - with patch("services.dataset_service.db") as mock_db: - (mock_db.session.execute.return_value.scalar_one_or_none.return_value) = dataset_process_rule - - result = DatasetService.get_process_rules("dataset-1") - - assert result == {"mode": "automatic", "rules": {"delimiter": "\n"}} - - def test_get_process_rules_falls_back_to_default_rules_when_missing(self): - with patch("services.dataset_service.db") as mock_db: - (mock_db.session.execute.return_value.scalar_one_or_none.return_value) = None - - result = DatasetService.get_process_rules("dataset-1") - - assert result == { - "mode": DocumentService.DEFAULT_RULES["mode"], - "rules": DocumentService.DEFAULT_RULES["rules"], - } - - def test_get_datasets_by_ids_returns_empty_for_missing_ids(self): - with patch("services.dataset_service.db") as mock_db: - items, total = DatasetService.get_datasets_by_ids([], "tenant-1") - - assert items == [] - assert total == 0 - mock_db.paginate.assert_not_called() - - def test_get_datasets_by_ids_uses_paginate_for_non_empty_input(self): - with patch("services.dataset_service.db") as mock_db: - mock_db.paginate.return_value = SimpleNamespace(items=["dataset-1"], total=1) - - items, total = DatasetService.get_datasets_by_ids(["dataset-1"], "tenant-1") - - assert items == ["dataset-1"] - assert total == 1 - mock_db.paginate.assert_called_once() - - def test_get_dataset_returns_first_match(self): - dataset = DatasetServiceUnitDataFactory.create_dataset_mock() - - with patch("services.dataset_service.db") as mock_db: - mock_db.session.get.return_value = dataset - - result = DatasetService.get_dataset(dataset.id) - - assert result is dataset - - class TestDatasetServiceValidation: """Unit tests for DatasetService validation helpers.""" @@ -1337,103 +1207,6 @@ class TestDatasetServiceRagPipelineSettings: class TestDatasetServicePermissionsAndLifecycle: """Unit tests for dataset permissions, deletion, and metadata helpers.""" - def test_delete_dataset_returns_false_when_dataset_is_missing(self): - with patch.object(DatasetService, "get_dataset", return_value=None): - result = DatasetService.delete_dataset("dataset-1", user=SimpleNamespace(id="user-1")) - - assert result is False - - def test_delete_dataset_checks_permission_and_deletes_dataset(self): - dataset = DatasetServiceUnitDataFactory.create_dataset_mock() - - with ( - patch.object(DatasetService, "get_dataset", return_value=dataset), - patch.object(DatasetService, "check_dataset_permission") as check_permission, - patch("services.dataset_service.dataset_was_deleted.send") as send_deleted_signal, - patch("services.dataset_service.db") as mock_db, - ): - result = DatasetService.delete_dataset(dataset.id, user=SimpleNamespace(id="user-1")) - - assert result is True - check_permission.assert_called_once_with(dataset, SimpleNamespace(id="user-1")) - send_deleted_signal.assert_called_once_with(dataset) - mock_db.session.delete.assert_called_once_with(dataset) - mock_db.session.commit.assert_called_once() - - def test_dataset_use_check_returns_scalar_result(self): - with patch("services.dataset_service.db") as mock_db: - mock_db.session.execute.return_value.scalar_one.return_value = True - - result = DatasetService.dataset_use_check("dataset-1") - - assert result is True - - def test_check_dataset_permission_rejects_cross_tenant_access(self): - dataset = DatasetServiceUnitDataFactory.create_dataset_mock(tenant_id="tenant-a") - user = DatasetServiceUnitDataFactory.create_user_mock(tenant_id="tenant-b") - - with pytest.raises(NoPermissionError, match="do not have permission"): - DatasetService.check_dataset_permission(dataset, user) - - def test_check_dataset_permission_rejects_only_me_dataset_for_non_creator(self): - dataset = DatasetServiceUnitDataFactory.create_dataset_mock( - permission=DatasetPermissionEnum.ONLY_ME, - created_by="owner-1", - ) - user = DatasetServiceUnitDataFactory.create_user_mock( - user_id="member-1", - role=TenantAccountRole.EDITOR, - ) - - with pytest.raises(NoPermissionError, match="do not have permission"): - DatasetService.check_dataset_permission(dataset, user) - - def test_check_dataset_permission_rejects_partial_team_user_without_binding(self): - dataset = DatasetServiceUnitDataFactory.create_dataset_mock( - permission=DatasetPermissionEnum.PARTIAL_TEAM, - created_by="owner-1", - ) - user = DatasetServiceUnitDataFactory.create_user_mock( - user_id="member-1", - role=TenantAccountRole.EDITOR, - ) - - with patch("services.dataset_service.db") as mock_db: - mock_db.session.scalar.return_value = None - - with pytest.raises(NoPermissionError, match="do not have permission"): - DatasetService.check_dataset_permission(dataset, user) - - def test_check_dataset_permission_allows_partial_team_creator_without_lookup(self): - dataset = DatasetServiceUnitDataFactory.create_dataset_mock( - permission=DatasetPermissionEnum.PARTIAL_TEAM, - created_by="creator-1", - ) - user = DatasetServiceUnitDataFactory.create_user_mock( - user_id="creator-1", - role=TenantAccountRole.EDITOR, - ) - - with patch("services.dataset_service.db") as mock_db: - DatasetService.check_dataset_permission(dataset, user) - - mock_db.session.scalar.assert_not_called() - - def test_check_dataset_permission_allows_partial_team_member_with_binding(self): - dataset = DatasetServiceUnitDataFactory.create_dataset_mock( - permission=DatasetPermissionEnum.PARTIAL_TEAM, - created_by="owner-1", - ) - user = DatasetServiceUnitDataFactory.create_user_mock( - user_id="member-1", - role=TenantAccountRole.EDITOR, - ) - - with patch("services.dataset_service.db") as mock_db: - mock_db.session.scalar.return_value = object() - - DatasetService.check_dataset_permission(dataset, user) - def test_check_dataset_operator_permission_validates_required_arguments(self): with pytest.raises(ValueError, match="Dataset not found"): DatasetService.check_dataset_operator_permission(user=SimpleNamespace(id="user-1"), dataset=None) @@ -1441,279 +1214,14 @@ class TestDatasetServicePermissionsAndLifecycle: with pytest.raises(ValueError, match="User not found"): DatasetService.check_dataset_operator_permission(user=None, dataset=SimpleNamespace(id="dataset-1")) - def test_check_dataset_operator_permission_rejects_only_me_for_non_creator(self): - dataset = DatasetServiceUnitDataFactory.create_dataset_mock( - permission=DatasetPermissionEnum.ONLY_ME, - created_by="owner-1", - ) - user = DatasetServiceUnitDataFactory.create_user_mock( - user_id="member-1", - role=TenantAccountRole.EDITOR, - ) - - with pytest.raises(NoPermissionError, match="do not have permission"): - DatasetService.check_dataset_operator_permission(user=user, dataset=dataset) - - def test_check_dataset_operator_permission_rejects_partial_team_without_binding(self): - dataset = DatasetServiceUnitDataFactory.create_dataset_mock(permission=DatasetPermissionEnum.PARTIAL_TEAM) - user = DatasetServiceUnitDataFactory.create_user_mock( - user_id="member-1", - role=TenantAccountRole.EDITOR, - ) - - with patch("services.dataset_service.db") as mock_db: - mock_db.session.scalars.return_value.all.return_value = [] - - with pytest.raises(NoPermissionError, match="do not have permission"): - DatasetService.check_dataset_operator_permission(user=user, dataset=dataset) - - def test_get_dataset_queries_delegates_to_paginate(self): - with patch("services.dataset_service.db") as mock_db: - mock_db.desc.side_effect = lambda column: column - mock_db.paginate.return_value = SimpleNamespace(items=["query"], total=1) - - items, total = DatasetService.get_dataset_queries("dataset-1", page=1, per_page=20) - - assert items == ["query"] - assert total == 1 - mock_db.paginate.assert_called_once() - - def test_get_related_apps_returns_ordered_query_results(self): - with patch("services.dataset_service.db") as mock_db: - mock_db.desc.side_effect = lambda column: column - mock_db.session.scalars.return_value.all.return_value = ["relation-1"] - - result = DatasetService.get_related_apps("dataset-1") - - assert result == ["relation-1"] - - def test_update_dataset_api_status_raises_not_found_for_missing_dataset(self): - with patch.object(DatasetService, "get_dataset", return_value=None): - with pytest.raises(NotFound, match="Dataset not found"): - DatasetService.update_dataset_api_status("dataset-1", True) - - def test_update_dataset_api_status_requires_current_user_id(self): - dataset = DatasetServiceUnitDataFactory.create_dataset_mock(enable_api=False) - - with ( - patch.object(DatasetService, "get_dataset", return_value=dataset), - patch("services.dataset_service.current_user", SimpleNamespace(id=None)), - ): - with pytest.raises(ValueError, match="Current user or current user id not found"): - DatasetService.update_dataset_api_status(dataset.id, True) - - def test_update_dataset_api_status_updates_fields_and_commits(self): - dataset = DatasetServiceUnitDataFactory.create_dataset_mock(enable_api=False) - now = object() - - with ( - patch.object(DatasetService, "get_dataset", return_value=dataset), - patch("services.dataset_service.current_user", SimpleNamespace(id="user-1")), - patch("services.dataset_service.naive_utc_now", return_value=now), - patch("services.dataset_service.db") as mock_db, - ): - DatasetService.update_dataset_api_status(dataset.id, True) - - assert dataset.enable_api is True - assert dataset.updated_by == "user-1" - assert dataset.updated_at is now - mock_db.session.commit.assert_called_once() - - def test_get_dataset_auto_disable_logs_returns_empty_when_billing_is_disabled(self): - class FakeAccount: - pass - - current_user = FakeAccount() - current_user.current_tenant_id = "tenant-1" - - features = SimpleNamespace( - billing=SimpleNamespace(enabled=False, subscription=SimpleNamespace(plan=CloudPlan.PROFESSIONAL)) - ) - - with ( - patch("services.dataset_service.Account", FakeAccount), - patch("services.dataset_service.current_user", current_user), - patch("services.dataset_service.FeatureService.get_features", return_value=features), - patch("services.dataset_service.db") as mock_db, - ): - result = DatasetService.get_dataset_auto_disable_logs("dataset-1") - - assert result == {"document_ids": [], "count": 0} - mock_db.session.scalars.assert_not_called() - - def test_get_dataset_auto_disable_logs_returns_recent_document_ids(self): - class FakeAccount: - pass - - current_user = FakeAccount() - current_user.current_tenant_id = "tenant-1" - logs = [SimpleNamespace(document_id="doc-1"), SimpleNamespace(document_id="doc-2")] - features = SimpleNamespace( - billing=SimpleNamespace(enabled=True, subscription=SimpleNamespace(plan=CloudPlan.PROFESSIONAL)) - ) - - with ( - patch("services.dataset_service.Account", FakeAccount), - patch("services.dataset_service.current_user", current_user), - patch("services.dataset_service.FeatureService.get_features", return_value=features), - patch("services.dataset_service.db") as mock_db, - ): - mock_db.session.scalars.return_value.all.return_value = logs - - result = DatasetService.get_dataset_auto_disable_logs("dataset-1") - - assert result == {"document_ids": ["doc-1", "doc-2"], "count": 2} - - -class TestDatasetServiceDocumentIndexing: - """Unit tests for pause/recover/retry orchestration without SQL assertions.""" - - @pytest.fixture - def mock_document_service_dependencies(self): - with ( - patch("services.dataset_service.redis_client") as mock_redis, - patch("services.dataset_service.db.session") as mock_db_session, - patch("services.dataset_service.current_user") as mock_current_user, - ): - mock_current_user.id = "user-123" - yield { - "redis_client": mock_redis, - "db_session": mock_db_session, - "current_user": mock_current_user, - } - - def test_pause_document_success(self, mock_document_service_dependencies): - document = DatasetServiceUnitDataFactory.create_document_mock(indexing_status="indexing") - - DocumentService.pause_document(document) - - assert document.is_paused is True - assert document.paused_by == "user-123" - mock_document_service_dependencies["db_session"].add.assert_called_once_with(document) - mock_document_service_dependencies["db_session"].commit.assert_called_once() - mock_document_service_dependencies["redis_client"].setnx.assert_called_once_with( - f"document_{document.id}_is_paused", - "True", - ) - - def test_pause_document_invalid_status_error(self, mock_document_service_dependencies): - document = DatasetServiceUnitDataFactory.create_document_mock(indexing_status="completed") - - with pytest.raises(DocumentIndexingError): - DocumentService.pause_document(document) - - def test_recover_document_success(self, mock_document_service_dependencies): - document = DatasetServiceUnitDataFactory.create_document_mock(indexing_status="indexing", is_paused=True) - - with patch("services.dataset_service.recover_document_indexing_task") as recover_task: - DocumentService.recover_document(document) - - assert document.is_paused is False - assert document.paused_by is None - assert document.paused_at is None - mock_document_service_dependencies["db_session"].add.assert_called_once_with(document) - mock_document_service_dependencies["db_session"].commit.assert_called_once() - mock_document_service_dependencies["redis_client"].delete.assert_called_once_with( - f"document_{document.id}_is_paused" - ) - recover_task.delay.assert_called_once_with(document.dataset_id, document.id) - - def test_retry_document_indexing_success(self, mock_document_service_dependencies): - dataset_id = "dataset-123" - documents = [ - DatasetServiceUnitDataFactory.create_document_mock(document_id="doc-1", indexing_status="error"), - DatasetServiceUnitDataFactory.create_document_mock(document_id="doc-2", indexing_status="error"), - ] - mock_document_service_dependencies["redis_client"].get.return_value = None - - with patch("services.dataset_service.retry_document_indexing_task") as retry_task: - DocumentService.retry_document(dataset_id, documents) - - assert all(document.indexing_status == "waiting" for document in documents) - assert mock_document_service_dependencies["db_session"].add.call_count == 2 - assert mock_document_service_dependencies["db_session"].commit.call_count == 2 - assert mock_document_service_dependencies["redis_client"].setex.call_count == 2 - retry_task.delay.assert_called_once_with(dataset_id, ["doc-1", "doc-2"], "user-123") - class TestDatasetCollectionBindingService: """Unit tests for dataset collection binding lookups and creation.""" - def test_get_dataset_collection_binding_returns_existing_binding(self): - binding = SimpleNamespace(id="binding-1") - - with patch("services.dataset_service.db") as mock_db: - mock_db.session.scalar.return_value = binding - - result = DatasetCollectionBindingService.get_dataset_collection_binding("provider", "model") - - assert result is binding - mock_db.session.add.assert_not_called() - - def test_get_dataset_collection_binding_creates_binding_when_missing(self): - created_binding = SimpleNamespace(id="binding-2") - - with ( - patch("services.dataset_service.db") as mock_db, - patch("services.dataset_service.select"), - patch("services.dataset_service.DatasetCollectionBinding", return_value=created_binding) as binding_cls, - patch.object(Dataset, "gen_collection_name_by_id", return_value="generated-collection"), - ): - mock_db.session.scalar.return_value = None - - result = DatasetCollectionBindingService.get_dataset_collection_binding("provider", "model", "dataset") - - assert result is created_binding - binding_cls.assert_called_once_with( - provider_name="provider", - model_name="model", - collection_name="generated-collection", - type="dataset", - ) - mock_db.session.add.assert_called_once_with(created_binding) - mock_db.session.commit.assert_called_once() - - def test_get_dataset_collection_binding_by_id_and_type_raises_when_missing(self): - with patch("services.dataset_service.db") as mock_db: - mock_db.session.scalar.return_value = None - - with pytest.raises(ValueError, match="Dataset collection binding not found"): - DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type("binding-1") - - def test_get_dataset_collection_binding_by_id_and_type_returns_binding(self): - binding = SimpleNamespace(id="binding-1") - - with patch("services.dataset_service.db") as mock_db: - mock_db.session.scalar.return_value = binding - - result = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type("binding-1") - - assert result is binding - class TestDatasetPermissionService: """Unit tests for dataset partial-member management helpers.""" - def test_get_dataset_partial_member_list_returns_scalar_results(self): - with patch("services.dataset_service.db") as mock_db: - mock_db.session.scalars.return_value.all.return_value = ["user-1", "user-2"] - - result = DatasetPermissionService.get_dataset_partial_member_list("dataset-1") - - assert result == ["user-1", "user-2"] - - def test_update_partial_member_list_replaces_permissions_and_commits(self): - with patch("services.dataset_service.db") as mock_db: - DatasetPermissionService.update_partial_member_list( - "tenant-1", - "dataset-1", - [{"user_id": "user-1"}, {"user_id": "user-2"}], - ) - - mock_db.session.execute.assert_called() - mock_db.session.add_all.assert_called_once() - mock_db.session.commit.assert_called_once() - def test_update_partial_member_list_rolls_back_on_exception(self): with patch("services.dataset_service.db") as mock_db: mock_db.session.add_all.side_effect = RuntimeError("boom") @@ -1777,13 +1285,6 @@ class TestDatasetPermissionService: [{"user_id": "user-1"}], ) - def test_clear_partial_member_list_deletes_permissions_and_commits(self): - with patch("services.dataset_service.db") as mock_db: - DatasetPermissionService.clear_partial_member_list("dataset-1") - - mock_db.session.execute.assert_called() - mock_db.session.commit.assert_called_once() - def test_clear_partial_member_list_rolls_back_on_exception(self): with patch("services.dataset_service.db") as mock_db: mock_db.session.execute.side_effect = RuntimeError("boom")