test: migrate dataset service dataset mock tests to testcontainers (#35194)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
James 2026-04-14 21:52:31 +02:00 committed by GitHub
parent 25c388d0db
commit 693080aa12
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 613 additions and 499 deletions

View File

@ -0,0 +1,613 @@
"""Testcontainers integration tests for DatasetService permission and lifecycle SQL paths."""
from datetime import datetime
from types import SimpleNamespace
from unittest.mock import patch
from uuid import uuid4
import pytest
from sqlalchemy.orm import Session
from werkzeug.exceptions import NotFound
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import (
AppDatasetJoin,
Dataset,
DatasetAutoDisableLog,
DatasetCollectionBinding,
DatasetPermission,
DatasetPermissionEnum,
)
from models.enums import DataSourceType
from services.dataset_service import DatasetCollectionBindingService, DatasetPermissionService, DatasetService
from services.errors.account import NoPermissionError
class DatasetPermissionIntegrationFactory:
    """Persisted-fixture builders for the dataset service integration tests.

    Each public builder adds its rows through the session it is given and
    commits before returning the created object(s).
    """

    @staticmethod
    def _new_account() -> Account:
        """Return an unsaved, active account with a random email and name."""
        return Account(
            email=f"{uuid4()}@example.com",
            name=f"user-{uuid4()}",
            interface_language="en-US",
            status="active",
        )

    @staticmethod
    def _link_account_to_tenant(
        session: Session,
        account: Account,
        tenant: Tenant,
        role: TenantAccountRole,
    ) -> None:
        """Store the membership row, commit, then set role and tenant on the account object."""
        membership = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=role,
            current=True,
        )
        session.add(membership)
        session.commit()
        # These attributes are assigned on the in-memory object after the commit.
        account.role = role
        account._current_tenant = tenant

    @staticmethod
    def create_account_with_tenant(
        db_session_with_containers: Session,
        role: TenantAccountRole = TenantAccountRole.OWNER,
    ) -> tuple[Account, Tenant]:
        """Create a new tenant together with an account that joins it as ``role``."""
        factory = DatasetPermissionIntegrationFactory
        account = factory._new_account()
        tenant = Tenant(name=f"tenant-{uuid4()}", status="normal")
        db_session_with_containers.add_all([account, tenant])
        # Flush before tenant.id / account.id are read for the membership row.
        db_session_with_containers.flush()
        factory._link_account_to_tenant(db_session_with_containers, account, tenant, role)
        return account, tenant

    @staticmethod
    def create_account_in_tenant(
        db_session_with_containers: Session,
        tenant: Tenant,
        role: TenantAccountRole = TenantAccountRole.EDITOR,
    ) -> Account:
        """Create an account and join it to an already existing ``tenant`` as ``role``."""
        factory = DatasetPermissionIntegrationFactory
        account = factory._new_account()
        db_session_with_containers.add(account)
        # Flush before account.id is read for the membership row.
        db_session_with_containers.flush()
        factory._link_account_to_tenant(db_session_with_containers, account, tenant, role)
        return account

    @staticmethod
    def create_dataset(
        db_session_with_containers: Session,
        *,
        tenant_id: str,
        created_by: str,
        name: str | None = None,
        permission: DatasetPermissionEnum = DatasetPermissionEnum.ONLY_ME,
        indexing_technique: str | None = IndexTechniqueType.HIGH_QUALITY,
        enable_api: bool = True,
    ) -> Dataset:
        """Create and commit a dataset; a random name is generated when ``name`` is falsy."""
        dataset_name = name or f"dataset-{uuid4()}"
        record = Dataset(
            tenant_id=tenant_id,
            name=dataset_name,
            description="desc",
            data_source_type=DataSourceType.UPLOAD_FILE,
            indexing_technique=indexing_technique,
            created_by=created_by,
            provider="vendor",
            permission=permission,
            retrieval_model={"top_k": 2},
        )
        # enable_api is assigned after construction rather than passed to the constructor.
        record.enable_api = enable_api
        db_session_with_containers.add(record)
        db_session_with_containers.commit()
        return record

    @staticmethod
    def create_dataset_permission(
        db_session_with_containers: Session,
        *,
        dataset_id: str,
        tenant_id: str,
        account_id: str,
    ) -> DatasetPermission:
        """Create and commit a ``has_permission=True`` row for one account on one dataset."""
        grant = DatasetPermission(
            dataset_id=dataset_id,
            tenant_id=tenant_id,
            account_id=account_id,
            has_permission=True,
        )
        db_session_with_containers.add(grant)
        db_session_with_containers.commit()
        return grant

    @staticmethod
    def create_app_dataset_join(
        db_session_with_containers: Session,
        *,
        dataset_id: str,
    ) -> AppDatasetJoin:
        """Create and commit a join row between ``dataset_id`` and a random app id."""
        link = AppDatasetJoin(app_id=str(uuid4()), dataset_id=dataset_id)
        db_session_with_containers.add(link)
        db_session_with_containers.commit()
        return link

    @staticmethod
    def create_collection_binding(
        db_session_with_containers: Session,
        *,
        provider_name: str,
        model_name: str,
        collection_type: str = "dataset",
    ) -> DatasetCollectionBinding:
        """Create and commit a collection binding with a random collection name."""
        record = DatasetCollectionBinding(
            provider_name=provider_name,
            model_name=model_name,
            collection_name=f"collection_{uuid4().hex}",
            type=collection_type,
        )
        db_session_with_containers.add(record)
        db_session_with_containers.commit()
        return record

    @staticmethod
    def create_auto_disable_log(
        db_session_with_containers: Session,
        *,
        tenant_id: str,
        dataset_id: str,
        document_id: str,
    ) -> DatasetAutoDisableLog:
        """Create and commit one auto-disable log row for the given document."""
        entry = DatasetAutoDisableLog(
            tenant_id=tenant_id,
            dataset_id=dataset_id,
            document_id=document_id,
        )
        db_session_with_containers.add(entry)
        db_session_with_containers.commit()
        return entry
class TestDatasetServicePermissionsAndLifecycle:
    """Integration tests for DatasetService deletion, permission checks, API status and auto-disable logs."""

    def test_delete_dataset_returns_false_when_dataset_is_missing(self, db_session_with_containers: Session):
        """Deleting an id that matches no stored dataset yields ``False``."""
        factory = DatasetPermissionIntegrationFactory
        owner, _ = factory.create_account_with_tenant(db_session_with_containers)

        outcome = DatasetService.delete_dataset(str(uuid4()), user=owner)

        assert outcome is False

    def test_delete_dataset_checks_permission_and_deletes_dataset(self, db_session_with_containers: Session):
        """The creator can delete the dataset; the row is gone and the deleted signal is sent once."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
        )

        with patch("services.dataset_service.dataset_was_deleted.send") as deleted_signal:
            outcome = DatasetService.delete_dataset(dataset.id, user=owner)

        assert outcome is True
        assert db_session_with_containers.get(Dataset, dataset.id) is None
        deleted_signal.assert_called_once_with(dataset)

    def test_dataset_use_check_returns_true_when_join_exists(self, db_session_with_containers: Session):
        """A dataset that has an app join row is reported as in use."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
        )
        factory.create_app_dataset_join(db_session_with_containers, dataset_id=dataset.id)

        in_use = DatasetService.dataset_use_check(dataset.id)

        assert in_use is True

    def test_dataset_use_check_returns_false_when_join_missing(self, db_session_with_containers: Session):
        """A dataset without any app join row is reported as not in use."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
        )

        in_use = DatasetService.dataset_use_check(dataset.id)

        assert in_use is False

    def test_check_dataset_permission_rejects_cross_tenant_access(self, db_session_with_containers: Session):
        """An account created with its own separate tenant is denied access to the dataset."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        outsider, _ = factory.create_account_with_tenant(db_session_with_containers)
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
        )

        with pytest.raises(NoPermissionError, match="do not have permission"):
            DatasetService.check_dataset_permission(dataset, outsider)

    def test_check_dataset_permission_rejects_only_me_dataset_for_non_creator(
        self, db_session_with_containers: Session
    ):
        """An ONLY_ME dataset is closed to a same-tenant member who did not create it."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        member = factory.create_account_in_tenant(db_session_with_containers, tenant)
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
            permission=DatasetPermissionEnum.ONLY_ME,
        )

        with pytest.raises(NoPermissionError, match="do not have permission"):
            DatasetService.check_dataset_permission(dataset, member)

    def test_check_dataset_permission_rejects_partial_team_user_without_binding(
        self, db_session_with_containers: Session
    ):
        """A PARTIAL_TEAM dataset rejects a member that has no permission row."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        member = factory.create_account_in_tenant(db_session_with_containers, tenant)
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
            permission=DatasetPermissionEnum.PARTIAL_TEAM,
        )

        with pytest.raises(NoPermissionError, match="do not have permission"):
            DatasetService.check_dataset_permission(dataset, member)

    def test_check_dataset_permission_allows_partial_team_creator(self, db_session_with_containers: Session):
        """The EDITOR-role creator of a PARTIAL_TEAM dataset passes the check without a permission row."""
        factory = DatasetPermissionIntegrationFactory
        creator, tenant = factory.create_account_with_tenant(
            db_session_with_containers,
            role=TenantAccountRole.EDITOR,
        )
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=creator.id,
            permission=DatasetPermissionEnum.PARTIAL_TEAM,
        )

        # Passing means no exception is raised.
        DatasetService.check_dataset_permission(dataset, creator)

    def test_check_dataset_permission_allows_partial_team_member_with_binding(
        self, db_session_with_containers: Session
    ):
        """A member with a permission row passes the PARTIAL_TEAM check."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        member = factory.create_account_in_tenant(db_session_with_containers, tenant)
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
            permission=DatasetPermissionEnum.PARTIAL_TEAM,
        )
        factory.create_dataset_permission(
            db_session_with_containers,
            dataset_id=dataset.id,
            tenant_id=tenant.id,
            account_id=member.id,
        )

        # Passing means no exception is raised.
        DatasetService.check_dataset_permission(dataset, member)

    def test_check_dataset_operator_permission_rejects_only_me_for_non_creator(
        self, db_session_with_containers: Session
    ):
        """The operator check denies an EDITOR who did not create an ONLY_ME dataset."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        operator = factory.create_account_in_tenant(
            db_session_with_containers,
            tenant,
            role=TenantAccountRole.EDITOR,
        )
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
            permission=DatasetPermissionEnum.ONLY_ME,
        )

        with pytest.raises(NoPermissionError, match="do not have permission"):
            DatasetService.check_dataset_operator_permission(user=operator, dataset=dataset)

    def test_check_dataset_operator_permission_rejects_partial_team_without_binding(
        self, db_session_with_containers: Session
    ):
        """The operator check denies a PARTIAL_TEAM dataset when the user has no permission row."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        operator = factory.create_account_in_tenant(
            db_session_with_containers,
            tenant,
            role=TenantAccountRole.EDITOR,
        )
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
            permission=DatasetPermissionEnum.PARTIAL_TEAM,
        )

        with pytest.raises(NoPermissionError, match="do not have permission"):
            DatasetService.check_dataset_operator_permission(user=operator, dataset=dataset)

    def test_check_dataset_operator_permission_allows_partial_team_with_binding(
        self, db_session_with_containers: Session
    ):
        """The operator check passes for a PARTIAL_TEAM dataset once a permission row exists."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        operator = factory.create_account_in_tenant(
            db_session_with_containers,
            tenant,
            role=TenantAccountRole.EDITOR,
        )
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
            permission=DatasetPermissionEnum.PARTIAL_TEAM,
        )
        factory.create_dataset_permission(
            db_session_with_containers,
            dataset_id=dataset.id,
            tenant_id=tenant.id,
            account_id=operator.id,
        )

        # Passing means no exception is raised.
        DatasetService.check_dataset_operator_permission(user=operator, dataset=dataset)

    def test_update_dataset_api_status_raises_not_found_for_missing_dataset(self, flask_app_with_containers):
        """Updating the API status of an unknown dataset id raises ``NotFound``."""
        with (
            flask_app_with_containers.app_context(),
            pytest.raises(NotFound, match="Dataset not found"),
        ):
            DatasetService.update_dataset_api_status(str(uuid4()), True)

    def test_update_dataset_api_status_requires_current_user_id(self, db_session_with_containers: Session):
        """A current user whose ``id`` is ``None`` makes the update raise ``ValueError``."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
            enable_api=False,
        )

        with (
            patch("services.dataset_service.current_user", SimpleNamespace(id=None)),
            pytest.raises(ValueError, match="Current user or current user id not found"),
        ):
            DatasetService.update_dataset_api_status(dataset.id, True)

    def test_update_dataset_api_status_updates_fields_and_commits(self, db_session_with_containers: Session):
        """Enabling the API stores the flag, the acting user id and the patched timestamp."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
            enable_api=False,
        )
        frozen_now = datetime(2026, 4, 14, 18, 0, 0)

        with (
            patch("services.dataset_service.current_user", owner),
            patch("services.dataset_service.naive_utc_now", return_value=frozen_now),
        ):
            DatasetService.update_dataset_api_status(dataset.id, True)

        # Reload from the database so the assertions see the stored values.
        db_session_with_containers.refresh(dataset)
        assert dataset.enable_api is True
        assert dataset.updated_by == owner.id
        assert dataset.updated_at == frozen_now

    def test_get_dataset_auto_disable_logs_returns_empty_when_billing_is_disabled(
        self, db_session_with_containers: Session
    ):
        """With billing disabled the service returns an empty result."""
        factory = DatasetPermissionIntegrationFactory
        owner, _ = factory.create_account_with_tenant(db_session_with_containers)
        subscription = SimpleNamespace(plan="professional")
        billing = SimpleNamespace(enabled=False, subscription=subscription)
        features = SimpleNamespace(billing=billing)

        with (
            patch("services.dataset_service.current_user", owner),
            patch("services.dataset_service.FeatureService.get_features", return_value=features),
        ):
            logs = DatasetService.get_dataset_auto_disable_logs(str(uuid4()))

        assert logs == {"document_ids": [], "count": 0}

    def test_get_dataset_auto_disable_logs_returns_recent_document_ids(self, db_session_with_containers: Session):
        """With billing enabled, both freshly created log rows are returned."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
        )
        for _ in range(2):
            factory.create_auto_disable_log(
                db_session_with_containers,
                tenant_id=tenant.id,
                dataset_id=dataset.id,
                document_id=str(uuid4()),
            )
        subscription = SimpleNamespace(plan="professional")
        billing = SimpleNamespace(enabled=True, subscription=subscription)
        features = SimpleNamespace(billing=billing)

        with (
            patch("services.dataset_service.current_user", owner),
            patch("services.dataset_service.FeatureService.get_features", return_value=features),
        ):
            logs = DatasetService.get_dataset_auto_disable_logs(dataset.id)

        assert logs["count"] == 2
        assert len(logs["document_ids"]) == 2
class TestDatasetCollectionBindingServiceIntegration:
    """Integration tests for looking up and creating dataset collection bindings."""

    def test_get_dataset_collection_binding_returns_existing_binding(self, db_session_with_containers: Session):
        """A binding stored for the provider/model pair is the one returned."""
        stored = DatasetPermissionIntegrationFactory.create_collection_binding(
            db_session_with_containers,
            provider_name="provider",
            model_name="model",
        )

        found = DatasetCollectionBindingService.get_dataset_collection_binding("provider", "model")

        assert found.id == stored.id

    def test_get_dataset_collection_binding_creates_binding_when_missing(self, db_session_with_containers: Session):
        """Asking for an unknown provider/model pair persists a new ``dataset`` binding."""
        created = DatasetCollectionBindingService.get_dataset_collection_binding("provider", "missing-model")

        row = db_session_with_containers.get(DatasetCollectionBinding, created.id)

        assert row is not None
        assert row.provider_name == "provider"
        assert row.model_name == "missing-model"
        assert row.type == "dataset"
        assert row.collection_name

    def test_get_dataset_collection_binding_by_id_and_type_raises_when_missing(self, flask_app_with_containers):
        """Looking up an unknown binding id raises ``ValueError``."""
        with (
            flask_app_with_containers.app_context(),
            pytest.raises(ValueError, match="Dataset collection binding not found"),
        ):
            DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(str(uuid4()))

    def test_get_dataset_collection_binding_by_id_and_type_returns_binding(self, db_session_with_containers: Session):
        """Looking up a stored binding by id returns that binding."""
        stored = DatasetPermissionIntegrationFactory.create_collection_binding(
            db_session_with_containers,
            provider_name="provider",
            model_name="model",
        )

        found = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(stored.id)

        assert found.id == stored.id
class TestDatasetPermissionServiceIntegration:
    """Tests for DatasetPermissionService member-list storage and ``check_permission`` rules."""

    def test_get_dataset_partial_member_list_returns_scalar_results(self, db_session_with_containers: Session):
        """The member list contains the account ids of every stored permission row."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        member_a = factory.create_account_in_tenant(db_session_with_containers, tenant)
        member_b = factory.create_account_in_tenant(db_session_with_containers, tenant)
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
            permission=DatasetPermissionEnum.PARTIAL_TEAM,
        )
        for member in (member_a, member_b):
            factory.create_dataset_permission(
                db_session_with_containers,
                dataset_id=dataset.id,
                tenant_id=tenant.id,
                account_id=member.id,
            )

        member_ids = DatasetPermissionService.get_dataset_partial_member_list(dataset.id)

        assert set(member_ids) == {member_a.id, member_b.id}

    def test_update_partial_member_list_replaces_permissions_and_commits(self, db_session_with_containers: Session):
        """Updating the list leaves only the requested members; the earlier row is replaced."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        member_a = factory.create_account_in_tenant(db_session_with_containers, tenant)
        member_b = factory.create_account_in_tenant(db_session_with_containers, tenant)
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
            permission=DatasetPermissionEnum.PARTIAL_TEAM,
        )
        stale_member = factory.create_account_in_tenant(db_session_with_containers, tenant)
        factory.create_dataset_permission(
            db_session_with_containers,
            dataset_id=dataset.id,
            tenant_id=tenant.id,
            account_id=stale_member.id,
        )
        requested = [{"user_id": member_a.id}, {"user_id": member_b.id}]

        DatasetPermissionService.update_partial_member_list(tenant.id, dataset.id, requested)

        rows = db_session_with_containers.query(DatasetPermission).filter_by(dataset_id=dataset.id).all()
        stored_account_ids = {row.account_id for row in rows}
        assert stored_account_ids == {member_a.id, member_b.id}

    def test_check_permission_requires_dataset_editor(self):
        """A user who is not a dataset editor is rejected with ``NoPermissionError``."""
        actor = SimpleNamespace(is_dataset_editor=False, is_dataset_operator=False)
        target = SimpleNamespace(id="dataset-1", permission=DatasetPermissionEnum.ALL_TEAM)

        with pytest.raises(NoPermissionError, match="does not have permission"):
            DatasetPermissionService.check_permission(actor, target, DatasetPermissionEnum.ALL_TEAM, [])

    def test_check_permission_prevents_dataset_operator_from_changing_permission_mode(self):
        """A dataset operator asking for a different permission mode is rejected."""
        actor = SimpleNamespace(is_dataset_editor=True, is_dataset_operator=True)
        target = SimpleNamespace(id="dataset-1", permission=DatasetPermissionEnum.ALL_TEAM)

        with pytest.raises(NoPermissionError, match="cannot change the dataset permissions"):
            DatasetPermissionService.check_permission(actor, target, DatasetPermissionEnum.ONLY_ME, [])

    def test_check_permission_requires_partial_member_list_for_partial_members_mode(self):
        """PARTIAL_TEAM requested with an empty member list raises ``ValueError``."""
        actor = SimpleNamespace(is_dataset_editor=True, is_dataset_operator=True)
        target = SimpleNamespace(id="dataset-1", permission=DatasetPermissionEnum.PARTIAL_TEAM)

        with pytest.raises(ValueError, match="Partial member list is required"):
            DatasetPermissionService.check_permission(actor, target, DatasetPermissionEnum.PARTIAL_TEAM, [])

    def test_check_permission_rejects_dataset_operator_member_list_changes(self):
        """A dataset operator submitting a member list that differs from the stored one is rejected."""
        actor = SimpleNamespace(is_dataset_editor=True, is_dataset_operator=True)
        target = SimpleNamespace(id="dataset-1", permission=DatasetPermissionEnum.PARTIAL_TEAM)

        with (
            patch.object(DatasetPermissionService, "get_dataset_partial_member_list", return_value=["user-1"]),
            pytest.raises(ValueError, match="cannot change the dataset permissions"),
        ):
            DatasetPermissionService.check_permission(
                actor,
                target,
                DatasetPermissionEnum.PARTIAL_TEAM,
                [{"user_id": "user-2"}],
            )

    def test_check_permission_allows_dataset_operator_when_member_list_is_unchanged(self):
        """A dataset operator resubmitting the stored member list passes without an exception."""
        actor = SimpleNamespace(is_dataset_editor=True, is_dataset_operator=True)
        target = SimpleNamespace(id="dataset-1", permission=DatasetPermissionEnum.PARTIAL_TEAM)

        with patch.object(DatasetPermissionService, "get_dataset_partial_member_list", return_value=["user-1"]):
            DatasetPermissionService.check_permission(
                actor,
                target,
                DatasetPermissionEnum.PARTIAL_TEAM,
                [{"user_id": "user-1"}],
            )

    def test_clear_partial_member_list_deletes_permissions_and_commits(self, db_session_with_containers: Session):
        """Clearing the member list removes every permission row of the dataset."""
        factory = DatasetPermissionIntegrationFactory
        owner, tenant = factory.create_account_with_tenant(db_session_with_containers)
        member = factory.create_account_in_tenant(db_session_with_containers, tenant)
        dataset = factory.create_dataset(
            db_session_with_containers,
            tenant_id=tenant.id,
            created_by=owner.id,
            permission=DatasetPermissionEnum.PARTIAL_TEAM,
        )
        factory.create_dataset_permission(
            db_session_with_containers,
            dataset_id=dataset.id,
            tenant_id=tenant.id,
            account_id=member.id,
        )

        DatasetPermissionService.clear_partial_member_list(dataset.id)

        leftover = db_session_with_containers.query(DatasetPermission).filter_by(dataset_id=dataset.id).all()
        assert leftover == []

View File

@ -1,29 +1,20 @@
"""Unit tests for DatasetService and dataset-related collaborators."""
from .dataset_service_test_helpers import (
CloudPlan,
Dataset,
DatasetCollectionBindingService,
DatasetNameDuplicateError,
DatasetPermissionEnum,
DatasetPermissionService,
DatasetProcessRule,
DatasetService,
DatasetServiceUnitDataFactory,
DocumentIndexingError,
DocumentService,
LLMBadRequestError,
MagicMock,
Mock,
ModelFeature,
ModelType,
NoPermissionError,
NotFound,
PipelineIconInfo,
ProviderTokenNotInitError,
RagPipelineDatasetCreateEntity,
SimpleNamespace,
TenantAccountRole,
_make_knowledge_configuration,
_make_retrieval_model,
_make_session_context,
@ -33,127 +24,6 @@ from .dataset_service_test_helpers import (
)
class TestDatasetServiceQueries:
    """Unit tests for DatasetService query composition and fallback branches."""

    @pytest.fixture
    def mock_dataset_query_dependencies(self):
        """Patch ``db``, the LIKE-escaping helper and the tag lookup; yield the three mocks by key."""
        with (
            patch("services.dataset_service.db") as db_mock,
            patch("services.dataset_service.helper.escape_like_pattern", return_value="escaped-search") as escape_mock,
            patch("services.dataset_service.TagService.get_target_ids_by_tag_ids") as tag_lookup_mock,
        ):
            # Default pagination answer: one item, total of one.
            db_mock.paginate.return_value = SimpleNamespace(items=["dataset"], total=1)
            mocks = {
                "db": db_mock,
                "escape_like_pattern": escape_mock,
                "get_target_ids": tag_lookup_mock,
            }
            yield mocks

    def test_get_datasets_returns_paginated_results_for_public_view(self, mock_dataset_query_dependencies):
        """Without user, search or tags the paginated page is returned and nothing is escaped."""
        deps = mock_dataset_query_dependencies

        items, total = DatasetService.get_datasets(page=1, per_page=20, tenant_id="tenant-1")

        assert items == ["dataset"]
        assert total == 1
        deps["db"].paginate.assert_called_once()
        deps["escape_like_pattern"].assert_not_called()

    def test_get_datasets_short_circuits_for_dataset_operator_without_permissions(
        self, mock_dataset_query_dependencies
    ):
        """A dataset operator for whom the scalars lookup is empty gets an empty page without paginating."""
        deps = mock_dataset_query_dependencies
        operator = DatasetServiceUnitDataFactory.create_user_mock(role=TenantAccountRole.DATASET_OPERATOR)
        deps["db"].session.scalars.return_value.all.return_value = []

        items, total = DatasetService.get_datasets(page=1, per_page=20, tenant_id="tenant-1", user=operator)

        assert items == []
        assert total == 0
        deps["db"].paginate.assert_not_called()

    def test_get_datasets_short_circuits_when_tag_lookup_returns_no_target_ids(self, mock_dataset_query_dependencies):
        """An empty tag lookup result yields an empty page without paginating."""
        deps = mock_dataset_query_dependencies
        deps["get_target_ids"].return_value = []

        items, total = DatasetService.get_datasets(
            page=1,
            per_page=20,
            tenant_id="tenant-1",
            tag_ids=["tag-1"],
        )

        assert items == []
        assert total == 0
        deps["get_target_ids"].assert_called_once_with("knowledge", "tenant-1", ["tag-1"])
        deps["db"].paginate.assert_not_called()

    def test_get_datasets_search_and_tag_filters_call_collaborators(self, mock_dataset_query_dependencies):
        """Search text and tag ids are passed to their helpers before pagination runs."""
        deps = mock_dataset_query_dependencies
        deps["get_target_ids"].return_value = ["dataset-1"]

        items, total = DatasetService.get_datasets(
            page=2,
            per_page=10,
            tenant_id="tenant-1",
            search="report",
            tag_ids=["tag-1"],
        )

        assert items == ["dataset"]
        assert total == 1
        deps["escape_like_pattern"].assert_called_once_with("report")
        deps["get_target_ids"].assert_called_once_with("knowledge", "tenant-1", ["tag-1"])
        deps["db"].paginate.assert_called_once()

    def test_get_process_rules_returns_latest_rule_when_present(self):
        """A stored process rule is returned as its mode plus its rules dict."""
        stored_rule = Mock(spec=DatasetProcessRule)
        stored_rule.mode = "automatic"
        stored_rule.rules_dict = {"delimiter": "\n"}

        with patch("services.dataset_service.db") as db_mock:
            db_mock.session.execute.return_value.scalar_one_or_none.return_value = stored_rule
            rules = DatasetService.get_process_rules("dataset-1")

        assert rules == {"mode": "automatic", "rules": {"delimiter": "\n"}}

    def test_get_process_rules_falls_back_to_default_rules_when_missing(self):
        """With no stored rule the DocumentService defaults are returned."""
        with patch("services.dataset_service.db") as db_mock:
            db_mock.session.execute.return_value.scalar_one_or_none.return_value = None
            rules = DatasetService.get_process_rules("dataset-1")

        expected = {
            "mode": DocumentService.DEFAULT_RULES["mode"],
            "rules": DocumentService.DEFAULT_RULES["rules"],
        }
        assert rules == expected

    def test_get_datasets_by_ids_returns_empty_for_missing_ids(self):
        """An empty id list returns an empty page and never paginates."""
        with patch("services.dataset_service.db") as db_mock:
            items, total = DatasetService.get_datasets_by_ids([], "tenant-1")

        assert items == []
        assert total == 0
        db_mock.paginate.assert_not_called()

    def test_get_datasets_by_ids_uses_paginate_for_non_empty_input(self):
        """A non-empty id list is answered from a single paginate call."""
        with patch("services.dataset_service.db") as db_mock:
            db_mock.paginate.return_value = SimpleNamespace(items=["dataset-1"], total=1)
            items, total = DatasetService.get_datasets_by_ids(["dataset-1"], "tenant-1")

        assert items == ["dataset-1"]
        assert total == 1
        db_mock.paginate.assert_called_once()

    def test_get_dataset_returns_first_match(self):
        """The object returned by ``db.session.get`` is handed back unchanged."""
        expected = DatasetServiceUnitDataFactory.create_dataset_mock()

        with patch("services.dataset_service.db") as db_mock:
            db_mock.session.get.return_value = expected
            found = DatasetService.get_dataset(expected.id)

        assert found is expected
class TestDatasetServiceValidation:
"""Unit tests for DatasetService validation helpers."""
@ -1337,103 +1207,6 @@ class TestDatasetServiceRagPipelineSettings:
class TestDatasetServicePermissionsAndLifecycle:
"""Unit tests for dataset permissions, deletion, and metadata helpers."""
def test_delete_dataset_returns_false_when_dataset_is_missing(self):
    """``delete_dataset`` returns ``False`` when ``get_dataset`` finds nothing."""
    with patch.object(DatasetService, "get_dataset", return_value=None):
        outcome = DatasetService.delete_dataset("dataset-1", user=SimpleNamespace(id="user-1"))

    assert outcome is False
def test_delete_dataset_checks_permission_and_deletes_dataset(self):
    """Deletion checks permission, sends the deleted signal, then deletes and commits once each."""
    target = DatasetServiceUnitDataFactory.create_dataset_mock()

    with (
        patch.object(DatasetService, "get_dataset", return_value=target),
        patch.object(DatasetService, "check_dataset_permission") as permission_check,
        patch("services.dataset_service.dataset_was_deleted.send") as deleted_signal,
        patch("services.dataset_service.db") as db_mock,
    ):
        outcome = DatasetService.delete_dataset(target.id, user=SimpleNamespace(id="user-1"))

    assert outcome is True
    # SimpleNamespace compares by attributes, so a fresh instance matches the call argument.
    permission_check.assert_called_once_with(target, SimpleNamespace(id="user-1"))
    deleted_signal.assert_called_once_with(target)
    db_mock.session.delete.assert_called_once_with(target)
    db_mock.session.commit.assert_called_once()
def test_dataset_use_check_returns_scalar_result(self):
    """``dataset_use_check`` returns the value produced by ``scalar_one`` on the executed query."""
    with patch("services.dataset_service.db") as db_mock:
        db_mock.session.execute.return_value.scalar_one.return_value = True
        in_use = DatasetService.dataset_use_check("dataset-1")

    assert in_use is True
def test_check_dataset_permission_rejects_cross_tenant_access(self):
    """A user mocked with another tenant id is denied access to the dataset."""
    target = DatasetServiceUnitDataFactory.create_dataset_mock(tenant_id="tenant-a")
    outsider = DatasetServiceUnitDataFactory.create_user_mock(tenant_id="tenant-b")

    with pytest.raises(NoPermissionError, match="do not have permission"):
        DatasetService.check_dataset_permission(target, outsider)
def test_check_dataset_permission_rejects_only_me_dataset_for_non_creator(self):
    """An ONLY_ME dataset rejects an editor whose id differs from ``created_by``."""
    make = DatasetServiceUnitDataFactory
    target = make.create_dataset_mock(permission=DatasetPermissionEnum.ONLY_ME, created_by="owner-1")
    member = make.create_user_mock(user_id="member-1", role=TenantAccountRole.EDITOR)

    with pytest.raises(NoPermissionError, match="do not have permission"):
        DatasetService.check_dataset_permission(target, member)
def test_check_dataset_permission_rejects_partial_team_user_without_binding(self):
    """A PARTIAL_TEAM dataset rejects a non-creator when the permission lookup returns ``None``."""
    make = DatasetServiceUnitDataFactory
    target = make.create_dataset_mock(permission=DatasetPermissionEnum.PARTIAL_TEAM, created_by="owner-1")
    member = make.create_user_mock(user_id="member-1", role=TenantAccountRole.EDITOR)

    with patch("services.dataset_service.db") as db_mock:
        # Simulate "no permission row found".
        db_mock.session.scalar.return_value = None
        with pytest.raises(NoPermissionError, match="do not have permission"):
            DatasetService.check_dataset_permission(target, member)
def test_check_dataset_permission_allows_partial_team_creator_without_lookup(self):
    """The creator of a PARTIAL_TEAM dataset passes without any ``session.scalar`` call."""
    make = DatasetServiceUnitDataFactory
    target = make.create_dataset_mock(permission=DatasetPermissionEnum.PARTIAL_TEAM, created_by="creator-1")
    creator = make.create_user_mock(user_id="creator-1", role=TenantAccountRole.EDITOR)

    with patch("services.dataset_service.db") as db_mock:
        DatasetService.check_dataset_permission(target, creator)

    db_mock.session.scalar.assert_not_called()
def test_check_dataset_permission_allows_partial_team_member_with_binding(self):
    """A non-creator passes the PARTIAL_TEAM check when the permission lookup returns an object."""
    make = DatasetServiceUnitDataFactory
    target = make.create_dataset_mock(permission=DatasetPermissionEnum.PARTIAL_TEAM, created_by="owner-1")
    member = make.create_user_mock(user_id="member-1", role=TenantAccountRole.EDITOR)

    with patch("services.dataset_service.db") as db_mock:
        # Any non-None value stands in for an existing permission row.
        db_mock.session.scalar.return_value = object()
        DatasetService.check_dataset_permission(target, member)
def test_check_dataset_operator_permission_validates_required_arguments(self):
with pytest.raises(ValueError, match="Dataset not found"):
DatasetService.check_dataset_operator_permission(user=SimpleNamespace(id="user-1"), dataset=None)
@ -1441,279 +1214,14 @@ class TestDatasetServicePermissionsAndLifecycle:
with pytest.raises(ValueError, match="User not found"):
DatasetService.check_dataset_operator_permission(user=None, dataset=SimpleNamespace(id="dataset-1"))
def test_check_dataset_operator_permission_rejects_only_me_for_non_creator(self):
    """The operator check rejects an editor who did not create an ONLY_ME dataset."""
    make = DatasetServiceUnitDataFactory
    target = make.create_dataset_mock(permission=DatasetPermissionEnum.ONLY_ME, created_by="owner-1")
    member = make.create_user_mock(user_id="member-1", role=TenantAccountRole.EDITOR)

    with pytest.raises(NoPermissionError, match="do not have permission"):
        DatasetService.check_dataset_operator_permission(user=member, dataset=target)
def test_check_dataset_operator_permission_rejects_partial_team_without_binding(self):
    """With an empty `scalars().all()` result, a PARTIAL_TEAM dataset check raises `NoPermissionError`."""
    partial_team_dataset = DatasetServiceUnitDataFactory.create_dataset_mock(
        permission=DatasetPermissionEnum.PARTIAL_TEAM
    )
    member = DatasetServiceUnitDataFactory.create_user_mock(
        role=TenantAccountRole.EDITOR,
        user_id="member-1",
    )

    with (
        patch("services.dataset_service.db") as db_mock,
        pytest.raises(NoPermissionError, match="do not have permission"),
    ):
        # The stubbed query yields no rows.
        scalars_result = db_mock.session.scalars.return_value
        scalars_result.all.return_value = []

        DatasetService.check_dataset_operator_permission(dataset=partial_team_dataset, user=member)
def test_get_dataset_queries_delegates_to_paginate(self):
    """`get_dataset_queries` returns the items and total of the stubbed `db.paginate` result."""

    def passthrough(column):
        # Stand-in for `db.desc`: hands the column back unchanged.
        return column

    page = SimpleNamespace(total=1, items=["query"])

    with patch("services.dataset_service.db") as db_mock:
        db_mock.desc.side_effect = passthrough
        db_mock.paginate.return_value = page

        queries, count = DatasetService.get_dataset_queries("dataset-1", page=1, per_page=20)

    assert queries == ["query"]
    assert count == 1
    db_mock.paginate.assert_called_once()
def test_get_related_apps_returns_ordered_query_results(self):
    """`get_related_apps` returns whatever the stubbed `scalars().all()` query produces."""

    def passthrough(column):
        # Stand-in for `db.desc`: hands the column back unchanged.
        return column

    with patch("services.dataset_service.db") as db_mock:
        db_mock.desc.side_effect = passthrough
        scalars_result = db_mock.session.scalars.return_value
        scalars_result.all.return_value = ["relation-1"]

        related = DatasetService.get_related_apps("dataset-1")

    assert related == ["relation-1"]
def test_update_dataset_api_status_raises_not_found_for_missing_dataset(self):
    """When `get_dataset` yields None, updating the API status raises `NotFound`."""
    with (
        patch.object(DatasetService, "get_dataset", return_value=None),
        pytest.raises(NotFound, match="Dataset not found"),
    ):
        DatasetService.update_dataset_api_status("dataset-1", True)
def test_update_dataset_api_status_requires_current_user_id(self):
    """A `current_user` whose `id` is None makes the status update raise `ValueError`."""
    existing_dataset = DatasetServiceUnitDataFactory.create_dataset_mock(enable_api=False)
    user_without_id = SimpleNamespace(id=None)

    with (
        patch.object(DatasetService, "get_dataset", return_value=existing_dataset),
        patch("services.dataset_service.current_user", user_without_id),
        pytest.raises(ValueError, match="Current user or current user id not found"),
    ):
        DatasetService.update_dataset_api_status(existing_dataset.id, True)
def test_update_dataset_api_status_updates_fields_and_commits(self):
    """Enabling the API sets `enable_api`, `updated_by`, `updated_at` and commits once."""
    target = DatasetServiceUnitDataFactory.create_dataset_mock(enable_api=False)
    # Unique sentinel so the timestamp can be checked by identity.
    timestamp = object()
    acting_user = SimpleNamespace(id="user-1")

    with (
        patch.object(DatasetService, "get_dataset", return_value=target),
        patch("services.dataset_service.current_user", acting_user),
        patch("services.dataset_service.naive_utc_now", return_value=timestamp),
        patch("services.dataset_service.db") as db_mock,
    ):
        DatasetService.update_dataset_api_status(target.id, True)

    assert target.enable_api is True
    assert target.updated_by == "user-1"
    assert target.updated_at is timestamp
    db_mock.session.commit.assert_called_once()
def test_get_dataset_auto_disable_logs_returns_empty_when_billing_is_disabled(self):
    """With billing disabled the result is empty and `db.session.scalars` is never called."""
    # Minimal class patched in place of `Account`.
    # NOTE(review): presumably lets a type check in the service accept the stand-in user — confirm.
    FakeAccount = type("FakeAccount", (), {})
    account = FakeAccount()
    account.current_tenant_id = "tenant-1"

    subscription = SimpleNamespace(plan=CloudPlan.PROFESSIONAL)
    billing = SimpleNamespace(enabled=False, subscription=subscription)
    feature_flags = SimpleNamespace(billing=billing)

    with (
        patch("services.dataset_service.Account", FakeAccount),
        patch("services.dataset_service.current_user", account),
        patch("services.dataset_service.FeatureService.get_features", return_value=feature_flags),
        patch("services.dataset_service.db") as db_mock,
    ):
        logs_summary = DatasetService.get_dataset_auto_disable_logs("dataset-1")

    assert logs_summary == {"document_ids": [], "count": 0}
    db_mock.session.scalars.assert_not_called()
def test_get_dataset_auto_disable_logs_returns_recent_document_ids(self):
    """With billing enabled the result lists the document ids of the stubbed log rows and their count."""
    # Minimal class patched in place of `Account`.
    # NOTE(review): presumably lets a type check in the service accept the stand-in user — confirm.
    FakeAccount = type("FakeAccount", (), {})
    account = FakeAccount()
    account.current_tenant_id = "tenant-1"

    log_rows = [SimpleNamespace(document_id=doc_id) for doc_id in ("doc-1", "doc-2")]
    subscription = SimpleNamespace(plan=CloudPlan.PROFESSIONAL)
    billing = SimpleNamespace(enabled=True, subscription=subscription)
    feature_flags = SimpleNamespace(billing=billing)

    with (
        patch("services.dataset_service.Account", FakeAccount),
        patch("services.dataset_service.current_user", account),
        patch("services.dataset_service.FeatureService.get_features", return_value=feature_flags),
        patch("services.dataset_service.db") as db_mock,
    ):
        db_mock.session.scalars.return_value.all.return_value = log_rows

        logs_summary = DatasetService.get_dataset_auto_disable_logs("dataset-1")

    assert logs_summary == {"document_ids": ["doc-1", "doc-2"], "count": 2}
class TestDatasetServiceDocumentIndexing:
    """Mock-based tests for `DocumentService` pause, recover and retry; no SQL is asserted."""

    @pytest.fixture
    def mock_document_service_dependencies(self):
        """Patch redis, the db session and the current user, yielding the mocks by name."""
        with (
            patch("services.dataset_service.redis_client") as redis_mock,
            patch("services.dataset_service.db.session") as session_mock,
            patch("services.dataset_service.current_user") as user_mock,
        ):
            user_mock.id = "user-123"
            dependencies = dict(
                redis_client=redis_mock,
                db_session=session_mock,
                current_user=user_mock,
            )
            # Patches stay active for as long as the test using the fixture runs.
            yield dependencies

    def test_pause_document_success(self, mock_document_service_dependencies):
        """Pausing an indexing document flags it, persists it and sets the redis pause key."""
        session = mock_document_service_dependencies["db_session"]
        redis = mock_document_service_dependencies["redis_client"]
        document = DatasetServiceUnitDataFactory.create_document_mock(indexing_status="indexing")

        DocumentService.pause_document(document)

        assert document.is_paused is True
        assert document.paused_by == "user-123"
        session.add.assert_called_once_with(document)
        session.commit.assert_called_once()
        redis.setnx.assert_called_once_with(f"document_{document.id}_is_paused", "True")

    def test_pause_document_invalid_status_error(self, mock_document_service_dependencies):
        """Pausing a document whose status is "completed" raises `DocumentIndexingError`."""
        finished = DatasetServiceUnitDataFactory.create_document_mock(indexing_status="completed")

        expect_failure = pytest.raises(DocumentIndexingError)
        with expect_failure:
            DocumentService.pause_document(finished)

    def test_recover_document_success(self, mock_document_service_dependencies):
        """Recovering a paused document clears pause fields, deletes the redis key and queues the task."""
        session = mock_document_service_dependencies["db_session"]
        redis = mock_document_service_dependencies["redis_client"]
        document = DatasetServiceUnitDataFactory.create_document_mock(
            indexing_status="indexing",
            is_paused=True,
        )

        with patch("services.dataset_service.recover_document_indexing_task") as task_mock:
            DocumentService.recover_document(document)

        assert document.is_paused is False
        assert document.paused_by is None
        assert document.paused_at is None
        session.add.assert_called_once_with(document)
        session.commit.assert_called_once()
        redis.delete.assert_called_once_with(f"document_{document.id}_is_paused")
        task_mock.delay.assert_called_once_with(document.dataset_id, document.id)

    def test_retry_document_indexing_success(self, mock_document_service_dependencies):
        """Retrying two errored documents resets each to "waiting" and queues one retry task."""
        session = mock_document_service_dependencies["db_session"]
        redis = mock_document_service_dependencies["redis_client"]
        dataset_id = "dataset-123"
        failed_documents = [
            DatasetServiceUnitDataFactory.create_document_mock(document_id=doc_id, indexing_status="error")
            for doc_id in ("doc-1", "doc-2")
        ]
        redis.get.return_value = None

        with patch("services.dataset_service.retry_document_indexing_task") as task_mock:
            DocumentService.retry_document(dataset_id, failed_documents)

        for retried in failed_documents:
            assert retried.indexing_status == "waiting"
        assert session.add.call_count == 2
        assert session.commit.call_count == 2
        assert redis.setex.call_count == 2
        task_mock.delay.assert_called_once_with(dataset_id, ["doc-1", "doc-2"], "user-123")
class TestDatasetCollectionBindingService:
    """Mock-based tests for looking up and creating dataset collection bindings."""

    def test_get_dataset_collection_binding_returns_existing_binding(self):
        """A binding returned by `db.session.scalar` is handed back and nothing is added."""
        existing = SimpleNamespace(id="binding-1")

        with patch("services.dataset_service.db") as db_mock:
            db_mock.session.scalar.return_value = existing

            found = DatasetCollectionBindingService.get_dataset_collection_binding("provider", "model")

        assert found is existing
        db_mock.session.add.assert_not_called()

    def test_get_dataset_collection_binding_creates_binding_when_missing(self):
        """When the lookup yields None a new binding is built, added to the session and committed."""
        new_binding = SimpleNamespace(id="binding-2")

        with (
            patch("services.dataset_service.db") as db_mock,
            patch("services.dataset_service.select"),
            patch("services.dataset_service.DatasetCollectionBinding", return_value=new_binding) as binding_ctor,
            patch.object(Dataset, "gen_collection_name_by_id", return_value="generated-collection"),
        ):
            db_mock.session.scalar.return_value = None

            returned = DatasetCollectionBindingService.get_dataset_collection_binding(
                "provider", "model", "dataset"
            )

        assert returned is new_binding
        expected_kwargs = {
            "provider_name": "provider",
            "model_name": "model",
            "collection_name": "generated-collection",
            "type": "dataset",
        }
        binding_ctor.assert_called_once_with(**expected_kwargs)
        db_mock.session.add.assert_called_once_with(new_binding)
        db_mock.session.commit.assert_called_once()

    def test_get_dataset_collection_binding_by_id_and_type_raises_when_missing(self):
        """A None lookup result makes the by-id-and-type getter raise `ValueError`."""
        with (
            patch("services.dataset_service.db") as db_mock,
            pytest.raises(ValueError, match="Dataset collection binding not found"),
        ):
            db_mock.session.scalar.return_value = None

            DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type("binding-1")

    def test_get_dataset_collection_binding_by_id_and_type_returns_binding(self):
        """The by-id-and-type getter returns the object produced by `db.session.scalar`."""
        existing = SimpleNamespace(id="binding-1")

        with patch("services.dataset_service.db") as db_mock:
            db_mock.session.scalar.return_value = existing

            found = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type("binding-1")

        assert found is existing
class TestDatasetPermissionService:
"""Unit tests for dataset partial-member management helpers."""
def test_get_dataset_partial_member_list_returns_scalar_results(self):
    """The partial member list is exactly what the stubbed `scalars().all()` call returns."""
    with patch("services.dataset_service.db") as db_mock:
        scalars_result = db_mock.session.scalars.return_value
        scalars_result.all.return_value = ["user-1", "user-2"]

        members = DatasetPermissionService.get_dataset_partial_member_list("dataset-1")

    assert members == ["user-1", "user-2"]
def test_update_partial_member_list_replaces_permissions_and_commits(self):
    """Updating the member list calls `execute`, then `add_all` once and `commit` once on the session."""
    new_members = [{"user_id": member_id} for member_id in ("user-1", "user-2")]

    with patch("services.dataset_service.db") as db_mock:
        DatasetPermissionService.update_partial_member_list("tenant-1", "dataset-1", new_members)

    session = db_mock.session
    session.execute.assert_called()
    session.add_all.assert_called_once()
    session.commit.assert_called_once()
def test_update_partial_member_list_rolls_back_on_exception(self):
with patch("services.dataset_service.db") as mock_db:
mock_db.session.add_all.side_effect = RuntimeError("boom")
@ -1777,13 +1285,6 @@ class TestDatasetPermissionService:
[{"user_id": "user-1"}],
)
def test_clear_partial_member_list_deletes_permissions_and_commits(self):
    """Clearing the member list calls `execute` and commits exactly once on the session."""
    with patch("services.dataset_service.db") as db_mock:
        DatasetPermissionService.clear_partial_member_list("dataset-1")

    session = db_mock.session
    session.execute.assert_called()
    session.commit.assert_called_once()
def test_clear_partial_member_list_rolls_back_on_exception(self):
with patch("services.dataset_service.db") as mock_db:
mock_db.session.execute.side_effect = RuntimeError("boom")