From 98d3bcd079343ab1e2f2ffba55c5693b26e74a6d Mon Sep 17 00:00:00 2001 From: BitToby <218712309+bittoby@users.noreply.github.com> Date: Fri, 10 Apr 2026 19:35:52 +0200 Subject: [PATCH 001/147] test: migrate SQLAlchemyWorkflowNodeExecutionRepository tests to testcontainers (#34926) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- ...hemy_workflow_node_execution_repository.py | 395 ++++++++++++++++++ .../workflow_node_execution/__init__.py | 3 - .../test_sqlalchemy_repository.py | 340 --------------- ...hemy_workflow_node_execution_repository.py | 106 ----- 4 files changed, 395 insertions(+), 449 deletions(-) create mode 100644 api/tests/test_containers_integration_tests/repositories/test_sqlalchemy_workflow_node_execution_repository.py delete mode 100644 api/tests/unit_tests/repositories/workflow_node_execution/__init__.py delete mode 100644 api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py delete mode 100644 api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py diff --git a/api/tests/test_containers_integration_tests/repositories/test_sqlalchemy_workflow_node_execution_repository.py b/api/tests/test_containers_integration_tests/repositories/test_sqlalchemy_workflow_node_execution_repository.py new file mode 100644 index 0000000000..22e0aa34ff --- /dev/null +++ b/api/tests/test_containers_integration_tests/repositories/test_sqlalchemy_workflow_node_execution_repository.py @@ -0,0 +1,395 @@ +"""Testcontainers integration tests for SQLAlchemyWorkflowNodeExecutionRepository.""" + +from __future__ import annotations + +import json +from datetime import datetime +from decimal import Decimal +from uuid import uuid4 + +from graphon.entities import WorkflowNodeExecution +from graphon.enums import ( + BuiltinNodeTypes, + WorkflowNodeExecutionMetadataKey, + WorkflowNodeExecutionStatus, +) +from graphon.model_runtime.utils.encoders import jsonable_encoder +from sqlalchemy import Engine +from sqlalchemy.orm import Session, sessionmaker + +from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository +from core.repositories.factory import OrderConfig +from models.account import Account, Tenant +from models.enums import CreatorUserRole +from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom + + +def _create_account_with_tenant(session: Session) -> Account: + tenant = Tenant(name="Test Workspace") + session.add(tenant) + session.flush() + + account = Account(name="test", email=f"test-{uuid4()}@example.com") + session.add(account) + session.flush() + + account._current_tenant = tenant + return account + + +def _make_repo(session: Session, account: Account, app_id: str) -> SQLAlchemyWorkflowNodeExecutionRepository: + engine = session.get_bind() + assert isinstance(engine, Engine) + return SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=sessionmaker(bind=engine, expire_on_commit=False), + user=account, + app_id=app_id, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + +def _create_node_execution_model( + session: Session, + *, + tenant_id: str, + app_id: str, + workflow_id: str, + workflow_run_id: str, + index: int = 1, + status: WorkflowNodeExecutionStatus = WorkflowNodeExecutionStatus.RUNNING, +) -> WorkflowNodeExecutionModel: + model = WorkflowNodeExecutionModel( + id=str(uuid4()), + tenant_id=tenant_id, + app_id=app_id, + workflow_id=workflow_id, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + workflow_run_id=workflow_run_id, + index=index, + predecessor_node_id=None, + node_execution_id=str(uuid4()), + node_id=f"node-{index}", + node_type=BuiltinNodeTypes.START, + title=f"Test Node {index}", + inputs='{"input_key": "input_value"}', + process_data='{"process_key": "process_value"}', + outputs='{"output_key": "output_value"}', + status=status, + error=None, + elapsed_time=1.5, + execution_metadata="{}", + created_at=datetime.now(), + created_by_role=CreatorUserRole.ACCOUNT, + created_by=str(uuid4()), + finished_at=None, + ) + session.add(model) + session.flush() + return model + + +class TestSave: + def test_save_new_record(self, db_session_with_containers: Session) -> None: + account = _create_account_with_tenant(db_session_with_containers) + app_id = str(uuid4()) + repo = _make_repo(db_session_with_containers, account, app_id) + + execution = WorkflowNodeExecution( + id=str(uuid4()), + workflow_id=str(uuid4()), + node_execution_id=str(uuid4()), + workflow_execution_id=str(uuid4()), + index=1, + predecessor_node_id=None, + node_id="node-1", + node_type=BuiltinNodeTypes.START, + title="Test Node", + inputs={"input_key": "input_value"}, + process_data={"process_key": "process_value"}, + outputs={"result": "success"}, + status=WorkflowNodeExecutionStatus.RUNNING, + error=None, + elapsed_time=1.5, + metadata={WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: 100}, + created_at=datetime.now(), + finished_at=None, + ) + + repo.save(execution) + + engine = db_session_with_containers.get_bind() + assert isinstance(engine, Engine) + with sessionmaker(bind=engine, expire_on_commit=False)() as verify_session: + saved = verify_session.get(WorkflowNodeExecutionModel, execution.id) + assert saved is not None + assert saved.tenant_id == account.current_tenant_id + assert saved.app_id == app_id + assert saved.node_id == "node-1" + assert saved.status == WorkflowNodeExecutionStatus.RUNNING + + def test_save_updates_existing_record(self, db_session_with_containers: Session) -> None: + account = _create_account_with_tenant(db_session_with_containers) + repo = _make_repo(db_session_with_containers, account, str(uuid4())) + + execution = WorkflowNodeExecution( + id=str(uuid4()), + workflow_id=str(uuid4()), + node_execution_id=str(uuid4()), + workflow_execution_id=str(uuid4()), + index=1, + predecessor_node_id=None, + node_id="node-1", + node_type=BuiltinNodeTypes.START, + title="Test Node", + inputs=None, + process_data=None, + outputs=None, + status=WorkflowNodeExecutionStatus.RUNNING, + error=None, + elapsed_time=0.0, + metadata=None, + created_at=datetime.now(), + finished_at=None, + ) + + repo.save(execution) + + execution.status = WorkflowNodeExecutionStatus.SUCCEEDED + execution.elapsed_time = 2.5 + repo.save(execution) + + engine = db_session_with_containers.get_bind() + assert isinstance(engine, Engine) + with sessionmaker(bind=engine, expire_on_commit=False)() as verify_session: + saved = verify_session.get(WorkflowNodeExecutionModel, execution.id) + assert saved is not None + assert saved.status == WorkflowNodeExecutionStatus.SUCCEEDED + assert saved.elapsed_time == 2.5 + + +class TestGetByWorkflowExecution: + def test_returns_executions_ordered(self, db_session_with_containers: Session) -> None: + account = _create_account_with_tenant(db_session_with_containers) + tenant_id = account.current_tenant_id + app_id = str(uuid4()) + workflow_id = str(uuid4()) + workflow_run_id = str(uuid4()) + repo = _make_repo(db_session_with_containers, account, app_id) + + _create_node_execution_model( + db_session_with_containers, + tenant_id=tenant_id, + app_id=app_id, + workflow_id=workflow_id, + workflow_run_id=workflow_run_id, + index=1, + status=WorkflowNodeExecutionStatus.SUCCEEDED, + ) + _create_node_execution_model( + db_session_with_containers, + tenant_id=tenant_id, + app_id=app_id, + workflow_id=workflow_id, + workflow_run_id=workflow_run_id, + index=2, + status=WorkflowNodeExecutionStatus.SUCCEEDED, + ) + db_session_with_containers.commit() + + order_config = OrderConfig(order_by=["index"], order_direction="desc") + result = repo.get_by_workflow_execution( + workflow_execution_id=workflow_run_id, + order_config=order_config, + ) + + assert len(result) == 2 + assert result[0].index == 2 + assert result[1].index == 1 + assert all(isinstance(r, WorkflowNodeExecution) for r in result) + + def test_excludes_paused_executions(self, db_session_with_containers: Session) -> None: + account = _create_account_with_tenant(db_session_with_containers) + tenant_id = account.current_tenant_id + app_id = str(uuid4()) + workflow_id = str(uuid4()) + workflow_run_id = str(uuid4()) + repo = _make_repo(db_session_with_containers, account, app_id) + + _create_node_execution_model( + db_session_with_containers, + tenant_id=tenant_id, + app_id=app_id, + workflow_id=workflow_id, + workflow_run_id=workflow_run_id, + index=1, + status=WorkflowNodeExecutionStatus.RUNNING, + ) + _create_node_execution_model( + db_session_with_containers, + tenant_id=tenant_id, + app_id=app_id, + workflow_id=workflow_id, + workflow_run_id=workflow_run_id, + index=2, + status=WorkflowNodeExecutionStatus.PAUSED, + ) + db_session_with_containers.commit() + + result = repo.get_by_workflow_execution(workflow_execution_id=workflow_run_id) + + assert len(result) == 1 + assert result[0].index == 1 + + +class TestToDbModel: + def test_converts_domain_to_db_model(self, db_session_with_containers: Session) -> None: + account = _create_account_with_tenant(db_session_with_containers) + app_id = str(uuid4()) + repo = _make_repo(db_session_with_containers, account, app_id) + + domain_model = WorkflowNodeExecution( + id="test-id", + workflow_id="test-workflow-id", + node_execution_id="test-node-execution-id", + workflow_execution_id="test-workflow-run-id", + index=1, + predecessor_node_id="test-predecessor-id", + node_id="test-node-id", + node_type=BuiltinNodeTypes.START, + title="Test Node", + inputs={"input_key": "input_value"}, + process_data={"process_key": "process_value"}, + outputs={"output_key": "output_value"}, + status=WorkflowNodeExecutionStatus.RUNNING, + error=None, + elapsed_time=1.5, + metadata={ + WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: 100, + WorkflowNodeExecutionMetadataKey.TOTAL_PRICE: Decimal("0.0"), + }, + created_at=datetime.now(), + finished_at=None, + ) + + db_model = repo._to_db_model(domain_model) + + assert isinstance(db_model, WorkflowNodeExecutionModel) + assert db_model.id == domain_model.id + assert db_model.tenant_id == account.current_tenant_id + assert db_model.app_id == app_id + assert db_model.workflow_id == domain_model.workflow_id + assert db_model.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN + assert db_model.workflow_run_id == domain_model.workflow_execution_id + assert db_model.index == domain_model.index + assert db_model.predecessor_node_id == domain_model.predecessor_node_id + assert db_model.node_execution_id == domain_model.node_execution_id + assert db_model.node_id == domain_model.node_id + assert db_model.node_type == domain_model.node_type + assert db_model.title == domain_model.title + assert db_model.inputs_dict == domain_model.inputs + assert db_model.process_data_dict == domain_model.process_data + assert db_model.outputs_dict == domain_model.outputs + assert db_model.execution_metadata_dict == jsonable_encoder(domain_model.metadata) + assert db_model.status == domain_model.status + assert db_model.error == domain_model.error + assert db_model.elapsed_time == domain_model.elapsed_time + assert db_model.created_at == domain_model.created_at + assert db_model.created_by_role == CreatorUserRole.ACCOUNT + assert db_model.created_by == account.id + assert db_model.finished_at == domain_model.finished_at + + +class TestToDomainModel: + def test_converts_db_to_domain_model(self, db_session_with_containers: Session) -> None: + account = _create_account_with_tenant(db_session_with_containers) + app_id = str(uuid4()) + repo = _make_repo(db_session_with_containers, account, app_id) + + inputs_dict = {"input_key": "input_value"} + process_data_dict = {"process_key": "process_value"} + outputs_dict = {"output_key": "output_value"} + metadata_dict = {str(WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS): 100} + now = datetime.now() + + db_model = WorkflowNodeExecutionModel() + db_model.id = "test-id" + db_model.tenant_id = account.current_tenant_id + db_model.app_id = app_id + db_model.workflow_id = "test-workflow-id" + db_model.triggered_from = "workflow-run" + db_model.workflow_run_id = "test-workflow-run-id" + db_model.index = 1 + db_model.predecessor_node_id = "test-predecessor-id" + db_model.node_execution_id = "test-node-execution-id" + db_model.node_id = "test-node-id" + db_model.node_type = BuiltinNodeTypes.START + db_model.title = "Test Node" + db_model.inputs = json.dumps(inputs_dict) + db_model.process_data = json.dumps(process_data_dict) + db_model.outputs = json.dumps(outputs_dict) + db_model.status = WorkflowNodeExecutionStatus.RUNNING + db_model.error = None + db_model.elapsed_time = 1.5 + db_model.execution_metadata = json.dumps(metadata_dict) + db_model.created_at = now + db_model.created_by_role = "account" + db_model.created_by = account.id + db_model.finished_at = None + + domain_model = repo._to_domain_model(db_model) + + assert isinstance(domain_model, WorkflowNodeExecution) + assert domain_model.id == "test-id" + assert domain_model.workflow_id == "test-workflow-id" + assert domain_model.workflow_execution_id == "test-workflow-run-id" + assert domain_model.index == 1 + assert domain_model.predecessor_node_id == "test-predecessor-id" + assert domain_model.node_execution_id == "test-node-execution-id" + assert domain_model.node_id == "test-node-id" + assert domain_model.node_type == BuiltinNodeTypes.START + assert domain_model.title == "Test Node" + assert domain_model.inputs == inputs_dict + assert domain_model.process_data == process_data_dict + assert domain_model.outputs == outputs_dict + assert domain_model.status == WorkflowNodeExecutionStatus.RUNNING + assert domain_model.error is None + assert domain_model.elapsed_time == 1.5 + assert domain_model.metadata == {WorkflowNodeExecutionMetadataKey(k): v for k, v in metadata_dict.items()} + assert domain_model.created_at == now + assert domain_model.finished_at is None + + def test_domain_model_without_offload_data(self, db_session_with_containers: Session) -> None: + account = _create_account_with_tenant(db_session_with_containers) + repo = _make_repo(db_session_with_containers, account, str(uuid4())) + + process_data = {"normal": "data"} + db_model = WorkflowNodeExecutionModel() + db_model.id = str(uuid4()) + db_model.tenant_id = account.current_tenant_id + db_model.app_id = str(uuid4()) + db_model.workflow_id = str(uuid4()) + db_model.triggered_from = "workflow-run" + db_model.workflow_run_id = None + db_model.index = 1 + db_model.predecessor_node_id = None + db_model.node_execution_id = str(uuid4()) + db_model.node_id = "test-node-id" + db_model.node_type = "llm" + db_model.title = "Test Node" + db_model.inputs = None + db_model.process_data = json.dumps(process_data) + db_model.outputs = None + db_model.status = "succeeded" + db_model.error = None + db_model.elapsed_time = 1.5 + db_model.execution_metadata = "{}" + db_model.created_at = datetime.now() + db_model.created_by_role = "account" + db_model.created_by = account.id + db_model.finished_at = None + + domain_model = repo._to_domain_model(db_model) + + assert domain_model.process_data == process_data + assert domain_model.process_data_truncated is False + assert domain_model.get_truncated_process_data() is None diff --git a/api/tests/unit_tests/repositories/workflow_node_execution/__init__.py b/api/tests/unit_tests/repositories/workflow_node_execution/__init__.py deleted file mode 100644 index 78815a8d1a..0000000000 --- a/api/tests/unit_tests/repositories/workflow_node_execution/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Unit tests for workflow_node_execution repositories. -""" diff --git a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py b/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py deleted file mode 100644 index 10850970d8..0000000000 --- a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py +++ /dev/null @@ -1,340 +0,0 @@ -""" -Unit tests for the SQLAlchemy implementation of WorkflowNodeExecutionRepository. -""" - -import json -import uuid -from datetime import datetime -from decimal import Decimal -from unittest.mock import MagicMock, PropertyMock - -import pytest -from graphon.entities import ( - WorkflowNodeExecution, -) -from graphon.enums import ( - BuiltinNodeTypes, - WorkflowNodeExecutionMetadataKey, - WorkflowNodeExecutionStatus, -) -from graphon.model_runtime.utils.encoders import jsonable_encoder -from pytest_mock import MockerFixture -from sqlalchemy.orm import Session, sessionmaker - -from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository -from core.repositories.factory import OrderConfig -from models.account import Account, Tenant -from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom - - -def configure_mock_execution(mock_execution): - """Configure a mock execution with proper JSON serializable values.""" - # Configure inputs, outputs, process_data, and execution_metadata to return JSON serializable values - type(mock_execution).inputs = PropertyMock(return_value='{"key": "value"}') - type(mock_execution).outputs = PropertyMock(return_value='{"result": "success"}') - type(mock_execution).process_data = PropertyMock(return_value='{"process": "data"}') - type(mock_execution).execution_metadata = PropertyMock(return_value='{"metadata": "info"}') - - # Configure status and triggered_from to be valid enum values - mock_execution.status = "running" - mock_execution.triggered_from = "workflow-run" - - return mock_execution - - -@pytest.fixture -def session(): - """Create a mock SQLAlchemy session.""" - session = MagicMock(spec=Session) - # Configure the session to be used as a context manager - session.__enter__ = MagicMock(return_value=session) - session.__exit__ = MagicMock(return_value=None) - - # Configure the session factory to return the session - session_factory = MagicMock(spec=sessionmaker) - session_factory.return_value = session - return session, session_factory - - -@pytest.fixture -def mock_user(): - """Create a user instance for testing.""" - user = Account(name="test", email="test@example.com") - user.id = "test-user-id" - - tenant = Tenant(name="Test Workspace") - tenant.id = "test-tenant" - user._current_tenant = MagicMock() - user._current_tenant.id = "test-tenant" - - return user - - -@pytest.fixture -def repository(session, mock_user): - """Create a repository instance with test data.""" - _, session_factory = session - app_id = "test-app" - return SQLAlchemyWorkflowNodeExecutionRepository( - session_factory=session_factory, - user=mock_user, - app_id=app_id, - triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, - ) - - -def test_save(repository, session): - """Test save method.""" - session_obj, _ = session - # Create a mock execution - execution = MagicMock(spec=WorkflowNodeExecution) - execution.id = "test-id" - execution.node_execution_id = "test-node-execution-id" - execution.tenant_id = None - execution.app_id = None - execution.inputs = None - execution.process_data = None - execution.outputs = None - execution.metadata = None - execution.workflow_id = str(uuid.uuid4()) - - # Mock the to_db_model method to return the execution itself - # This simulates the behavior of setting tenant_id and app_id - db_model = MagicMock(spec=WorkflowNodeExecutionModel) - db_model.id = "test-id" - db_model.node_execution_id = "test-node-execution-id" - repository._to_db_model = MagicMock(return_value=db_model) - - # Mock session.get to return None (no existing record) - session_obj.get.return_value = None - - # Call save method - repository.save(execution) - - # Assert to_db_model was called with the execution - repository._to_db_model.assert_called_once_with(execution) - - # Assert session.get was called to check for existing record - session_obj.get.assert_called_once_with(WorkflowNodeExecutionModel, db_model.id) - - # Assert session.add was called for new record - session_obj.add.assert_called_once_with(db_model) - - # Assert session.commit was called - session_obj.commit.assert_called_once() - - -def test_save_with_existing_tenant_id(repository, session): - """Test save method with existing tenant_id.""" - session_obj, _ = session - # Create a mock execution with existing tenant_id - execution = MagicMock(spec=WorkflowNodeExecutionModel) - execution.id = "existing-id" - execution.node_execution_id = "existing-node-execution-id" - execution.tenant_id = "existing-tenant" - execution.app_id = None - execution.inputs = None - execution.process_data = None - execution.outputs = None - execution.metadata = None - - # Create a modified execution that will be returned by _to_db_model - modified_execution = MagicMock(spec=WorkflowNodeExecutionModel) - modified_execution.id = "existing-id" - modified_execution.node_execution_id = "existing-node-execution-id" - modified_execution.tenant_id = "existing-tenant" # Tenant ID should not change - modified_execution.app_id = repository._app_id # App ID should be set - # Create a dictionary to simulate __dict__ for updating attributes - modified_execution.__dict__ = { - "id": "existing-id", - "node_execution_id": "existing-node-execution-id", - "tenant_id": "existing-tenant", - "app_id": repository._app_id, - } - - # Mock the to_db_model method to return the modified execution - repository._to_db_model = MagicMock(return_value=modified_execution) - - # Mock session.get to return an existing record - existing_model = MagicMock(spec=WorkflowNodeExecutionModel) - session_obj.get.return_value = existing_model - - # Call save method - repository.save(execution) - - # Assert to_db_model was called with the execution - repository._to_db_model.assert_called_once_with(execution) - - # Assert session.get was called to check for existing record - session_obj.get.assert_called_once_with(WorkflowNodeExecutionModel, modified_execution.id) - - # Assert session.add was NOT called since we're updating existing - session_obj.add.assert_not_called() - - # Assert session.commit was called - session_obj.commit.assert_called_once() - - -def test_get_by_workflow_execution(repository, session, mocker: MockerFixture): - """Test get_by_workflow_execution method.""" - session_obj, _ = session - # Set up mock - mock_select = mocker.patch("core.repositories.sqlalchemy_workflow_node_execution_repository.select") - mock_asc = mocker.patch("core.repositories.sqlalchemy_workflow_node_execution_repository.asc") - mock_desc = mocker.patch("core.repositories.sqlalchemy_workflow_node_execution_repository.desc") - - mock_WorkflowNodeExecutionModel = mocker.patch( - "core.repositories.sqlalchemy_workflow_node_execution_repository.WorkflowNodeExecutionModel" - ) - mock_stmt = mocker.MagicMock() - mock_select.return_value = mock_stmt - mock_stmt.where.return_value = mock_stmt - mock_stmt.order_by.return_value = mock_stmt - mock_asc.return_value = mock_stmt - mock_desc.return_value = mock_stmt - mock_WorkflowNodeExecutionModel.preload_offload_data_and_files.return_value = mock_stmt - - # Create a properly configured mock execution - mock_execution = mocker.MagicMock(spec=WorkflowNodeExecutionModel) - configure_mock_execution(mock_execution) - session_obj.scalars.return_value.all.return_value = [mock_execution] - - # Create a mock domain model to be returned by _to_domain_model - mock_domain_model = mocker.MagicMock() - # Mock the _to_domain_model method to return our mock domain model - repository._to_domain_model = mocker.MagicMock(return_value=mock_domain_model) - - # Call method - order_config = OrderConfig(order_by=["index"], order_direction="desc") - result = repository.get_by_workflow_execution( - workflow_execution_id="test-workflow-run-id", - order_config=order_config, - ) - - # Assert select was called with correct parameters - mock_select.assert_called_once() - session_obj.scalars.assert_called_once_with(mock_stmt) - mock_WorkflowNodeExecutionModel.preload_offload_data_and_files.assert_called_once_with(mock_stmt) - # Assert _to_domain_model was called with the mock execution - repository._to_domain_model.assert_called_once_with(mock_execution) - # Assert the result contains our mock domain model - assert len(result) == 1 - assert result[0] is mock_domain_model - - -def test_to_db_model(repository): - """Test to_db_model method.""" - # Create a domain model - domain_model = WorkflowNodeExecution( - id="test-id", - workflow_id="test-workflow-id", - node_execution_id="test-node-execution-id", - workflow_execution_id="test-workflow-run-id", - index=1, - predecessor_node_id="test-predecessor-id", - node_id="test-node-id", - node_type=BuiltinNodeTypes.START, - title="Test Node", - inputs={"input_key": "input_value"}, - process_data={"process_key": "process_value"}, - outputs={"output_key": "output_value"}, - status=WorkflowNodeExecutionStatus.RUNNING, - error=None, - elapsed_time=1.5, - metadata={ - WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: 100, - WorkflowNodeExecutionMetadataKey.TOTAL_PRICE: Decimal("0.0"), - }, - created_at=datetime.now(), - finished_at=None, - ) - - # Convert to DB model - db_model = repository._to_db_model(domain_model) - - # Assert DB model has correct values - assert isinstance(db_model, WorkflowNodeExecutionModel) - assert db_model.id == domain_model.id - assert db_model.tenant_id == repository._tenant_id - assert db_model.app_id == repository._app_id - assert db_model.workflow_id == domain_model.workflow_id - assert db_model.triggered_from == repository._triggered_from - assert db_model.workflow_run_id == domain_model.workflow_execution_id - assert db_model.index == domain_model.index - assert db_model.predecessor_node_id == domain_model.predecessor_node_id - assert db_model.node_execution_id == domain_model.node_execution_id - assert db_model.node_id == domain_model.node_id - assert db_model.node_type == domain_model.node_type - assert db_model.title == domain_model.title - - assert db_model.inputs_dict == domain_model.inputs - assert db_model.process_data_dict == domain_model.process_data - assert db_model.outputs_dict == domain_model.outputs - assert db_model.execution_metadata_dict == jsonable_encoder(domain_model.metadata) - - assert db_model.status == domain_model.status - assert db_model.error == domain_model.error - assert db_model.elapsed_time == domain_model.elapsed_time - assert db_model.created_at == domain_model.created_at - assert db_model.created_by_role == repository._creator_user_role - assert db_model.created_by == repository._creator_user_id - assert db_model.finished_at == domain_model.finished_at - - -def test_to_domain_model(repository): - """Test _to_domain_model method.""" - # Create input dictionaries - inputs_dict = {"input_key": "input_value"} - process_data_dict = {"process_key": "process_value"} - outputs_dict = {"output_key": "output_value"} - metadata_dict = {str(WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS): 100} - - # Create a DB model using our custom subclass - db_model = WorkflowNodeExecutionModel() - db_model.id = "test-id" - db_model.tenant_id = "test-tenant-id" - db_model.app_id = "test-app-id" - db_model.workflow_id = "test-workflow-id" - db_model.triggered_from = "workflow-run" - db_model.workflow_run_id = "test-workflow-run-id" - db_model.index = 1 - db_model.predecessor_node_id = "test-predecessor-id" - db_model.node_execution_id = "test-node-execution-id" - db_model.node_id = "test-node-id" - db_model.node_type = BuiltinNodeTypes.START - db_model.title = "Test Node" - db_model.inputs = json.dumps(inputs_dict) - db_model.process_data = json.dumps(process_data_dict) - db_model.outputs = json.dumps(outputs_dict) - db_model.status = WorkflowNodeExecutionStatus.RUNNING - db_model.error = None - db_model.elapsed_time = 1.5 - db_model.execution_metadata = json.dumps(metadata_dict) - db_model.created_at = datetime.now() - db_model.created_by_role = "account" - db_model.created_by = "test-user-id" - db_model.finished_at = None - - # Convert to domain model - domain_model = repository._to_domain_model(db_model) - - # Assert domain model has correct values - assert isinstance(domain_model, WorkflowNodeExecution) - assert domain_model.id == db_model.id - assert domain_model.workflow_id == db_model.workflow_id - assert domain_model.workflow_execution_id == db_model.workflow_run_id - assert domain_model.index == db_model.index - assert domain_model.predecessor_node_id == db_model.predecessor_node_id - assert domain_model.node_execution_id == db_model.node_execution_id - assert domain_model.node_id == db_model.node_id - assert domain_model.node_type == db_model.node_type - assert domain_model.title == db_model.title - assert domain_model.inputs == inputs_dict - assert domain_model.process_data == process_data_dict - assert domain_model.outputs == outputs_dict - assert domain_model.status == WorkflowNodeExecutionStatus(db_model.status) - assert domain_model.error == db_model.error - assert domain_model.elapsed_time == db_model.elapsed_time - assert domain_model.metadata == metadata_dict - assert domain_model.created_at == db_model.created_at - assert domain_model.finished_at == db_model.finished_at diff --git a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py b/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py deleted file mode 100644 index 2322be9e80..0000000000 --- a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -Unit tests for SQLAlchemyWorkflowNodeExecutionRepository, focusing on process_data truncation functionality. -""" - -from datetime import datetime -from typing import Any -from unittest.mock import MagicMock, Mock - -from graphon.entities import WorkflowNodeExecution -from graphon.enums import BuiltinNodeTypes -from sqlalchemy.orm import sessionmaker - -from core.repositories.sqlalchemy_workflow_node_execution_repository import ( - SQLAlchemyWorkflowNodeExecutionRepository, -) -from models import Account, WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom - - -class TestSQLAlchemyWorkflowNodeExecutionRepositoryProcessData: - """Test process_data truncation functionality in SQLAlchemyWorkflowNodeExecutionRepository.""" - - def create_mock_account(self) -> Account: - """Create a mock Account for testing.""" - account = Mock(spec=Account) - account.id = "test-user-id" - account.tenant_id = "test-tenant-id" - return account - - def create_mock_session_factory(self) -> sessionmaker: - """Create a mock session factory for testing.""" - mock_session = MagicMock() - mock_session_factory = MagicMock(spec=sessionmaker) - mock_session_factory.return_value.__enter__.return_value = mock_session - mock_session_factory.return_value.__exit__.return_value = None - return mock_session_factory - - def create_repository(self, mock_file_service=None) -> SQLAlchemyWorkflowNodeExecutionRepository: - """Create a repository instance for testing.""" - mock_account = self.create_mock_account() - mock_session_factory = self.create_mock_session_factory() - - repository = SQLAlchemyWorkflowNodeExecutionRepository( - session_factory=mock_session_factory, - user=mock_account, - app_id="test-app-id", - triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, - ) - - if mock_file_service: - repository._file_service = mock_file_service - - return repository - - def create_workflow_node_execution( - self, - process_data: dict[str, Any] | None = None, - execution_id: str = "test-execution-id", - ) -> WorkflowNodeExecution: - """Create a WorkflowNodeExecution instance for testing.""" - return WorkflowNodeExecution( - id=execution_id, - workflow_id="test-workflow-id", - index=1, - node_id="test-node-id", - node_type=BuiltinNodeTypes.LLM, - title="Test Node", - process_data=process_data, - created_at=datetime.now(), - ) - - def test_to_domain_model_without_offload_data(self): - """Test _to_domain_model without offload data.""" - repository = self.create_repository() - - # Create mock database model without offload data - db_model = Mock(spec=WorkflowNodeExecutionModel) - db_model.id = "test-execution-id" - db_model.node_execution_id = "test-node-execution-id" - db_model.workflow_id = "test-workflow-id" - db_model.workflow_run_id = None - db_model.index = 1 - db_model.predecessor_node_id = None - db_model.node_id = "test-node-id" - db_model.node_type = "llm" - db_model.title = "Test Node" - db_model.status = "succeeded" - db_model.error = None - db_model.elapsed_time = 1.5 - db_model.created_at = datetime.now() - db_model.finished_at = None - - process_data = {"normal": "data"} - db_model.process_data_dict = process_data - db_model.inputs_dict = None - db_model.outputs_dict = None - db_model.execution_metadata_dict = {} - db_model.offload_data = None - - domain_model = repository._to_domain_model(db_model) - - # Domain model should have the data from database - assert domain_model.process_data == process_data - - # Should not be truncated - assert domain_model.process_data_truncated is False - assert domain_model.get_truncated_process_data() is None From b3aebb71ff9eb86855c936bf552d61fedcb8d2e8 Mon Sep 17 00:00:00 2001 From: BitToby <218712309+bittoby@users.noreply.github.com> Date: Fri, 10 Apr 2026 19:36:50 +0200 Subject: [PATCH 002/147] refactor(api): type Document.to_dict with DocumentDict TypedDict (#34924) Co-authored-by: bittoby --- api/models/dataset.py | 57 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/api/models/dataset.py b/api/models/dataset.py index 97604848af..f6aa81f35d 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -108,6 +108,56 @@ class ExternalKnowledgeApiDict(TypedDict): created_at: str +class DocumentDict(TypedDict): + id: str + tenant_id: str + dataset_id: str + position: int + data_source_type: str + data_source_info: str | None + dataset_process_rule_id: str | None + batch: str + name: str + created_from: str + created_by: str + created_api_request_id: str | None + created_at: datetime + processing_started_at: datetime | None + file_id: str | None + word_count: int | None + parsing_completed_at: datetime | None + cleaning_completed_at: datetime | None + splitting_completed_at: datetime | None + tokens: int | None + indexing_latency: float | None + completed_at: datetime | None + is_paused: bool | None + paused_by: str | None + paused_at: datetime | None + error: str | None + stopped_at: datetime | None + indexing_status: str + enabled: bool + disabled_at: datetime | None + disabled_by: str | None + archived: bool + archived_reason: str | None + archived_by: str | None + archived_at: datetime | None + updated_at: datetime + doc_type: str | None + doc_metadata: Any + doc_form: IndexStructureType + doc_language: str | None + display_status: str | None + data_source_info_dict: dict[str, Any] + average_segment_length: int + dataset_process_rule: ProcessRuleDict | None + dataset: None + segment_count: int | None + hit_count: int | None + + class DatasetPermissionEnum(enum.StrEnum): ONLY_ME = "only_me" ALL_TEAM = "all_team_members" @@ -675,8 +725,8 @@ class Document(Base): ) return built_in_fields - def to_dict(self) -> dict[str, Any]: - return { + def to_dict(self) -> DocumentDict: + result: DocumentDict = { "id": self.id, "tenant_id": self.tenant_id, "dataset_id": self.dataset_id, @@ -721,10 +771,11 @@ class Document(Base): "data_source_info_dict": self.data_source_info_dict, "average_segment_length": self.average_segment_length, "dataset_process_rule": self.dataset_process_rule.to_dict() if self.dataset_process_rule else None, - "dataset": None, # Dataset class doesn't have a to_dict method + "dataset": None, "segment_count": self.segment_count, "hit_count": self.hit_count, } + return result @classmethod def from_dict(cls, data: dict[str, Any]): From f962e61315e83b677e729a8674d28b8868a638eb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 11 Apr 2026 09:36:28 +0900 Subject: [PATCH 003/147] chore(deps): bump pypdf from 6.9.2 to 6.10.0 in /api (#34946) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- api/uv.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/uv.lock b/api/uv.lock index b67646cb71..242015e837 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -4763,11 +4763,11 @@ wheels = [ [[package]] name = "pypdf" -version = "6.9.2" +version = "6.10.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/31/83/691bdb309306232362503083cb15777491045dd54f45393a317dc7d8082f/pypdf-6.9.2.tar.gz", hash = "sha256:7f850faf2b0d4ab936582c05da32c52214c2b089d61a316627b5bfb5b0dab46c", size = 5311837, upload-time = "2026-03-23T14:53:27.983Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b8/9f/ca96abf18683ca12602065e4ed2bec9050b672c87d317f1079abc7b6d993/pypdf-6.10.0.tar.gz", hash = "sha256:4c5a48ba258c37024ec2505f7e8fd858525f5502784a2e1c8d415604af29f6ef", size = 5314833, upload-time = "2026-04-10T09:34:57.102Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/7e/c85f41243086a8fe5d1baeba527cb26a1918158a565932b41e0f7c0b32e9/pypdf-6.9.2-py3-none-any.whl", hash = "sha256:662cf29bcb419a36a1365232449624ab40b7c2d0cfc28e54f42eeecd1fd7e844", size = 333744, upload-time = "2026-03-23T14:53:26.573Z" }, + { url = "https://files.pythonhosted.org/packages/55/f2/7ebe366f633f30a6ad105f650f44f24f98cb1335c4157d21ae47138b3482/pypdf-6.10.0-py3-none-any.whl", hash = "sha256:90005e959e1596c6e6c84c8b0ad383285b3e17011751cedd17f2ce8fcdfc86de", size = 334459, upload-time = "2026-04-10T09:34:54.966Z" }, ] [[package]] From 992ac38d0d332497344bd63ce4c90f7888b577a9 Mon Sep 17 00:00:00 2001 From: YBoy <231405196+YB0y@users.noreply.github.com> Date: Sat, 11 Apr 2026 02:37:10 +0200 Subject: [PATCH 004/147] refactor(api): type ToolInvokeMeta.to_dict with TypedDict (#34942) --- api/core/tools/entities/tool_entities.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/api/core/tools/entities/tool_entities.py b/api/core/tools/entities/tool_entities.py index 31e879add2..b4253652f9 100644 --- a/api/core/tools/entities/tool_entities.py +++ b/api/core/tools/entities/tool_entities.py @@ -450,6 +450,12 @@ class WorkflowToolParameterConfiguration(BaseModel): form: ToolParameter.ToolParameterForm = Field(..., description="The form of the parameter") +class ToolInvokeMetaDict(TypedDict): + time_cost: float + error: str | None + tool_config: dict[str, Any] | None + + class ToolInvokeMeta(BaseModel): """ Tool invoke meta @@ -473,12 +479,13 @@ class ToolInvokeMeta(BaseModel): """ return cls(time_cost=0.0, error=error, tool_config={}) - def to_dict(self): - return { + def to_dict(self) -> ToolInvokeMetaDict: + result: ToolInvokeMetaDict = { "time_cost": self.time_cost, "error": self.error, "tool_config": self.tool_config, } + return result class ToolLabel(BaseModel): From f2d6275da405b27724f423c4266bc9bd5b3f0c5c Mon Sep 17 00:00:00 2001 From: YBoy <231405196+YB0y@users.noreply.github.com> Date: Sat, 11 Apr 2026 02:38:16 +0200 Subject: [PATCH 005/147] refactor(api): type get_prompt_template with TypedDict (#34943) --- api/core/prompt/simple_prompt_transform.py | 31 ++++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/api/core/prompt/simple_prompt_transform.py b/api/core/prompt/simple_prompt_transform.py index c706353ffe..36fca60db3 100644 --- a/api/core/prompt/simple_prompt_transform.py +++ b/api/core/prompt/simple_prompt_transform.py @@ -2,7 +2,7 @@ import json import os from collections.abc import Mapping, Sequence from enum import StrEnum, auto -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any, TypedDict, cast from graphon.file import file_manager from graphon.model_runtime.entities.message_entities import ( @@ -34,6 +34,13 @@ class ModelMode(StrEnum): prompt_file_contents: dict[str, Any] = {} +class PromptTemplateConfigDict(TypedDict): + prompt_template: PromptTemplateParser + custom_variable_keys: list[str] + special_variable_keys: list[str] + prompt_rules: dict[str, Any] + + class SimplePromptTransform(PromptTransform): """ Simple Prompt Transform for Chatbot App Basic Mode. @@ -105,18 +112,13 @@ class SimplePromptTransform(PromptTransform): with_memory_prompt=histories is not None, ) - custom_variable_keys_obj = prompt_template_config["custom_variable_keys"] - special_variable_keys_obj = prompt_template_config["special_variable_keys"] + custom_variable_keys = prompt_template_config["custom_variable_keys"] + if not isinstance(custom_variable_keys, list): + raise TypeError(f"Expected list for custom_variable_keys, got {type(custom_variable_keys)}") - # Type check for custom_variable_keys - if not isinstance(custom_variable_keys_obj, list): - raise TypeError(f"Expected list for custom_variable_keys, got {type(custom_variable_keys_obj)}") - custom_variable_keys = cast(list[str], custom_variable_keys_obj) - - # Type check for special_variable_keys - if not isinstance(special_variable_keys_obj, list): - raise TypeError(f"Expected list for special_variable_keys, got {type(special_variable_keys_obj)}") - special_variable_keys = cast(list[str], special_variable_keys_obj) + special_variable_keys = prompt_template_config["special_variable_keys"] + if not isinstance(special_variable_keys, list): + raise TypeError(f"Expected list for special_variable_keys, got {type(special_variable_keys)}") variables = {k: inputs[k] for k in custom_variable_keys if k in inputs} @@ -150,7 +152,7 @@ class SimplePromptTransform(PromptTransform): has_context: bool, query_in_prompt: bool, with_memory_prompt: bool = False, - ) -> dict[str, object]: + ) -> PromptTemplateConfigDict: prompt_rules = self._get_prompt_rule(app_mode=app_mode, provider=provider, model=model) custom_variable_keys: list[str] = [] @@ -173,12 +175,13 @@ class SimplePromptTransform(PromptTransform): prompt += prompt_rules.get("query_prompt", "{{#query#}}") special_variable_keys.append("#query#") - return { + result: PromptTemplateConfigDict = { "prompt_template": PromptTemplateParser(template=prompt), "custom_variable_keys": custom_variable_keys, "special_variable_keys": special_variable_keys, "prompt_rules": prompt_rules, } + return result def _get_chat_model_prompt_messages( self, From e0d69204cdc46be01cff8cd91025e2e8fdb4387f Mon Sep 17 00:00:00 2001 From: YBoy <231405196+YB0y@users.noreply.github.com> Date: Sat, 11 Apr 2026 02:39:06 +0200 Subject: [PATCH 006/147] refactor(api): type DatasourceInvokeMeta.to_dict with TypedDict (#34940) --- api/core/datasource/entities/datasource_entities.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/api/core/datasource/entities/datasource_entities.py b/api/core/datasource/entities/datasource_entities.py index f20bab53f0..01f87b67f8 100644 --- a/api/core/datasource/entities/datasource_entities.py +++ b/api/core/datasource/entities/datasource_entities.py @@ -2,7 +2,7 @@ from __future__ import annotations import enum from enum import StrEnum -from typing import Any +from typing import Any, TypedDict from pydantic import BaseModel, Field, ValidationInfo, field_validator from yarl import URL @@ -179,6 +179,12 @@ class DatasourceProviderEntityWithPlugin(DatasourceProviderEntity): datasources: list[DatasourceEntity] = Field(default_factory=list) +class DatasourceInvokeMetaDict(TypedDict): + time_cost: float + error: str | None + tool_config: dict[str, Any] | None + + class DatasourceInvokeMeta(BaseModel): """ Datasource invoke meta @@ -202,12 +208,13 @@ class DatasourceInvokeMeta(BaseModel): """ return cls(time_cost=0.0, error=error, tool_config={}) - def to_dict(self) -> dict: - return { + def to_dict(self) -> DatasourceInvokeMetaDict: + result: DatasourceInvokeMetaDict = { "time_cost": self.time_cost, "error": self.error, "tool_config": self.tool_config, } + return result class DatasourceLabel(BaseModel): From 4be479fa067433f80537e68fdfb74cf10be6aa00 Mon Sep 17 00:00:00 2001 From: YBoy <231405196+YB0y@users.noreply.github.com> Date: Sat, 11 Apr 2026 02:39:37 +0200 Subject: [PATCH 007/147] refactor(api): type SQLALCHEMY_ENGINE_OPTIONS with TypedDict (#34941) --- api/configs/middleware/__init__.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py index 15ac8bf0bf..817284d26f 100644 --- a/api/configs/middleware/__init__.py +++ b/api/configs/middleware/__init__.py @@ -1,5 +1,5 @@ import os -from typing import Any, Literal +from typing import Any, Literal, TypedDict from urllib.parse import parse_qsl, quote_plus from pydantic import Field, NonNegativeFloat, NonNegativeInt, PositiveFloat, PositiveInt, computed_field @@ -107,6 +107,17 @@ class KeywordStoreConfig(BaseSettings): ) +class SQLAlchemyEngineOptionsDict(TypedDict): + pool_size: int + max_overflow: int + pool_recycle: int + pool_pre_ping: bool + connect_args: dict[str, str] + pool_use_lifo: bool + pool_reset_on_return: None + pool_timeout: int + + class DatabaseConfig(BaseSettings): # Database type selector DB_TYPE: Literal["postgresql", "mysql", "oceanbase", "seekdb"] = Field( @@ -209,11 +220,11 @@ class DatabaseConfig(BaseSettings): @computed_field # type: ignore[prop-decorator] @property - def SQLALCHEMY_ENGINE_OPTIONS(self) -> dict[str, Any]: + def SQLALCHEMY_ENGINE_OPTIONS(self) -> SQLAlchemyEngineOptionsDict: # Parse DB_EXTRAS for 'options' db_extras_dict = dict(parse_qsl(self.DB_EXTRAS)) options = db_extras_dict.get("options", "") - connect_args = {} + connect_args: dict[str, str] = {} # Use the dynamic SQLALCHEMY_DATABASE_URI_SCHEME property if self.SQLALCHEMY_DATABASE_URI_SCHEME.startswith("postgresql"): timezone_opt = "-c timezone=UTC" @@ -223,7 +234,7 @@ class DatabaseConfig(BaseSettings): merged_options = timezone_opt connect_args = {"options": merged_options} - return { + result: SQLAlchemyEngineOptionsDict = { "pool_size": self.SQLALCHEMY_POOL_SIZE, "max_overflow": self.SQLALCHEMY_MAX_OVERFLOW, "pool_recycle": self.SQLALCHEMY_POOL_RECYCLE, @@ -233,6 +244,7 @@ class DatabaseConfig(BaseSettings): "pool_reset_on_return": None, "pool_timeout": self.SQLALCHEMY_POOL_TIMEOUT, } + return result class CeleryConfig(DatabaseConfig): From 5ec387b6448a1809d30e69c8dd24b3351999304e Mon Sep 17 00:00:00 2001 From: NVIDIAN Date: Fri, 10 Apr 2026 17:53:13 -0700 Subject: [PATCH 008/147] refactor: replace inline api.model with Pydantic BaseModel in model_config (#34930) Co-authored-by: ai-hpc --- api/controllers/console/app/model_config.py | 44 +++++++++++---------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/api/controllers/console/app/model_config.py b/api/controllers/console/app/model_config.py index 8bb5aa2c1b..1869cbf5f6 100644 --- a/api/controllers/console/app/model_config.py +++ b/api/controllers/console/app/model_config.py @@ -1,9 +1,11 @@ import json -from typing import cast +from typing import Any, cast from flask import request -from flask_restx import Resource, fields +from flask_restx import Resource +from pydantic import BaseModel, Field +from controllers.common.schema import register_schema_models from controllers.console import console_ns from controllers.console.app.wraps import get_app_model from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required @@ -18,30 +20,30 @@ from models.model import AppMode, AppModelConfig from services.app_model_config_service import AppModelConfigService +class ModelConfigRequest(BaseModel): + provider: str | None = Field(default=None, description="Model provider") + model: str | None = Field(default=None, description="Model name") + configs: dict[str, Any] | None = Field(default=None, description="Model configuration parameters") + opening_statement: str | None = Field(default=None, description="Opening statement") + suggested_questions: list[str] | None = Field(default=None, description="Suggested questions") + more_like_this: dict[str, Any] | None = Field(default=None, description="More like this configuration") + speech_to_text: dict[str, Any] | None = Field(default=None, description="Speech to text configuration") + text_to_speech: dict[str, Any] | None = Field(default=None, description="Text to speech configuration") + retrieval_model: dict[str, Any] | None = Field(default=None, description="Retrieval model configuration") + tools: list[dict[str, Any]] | None = Field(default=None, description="Available tools") + dataset_configs: dict[str, Any] | None = Field(default=None, description="Dataset configurations") + agent_mode: dict[str, Any] | None = Field(default=None, description="Agent mode configuration") + + +register_schema_models(console_ns, ModelConfigRequest) + + @console_ns.route("/apps//model-config") class ModelConfigResource(Resource): @console_ns.doc("update_app_model_config") @console_ns.doc(description="Update application model configuration") @console_ns.doc(params={"app_id": "Application ID"}) - @console_ns.expect( - console_ns.model( - "ModelConfigRequest", - { - "provider": fields.String(description="Model provider"), - "model": fields.String(description="Model name"), - "configs": fields.Raw(description="Model configuration parameters"), - "opening_statement": fields.String(description="Opening statement"), - "suggested_questions": fields.List(fields.String(), description="Suggested questions"), - "more_like_this": fields.Raw(description="More like this configuration"), - "speech_to_text": fields.Raw(description="Speech to text configuration"), - "text_to_speech": fields.Raw(description="Text to speech configuration"), - "retrieval_model": fields.Raw(description="Retrieval model configuration"), - "tools": fields.List(fields.Raw(), description="Available tools"), - "dataset_configs": fields.Raw(description="Dataset configurations"), - "agent_mode": fields.Raw(description="Agent mode configuration"), - }, - ) - ) + @console_ns.expect(console_ns.models[ModelConfigRequest.__name__]) @console_ns.response(200, "Model configuration updated successfully") @console_ns.response(400, "Invalid configuration") @console_ns.response(404, "App not found") From 7192af41e45fb59b0f69f46991104d7c02d3c715 Mon Sep 17 00:00:00 2001 From: BitToby <218712309+bittoby@users.noreply.github.com> Date: Sat, 11 Apr 2026 02:54:58 +0200 Subject: [PATCH 009/147] test: remove dataset service update/delete mock tests superseded by testcontainers (#34937) --- .../services/dataset_service_update_delete.py | 818 ------------------ 1 file changed, 818 deletions(-) delete mode 100644 api/tests/unit_tests/services/dataset_service_update_delete.py diff --git a/api/tests/unit_tests/services/dataset_service_update_delete.py b/api/tests/unit_tests/services/dataset_service_update_delete.py deleted file mode 100644 index 62c39f96d3..0000000000 --- a/api/tests/unit_tests/services/dataset_service_update_delete.py +++ /dev/null @@ -1,818 +0,0 @@ -""" -Comprehensive unit tests for DatasetService update and delete operations. - -This module contains extensive unit tests for the DatasetService class, -specifically focusing on update and delete operations for datasets. - -The DatasetService provides methods for: -- Updating dataset configuration and settings (update_dataset) -- Deleting datasets with proper cleanup (delete_dataset) -- Updating RAG pipeline dataset settings (update_rag_pipeline_dataset_settings) -- Checking if dataset is in use (dataset_use_check) -- Updating dataset API access status (update_dataset_api_status) - -These operations are critical for dataset lifecycle management and require -careful handling of permissions, dependencies, and data integrity. - -This test suite ensures: -- Correct update of dataset properties -- Proper permission validation before updates/deletes -- Cascade deletion handling -- Event signaling for cleanup operations -- RAG pipeline dataset configuration updates -- API status management -- Use check validation - -================================================================================ -ARCHITECTURE OVERVIEW -================================================================================ - -The DatasetService update and delete operations are part of the dataset -lifecycle management system. These operations interact with multiple -components: - -1. Permission System: All update/delete operations require proper - permission validation to ensure users can only modify datasets they - have access to. - -2. Event System: Dataset deletion triggers the dataset_was_deleted event, - which notifies other components to clean up related data (documents, - segments, vector indices, etc.). - -3. Dependency Checking: Before deletion, the system checks if the dataset - is in use by any applications (via AppDatasetJoin). - -4. RAG Pipeline Integration: RAG pipeline datasets have special update - logic that handles chunk structure, indexing techniques, and embedding - model configuration. - -5. API Status Management: Datasets can have their API access enabled or - disabled, which affects whether they can be accessed via the API. - -================================================================================ -TESTING STRATEGY -================================================================================ - -This test suite follows a comprehensive testing strategy that covers: - -1. Update Operations: - - Internal dataset updates - - External dataset updates - - RAG pipeline dataset updates - - Permission validation - - Name duplicate checking - - Configuration validation - -2. Delete Operations: - - Successful deletion - - Permission validation - - Event signaling - - Database cleanup - - Not found handling - -3. Use Check Operations: - - Dataset in use detection - - Dataset not in use detection - - AppDatasetJoin query validation - -4. API Status Operations: - - Enable API access - - Disable API access - - Permission validation - - Current user validation - -5. RAG Pipeline Operations: - - Unpublished dataset updates - - Published dataset updates - - Chunk structure validation - - Indexing technique changes - - Embedding model configuration - -================================================================================ -""" - -import datetime -from unittest.mock import Mock, create_autospec, patch - -import pytest -from sqlalchemy.orm import Session - -from core.rag.index_processor.constant.index_type import IndexTechniqueType -from models import Account, TenantAccountRole -from models.dataset import ( - AppDatasetJoin, - Dataset, - DatasetPermissionEnum, -) -from services.dataset_service import DatasetService -from services.errors.account import NoPermissionError - -# ============================================================================ -# Test Data Factory -# ============================================================================ -# The Test Data Factory pattern is used here to centralize the creation of -# test objects and mock instances. This approach provides several benefits: -# -# 1. Consistency: All test objects are created using the same factory methods, -# ensuring consistent structure across all tests. -# -# 2. Maintainability: If the structure of models or services changes, we only -# need to update the factory methods rather than every individual test. -# -# 3. Reusability: Factory methods can be reused across multiple test classes, -# reducing code duplication. -# -# 4. Readability: Tests become more readable when they use descriptive factory -# method calls instead of complex object construction logic. -# -# ============================================================================ - - -class DatasetUpdateDeleteTestDataFactory: - """ - Factory class for creating test data and mock objects for dataset update/delete tests. - - This factory provides static methods to create mock objects for: - - Dataset instances with various configurations - - User/Account instances with different roles - - Knowledge configuration objects - - Database session mocks - - Event signal mocks - - The factory methods help maintain consistency across tests and reduce - code duplication when setting up test scenarios. - """ - - @staticmethod - def create_dataset_mock( - dataset_id: str = "dataset-123", - provider: str = "vendor", - name: str = "Test Dataset", - description: str = "Test description", - tenant_id: str = "tenant-123", - indexing_technique: str = IndexTechniqueType.HIGH_QUALITY, - embedding_model_provider: str | None = "openai", - embedding_model: str | None = "text-embedding-ada-002", - collection_binding_id: str | None = "binding-123", - enable_api: bool = True, - permission: DatasetPermissionEnum = DatasetPermissionEnum.ONLY_ME, - created_by: str = "user-123", - chunk_structure: str | None = None, - runtime_mode: str = "general", - **kwargs, - ) -> Mock: - """ - Create a mock Dataset with specified attributes. - - Args: - dataset_id: Unique identifier for the dataset - provider: Dataset provider (vendor, external) - name: Dataset name - description: Dataset description - tenant_id: Tenant identifier - indexing_technique: Indexing technique (high_quality, economy) - embedding_model_provider: Embedding model provider - embedding_model: Embedding model name - collection_binding_id: Collection binding ID - enable_api: Whether API access is enabled - permission: Dataset permission level - created_by: ID of user who created the dataset - chunk_structure: Chunk structure for RAG pipeline datasets - runtime_mode: Runtime mode (general, rag_pipeline) - **kwargs: Additional attributes to set on the mock - - Returns: - Mock object configured as a Dataset instance - """ - dataset = Mock(spec=Dataset) - dataset.id = dataset_id - dataset.provider = provider - dataset.name = name - dataset.description = description - dataset.tenant_id = tenant_id - dataset.indexing_technique = indexing_technique - dataset.embedding_model_provider = embedding_model_provider - dataset.embedding_model = embedding_model - dataset.collection_binding_id = collection_binding_id - dataset.enable_api = enable_api - dataset.permission = permission - dataset.created_by = created_by - dataset.chunk_structure = chunk_structure - dataset.runtime_mode = runtime_mode - dataset.retrieval_model = {} - dataset.keyword_number = 10 - for key, value in kwargs.items(): - setattr(dataset, key, value) - return dataset - - @staticmethod - def create_user_mock( - user_id: str = "user-123", - tenant_id: str = "tenant-123", - role: TenantAccountRole = TenantAccountRole.NORMAL, - is_dataset_editor: bool = True, - **kwargs, - ) -> Mock: - """ - Create a mock user (Account) with specified attributes. - - Args: - user_id: Unique identifier for the user - tenant_id: Tenant identifier - role: User role (OWNER, ADMIN, NORMAL, etc.) - is_dataset_editor: Whether user has dataset editor permissions - **kwargs: Additional attributes to set on the mock - - Returns: - Mock object configured as an Account instance - """ - user = create_autospec(Account, instance=True) - user.id = user_id - user.current_tenant_id = tenant_id - user.current_role = role - user.is_dataset_editor = is_dataset_editor - for key, value in kwargs.items(): - setattr(user, key, value) - return user - - @staticmethod - def create_knowledge_configuration_mock( - chunk_structure: str = "tree", - indexing_technique: str = IndexTechniqueType.HIGH_QUALITY, - embedding_model_provider: str = "openai", - embedding_model: str = "text-embedding-ada-002", - keyword_number: int = 10, - retrieval_model: dict | None = None, - **kwargs, - ) -> Mock: - """ - Create a mock KnowledgeConfiguration entity. - - Args: - chunk_structure: Chunk structure type - indexing_technique: Indexing technique - embedding_model_provider: Embedding model provider - embedding_model: Embedding model name - keyword_number: Keyword number for economy indexing - retrieval_model: Retrieval model configuration - **kwargs: Additional attributes to set on the mock - - Returns: - Mock object configured as a KnowledgeConfiguration instance - """ - config = Mock() - config.chunk_structure = chunk_structure - config.indexing_technique = indexing_technique - config.embedding_model_provider = embedding_model_provider - config.embedding_model = embedding_model - config.keyword_number = keyword_number - config.retrieval_model = Mock() - config.retrieval_model.model_dump.return_value = retrieval_model or { - "search_method": "semantic_search", - "top_k": 2, - } - for key, value in kwargs.items(): - setattr(config, key, value) - return config - - @staticmethod - def create_app_dataset_join_mock( - app_id: str = "app-123", - dataset_id: str = "dataset-123", - **kwargs, - ) -> Mock: - """ - Create a mock AppDatasetJoin instance. - - Args: - app_id: Application ID - dataset_id: Dataset ID - **kwargs: Additional attributes to set on the mock - - Returns: - Mock object configured as an AppDatasetJoin instance - """ - join = Mock(spec=AppDatasetJoin) - join.app_id = app_id - join.dataset_id = dataset_id - for key, value in kwargs.items(): - setattr(join, key, value) - return join - - -# ============================================================================ -# Tests for update_dataset -# ============================================================================ - - -class TestDatasetServiceUpdateDataset: - """ - Comprehensive unit tests for DatasetService.update_dataset method. - - This test class covers the dataset update functionality, including - internal and external dataset updates, permission validation, and - name duplicate checking. - - The update_dataset method: - 1. Retrieves the dataset by ID - 2. Validates dataset exists - 3. Checks for duplicate names - 4. Validates user permissions - 5. Routes to appropriate update handler (internal or external) - 6. Returns the updated dataset - - Test scenarios include: - - Successful internal dataset updates - - Successful external dataset updates - - Permission validation - - Duplicate name detection - - Dataset not found errors - """ - - @pytest.fixture - def mock_dataset_service_dependencies(self): - """ - Mock dataset service dependencies for testing. - - Provides mocked dependencies including: - - get_dataset method - - check_dataset_permission method - - _has_dataset_same_name method - - Database session - - Current time utilities - """ - with ( - patch("services.dataset_service.DatasetService.get_dataset") as mock_get_dataset, - patch("services.dataset_service.DatasetService.check_dataset_permission") as mock_check_perm, - patch("services.dataset_service.DatasetService._has_dataset_same_name") as mock_has_same_name, - patch("extensions.ext_database.db.session") as mock_db, - patch("services.dataset_service.naive_utc_now") as mock_naive_utc_now, - ): - current_time = datetime.datetime(2023, 1, 1, 12, 0, 0) - mock_naive_utc_now.return_value = current_time - - yield { - "get_dataset": mock_get_dataset, - "check_permission": mock_check_perm, - "has_same_name": mock_has_same_name, - "db_session": mock_db, - "naive_utc_now": mock_naive_utc_now, - "current_time": current_time, - } - - def test_update_dataset_internal_success(self, mock_dataset_service_dependencies): - """ - Test successful update of an internal dataset. - - Verifies that when all validation passes, an internal dataset - is updated correctly through the _update_internal_dataset method. - - This test ensures: - - Dataset is retrieved correctly - - Permission is checked - - Name duplicate check is performed - - Internal update handler is called - - Updated dataset is returned - """ - # Arrange - dataset_id = "dataset-123" - dataset = DatasetUpdateDeleteTestDataFactory.create_dataset_mock( - dataset_id=dataset_id, provider="vendor", name="Old Name" - ) - user = DatasetUpdateDeleteTestDataFactory.create_user_mock() - - update_data = { - "name": "New Name", - "description": "New Description", - } - - mock_dataset_service_dependencies["get_dataset"].return_value = dataset - mock_dataset_service_dependencies["has_same_name"].return_value = False - - with patch("services.dataset_service.DatasetService._update_internal_dataset") as mock_update_internal: - mock_update_internal.return_value = dataset - - # Act - result = DatasetService.update_dataset(dataset_id, update_data, user) - - # Assert - assert result == dataset - - # Verify dataset was retrieved - mock_dataset_service_dependencies["get_dataset"].assert_called_once_with(dataset_id) - - # Verify permission was checked - mock_dataset_service_dependencies["check_permission"].assert_called_once_with(dataset, user) - - # Verify name duplicate check was performed - mock_dataset_service_dependencies["has_same_name"].assert_called_once() - - # Verify internal update handler was called - mock_update_internal.assert_called_once() - - def test_update_dataset_external_success(self, mock_dataset_service_dependencies): - """ - Test successful update of an external dataset. - - Verifies that when all validation passes, an external dataset - is updated correctly through the _update_external_dataset method. - - This test ensures: - - Dataset is retrieved correctly - - Permission is checked - - Name duplicate check is performed - - External update handler is called - - Updated dataset is returned - """ - # Arrange - dataset_id = "dataset-123" - dataset = DatasetUpdateDeleteTestDataFactory.create_dataset_mock( - dataset_id=dataset_id, provider="external", name="Old Name" - ) - user = DatasetUpdateDeleteTestDataFactory.create_user_mock() - - update_data = { - "name": "New Name", - "external_knowledge_id": "new-knowledge-id", - } - - mock_dataset_service_dependencies["get_dataset"].return_value = dataset - mock_dataset_service_dependencies["has_same_name"].return_value = False - - with patch("services.dataset_service.DatasetService._update_external_dataset") as mock_update_external: - mock_update_external.return_value = dataset - - # Act - result = DatasetService.update_dataset(dataset_id, update_data, user) - - # Assert - assert result == dataset - - # Verify external update handler was called - mock_update_external.assert_called_once() - - def test_update_dataset_not_found_error(self, mock_dataset_service_dependencies): - """ - Test error handling when dataset is not found. - - Verifies that when the dataset ID doesn't exist, a ValueError - is raised with an appropriate message. - - This test ensures: - - Dataset not found error is handled correctly - - No update operations are performed - - Error message is clear - """ - # Arrange - dataset_id = "non-existent-dataset" - user = DatasetUpdateDeleteTestDataFactory.create_user_mock() - - update_data = {"name": "New Name"} - - mock_dataset_service_dependencies["get_dataset"].return_value = None - - # Act & Assert - with pytest.raises(ValueError, match="Dataset not found"): - DatasetService.update_dataset(dataset_id, update_data, user) - - # Verify no update operations were attempted - mock_dataset_service_dependencies["check_permission"].assert_not_called() - mock_dataset_service_dependencies["has_same_name"].assert_not_called() - - def test_update_dataset_duplicate_name_error(self, mock_dataset_service_dependencies): - """ - Test error handling when dataset name already exists. - - Verifies that when a dataset with the same name already exists - in the tenant, a ValueError is raised. - - This test ensures: - - Duplicate name detection works correctly - - Error message is clear - - No update operations are performed - """ - # Arrange - dataset_id = "dataset-123" - dataset = DatasetUpdateDeleteTestDataFactory.create_dataset_mock(dataset_id=dataset_id) - user = DatasetUpdateDeleteTestDataFactory.create_user_mock() - - update_data = {"name": "Existing Name"} - - mock_dataset_service_dependencies["get_dataset"].return_value = dataset - mock_dataset_service_dependencies["has_same_name"].return_value = True # Duplicate exists - - # Act & Assert - with pytest.raises(ValueError, match="Dataset name already exists"): - DatasetService.update_dataset(dataset_id, update_data, user) - - # Verify permission check was not called (fails before that) - mock_dataset_service_dependencies["check_permission"].assert_not_called() - - def test_update_dataset_permission_denied_error(self, mock_dataset_service_dependencies): - """ - Test error handling when user lacks permission. - - Verifies that when the user doesn't have permission to update - the dataset, a NoPermissionError is raised. - - This test ensures: - - Permission validation works correctly - - Error is raised before any updates - - Error type is correct - """ - # Arrange - dataset_id = "dataset-123" - dataset = DatasetUpdateDeleteTestDataFactory.create_dataset_mock(dataset_id=dataset_id) - user = DatasetUpdateDeleteTestDataFactory.create_user_mock() - - update_data = {"name": "New Name"} - - mock_dataset_service_dependencies["get_dataset"].return_value = dataset - mock_dataset_service_dependencies["has_same_name"].return_value = False - mock_dataset_service_dependencies["check_permission"].side_effect = NoPermissionError("No permission") - - # Act & Assert - with pytest.raises(NoPermissionError): - DatasetService.update_dataset(dataset_id, update_data, user) - - -# ============================================================================ -# Tests for update_rag_pipeline_dataset_settings -# ============================================================================ - - -class TestDatasetServiceUpdateRagPipelineDatasetSettings: - """ - Comprehensive unit tests for DatasetService.update_rag_pipeline_dataset_settings method. - - This test class covers the RAG pipeline dataset settings update functionality, - including chunk structure, indexing technique, and embedding model configuration. - - The update_rag_pipeline_dataset_settings method: - 1. Validates current_user and tenant - 2. Merges dataset into session - 3. Handles unpublished vs published datasets differently - 4. Updates chunk structure, indexing technique, and retrieval model - 5. Configures embedding model for high_quality indexing - 6. Updates keyword_number for economy indexing - 7. Commits transaction - 8. Triggers index update tasks if needed - - Test scenarios include: - - Unpublished dataset updates - - Published dataset updates - - Chunk structure validation - - Indexing technique changes - - Embedding model configuration - - Error handling - """ - - @pytest.fixture - def mock_session(self): - """ - Mock database session for testing. - - Provides a mocked SQLAlchemy session for testing session operations. - """ - return Mock(spec=Session) - - @pytest.fixture - def mock_dataset_service_dependencies(self): - """ - Mock dataset service dependencies for testing. - - Provides mocked dependencies including: - - current_user context - - ModelManager - - DatasetCollectionBindingService - - Database session operations - - Task scheduling - """ - with ( - patch( - "services.dataset_service.current_user", create_autospec(Account, instance=True) - ) as mock_current_user, - patch("services.dataset_service.ModelManager.for_tenant") as mock_model_manager, - patch( - "services.dataset_service.DatasetCollectionBindingService.get_dataset_collection_binding" - ) as mock_get_binding, - patch("services.dataset_service.deal_dataset_index_update_task") as mock_task, - ): - mock_current_user.current_tenant_id = "tenant-123" - mock_current_user.id = "user-123" - - yield { - "current_user": mock_current_user, - "model_manager": mock_model_manager, - "get_binding": mock_get_binding, - "task": mock_task, - } - - def test_update_rag_pipeline_dataset_settings_unpublished_success( - self, mock_session, mock_dataset_service_dependencies - ): - """ - Test successful update of unpublished RAG pipeline dataset. - - Verifies that when a dataset is not published, all settings can - be updated including chunk structure and indexing technique. - - This test ensures: - - Current user validation passes - - Dataset is merged into session - - Chunk structure is updated - - Indexing technique is updated - - Embedding model is configured for high_quality - - Retrieval model is updated - - Dataset is added to session - """ - # Arrange - dataset = DatasetUpdateDeleteTestDataFactory.create_dataset_mock( - dataset_id="dataset-123", - runtime_mode="rag_pipeline", - chunk_structure="tree", - indexing_technique=IndexTechniqueType.HIGH_QUALITY, - ) - - knowledge_config = DatasetUpdateDeleteTestDataFactory.create_knowledge_configuration_mock( - chunk_structure="list", - indexing_technique=IndexTechniqueType.HIGH_QUALITY, - embedding_model_provider="openai", - embedding_model="text-embedding-ada-002", - ) - - # Mock embedding model - mock_embedding_model = Mock() - mock_embedding_model.model_name = "text-embedding-ada-002" - mock_embedding_model.provider = "openai" - mock_embedding_model.credentials = {} - - mock_model_schema = Mock() - mock_model_schema.features = [] - - mock_text_embedding_model = Mock() - mock_text_embedding_model.get_model_schema.return_value = mock_model_schema - mock_embedding_model.model_type_instance = mock_text_embedding_model - - mock_model_instance = Mock() - mock_model_instance.get_model_instance.return_value = mock_embedding_model - mock_dataset_service_dependencies["model_manager"].return_value = mock_model_instance - - # Mock collection binding - mock_binding = Mock() - mock_binding.id = "binding-123" - mock_dataset_service_dependencies["get_binding"].return_value = mock_binding - - mock_session.merge.return_value = dataset - - # Act - DatasetService.update_rag_pipeline_dataset_settings( - mock_session, dataset, knowledge_config, has_published=False - ) - - # Assert - assert dataset.chunk_structure == "list" - assert dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY - assert dataset.embedding_model == "text-embedding-ada-002" - assert dataset.embedding_model_provider == "openai" - assert dataset.collection_binding_id == "binding-123" - - # Verify dataset was added to session - mock_session.add.assert_called_once_with(dataset) - - def test_update_rag_pipeline_dataset_settings_published_chunk_structure_error( - self, mock_session, mock_dataset_service_dependencies - ): - """ - Test error handling when trying to update chunk structure of published dataset. - - Verifies that when a dataset is published and has an existing chunk structure, - attempting to change it raises a ValueError. - - This test ensures: - - Chunk structure change is detected - - ValueError is raised with appropriate message - - No updates are committed - """ - # Arrange - dataset = DatasetUpdateDeleteTestDataFactory.create_dataset_mock( - dataset_id="dataset-123", - runtime_mode="rag_pipeline", - chunk_structure="tree", # Existing structure - indexing_technique=IndexTechniqueType.HIGH_QUALITY, - ) - - knowledge_config = DatasetUpdateDeleteTestDataFactory.create_knowledge_configuration_mock( - chunk_structure="list", # Different structure - indexing_technique=IndexTechniqueType.HIGH_QUALITY, - ) - - mock_session.merge.return_value = dataset - - # Act & Assert - with pytest.raises(ValueError, match="Chunk structure is not allowed to be updated"): - DatasetService.update_rag_pipeline_dataset_settings( - mock_session, dataset, knowledge_config, has_published=True - ) - - # Verify no commit was attempted - mock_session.commit.assert_not_called() - - def test_update_rag_pipeline_dataset_settings_published_economy_error( - self, mock_session, mock_dataset_service_dependencies - ): - """ - Test error handling when trying to change to economy indexing on published dataset. - - Verifies that when a dataset is published, changing indexing technique to - economy is not allowed and raises a ValueError. - - This test ensures: - - Economy indexing change is detected - - ValueError is raised with appropriate message - - No updates are committed - """ - # Arrange - dataset = DatasetUpdateDeleteTestDataFactory.create_dataset_mock( - dataset_id="dataset-123", - runtime_mode="rag_pipeline", - indexing_technique=IndexTechniqueType.HIGH_QUALITY, # Current technique - ) - - knowledge_config = DatasetUpdateDeleteTestDataFactory.create_knowledge_configuration_mock( - indexing_technique=IndexTechniqueType.ECONOMY, # Trying to change to economy - ) - - mock_session.merge.return_value = dataset - - # Act & Assert - with pytest.raises( - ValueError, match="Knowledge base indexing technique is not allowed to be updated to economy" - ): - DatasetService.update_rag_pipeline_dataset_settings( - mock_session, dataset, knowledge_config, has_published=True - ) - - def test_update_rag_pipeline_dataset_settings_missing_current_user_error( - self, mock_session, mock_dataset_service_dependencies - ): - """ - Test error handling when current_user is missing. - - Verifies that when current_user is None or has no tenant ID, a ValueError - is raised. - - This test ensures: - - Current user validation works correctly - - Error message is clear - - No updates are performed - """ - # Arrange - dataset = DatasetUpdateDeleteTestDataFactory.create_dataset_mock() - knowledge_config = DatasetUpdateDeleteTestDataFactory.create_knowledge_configuration_mock() - - mock_dataset_service_dependencies["current_user"].current_tenant_id = None # Missing tenant - - # Act & Assert - with pytest.raises(ValueError, match="Current user or current tenant not found"): - DatasetService.update_rag_pipeline_dataset_settings( - mock_session, dataset, knowledge_config, has_published=False - ) - - -# ============================================================================ -# Additional Documentation and Notes -# ============================================================================ -# -# This test suite covers the core update and delete operations for datasets. -# Additional test scenarios that could be added: -# -# 1. Update Operations: -# - Testing with different indexing techniques -# - Testing embedding model provider changes -# - Testing retrieval model updates -# - Testing icon_info updates -# - Testing partial_member_list updates -# -# 2. Delete Operations: -# - Testing cascade deletion of related data -# - Testing event handler execution -# - Testing with datasets that have documents -# - Testing with datasets that have segments -# -# 3. RAG Pipeline Operations: -# - Testing economy indexing technique updates -# - Testing embedding model provider errors -# - Testing keyword_number updates -# - Testing index update task triggering -# -# 4. Integration Scenarios: -# - Testing update followed by delete -# - Testing multiple updates in sequence -# - Testing concurrent update attempts -# - Testing with different user roles -# -# These scenarios are not currently implemented but could be added if needed -# based on real-world usage patterns or discovered edge cases. -# -# ============================================================================ From 0ff41a1127ebe179ff22c144d43f825f141f4d08 Mon Sep 17 00:00:00 2001 From: BitToby <218712309+bittoby@users.noreply.github.com> Date: Sat, 11 Apr 2026 05:37:20 +0200 Subject: [PATCH 010/147] test: remove dataset metadata mock tests superseded by testcontainers (#34931) --- .../unit_tests/services/dataset_metadata.py | 1014 ----------------- 1 file changed, 1014 deletions(-) delete mode 100644 api/tests/unit_tests/services/dataset_metadata.py diff --git a/api/tests/unit_tests/services/dataset_metadata.py b/api/tests/unit_tests/services/dataset_metadata.py deleted file mode 100644 index b825a8686a..0000000000 --- a/api/tests/unit_tests/services/dataset_metadata.py +++ /dev/null @@ -1,1014 +0,0 @@ -""" -Comprehensive unit tests for MetadataService. - -This module contains extensive unit tests for the MetadataService class, -which handles dataset metadata CRUD operations and filtering/querying functionality. - -The MetadataService provides methods for: -- Creating, reading, updating, and deleting metadata fields -- Managing built-in metadata fields -- Updating document metadata values -- Metadata filtering and querying operations -- Lock management for concurrent metadata operations - -Metadata in Dify allows users to add custom fields to datasets and documents, -enabling rich filtering and search capabilities. Metadata can be of various -types (string, number, date, boolean, etc.) and can be used to categorize -and filter documents within a dataset. - -This test suite ensures: -- Correct creation of metadata fields with validation -- Proper updating of metadata names and values -- Accurate deletion of metadata fields -- Built-in field management (enable/disable) -- Document metadata updates (partial and full) -- Lock management for concurrent operations -- Metadata querying and filtering functionality - -================================================================================ -ARCHITECTURE OVERVIEW -================================================================================ - -The MetadataService is a critical component in the Dify platform's metadata -management system. It serves as the primary interface for all metadata-related -operations, including field definitions and document-level metadata values. - -Key Concepts: -1. DatasetMetadata: Defines a metadata field for a dataset. Each metadata - field has a name, type, and is associated with a specific dataset. - -2. DatasetMetadataBinding: Links metadata fields to documents. This allows - tracking which documents have which metadata fields assigned. - -3. Document Metadata: The actual metadata values stored on documents. This - is stored as a JSON object in the document's doc_metadata field. - -4. Built-in Fields: System-defined metadata fields that are automatically - available when enabled (document_name, uploader, upload_date, etc.). - -5. Lock Management: Redis-based locking to prevent concurrent metadata - operations that could cause data corruption. - -================================================================================ -TESTING STRATEGY -================================================================================ - -This test suite follows a comprehensive testing strategy that covers: - -1. CRUD Operations: - - Creating metadata fields with validation - - Reading/retrieving metadata fields - - Updating metadata field names - - Deleting metadata fields - -2. Built-in Field Management: - - Enabling built-in fields - - Disabling built-in fields - - Getting built-in field definitions - -3. Document Metadata Operations: - - Updating document metadata (partial and full) - - Managing metadata bindings - - Handling built-in field updates - -4. Lock Management: - - Acquiring locks for dataset operations - - Acquiring locks for document operations - - Handling lock conflicts - -5. Error Handling: - - Validation errors (name length, duplicates) - - Not found errors - - Lock conflict errors - -================================================================================ -""" - -from unittest.mock import Mock, patch - -import pytest - -from core.rag.index_processor.constant.built_in_field import BuiltInField -from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding -from services.entities.knowledge_entities.knowledge_entities import ( - MetadataArgs, - MetadataValue, -) -from services.metadata_service import MetadataService - -# ============================================================================ -# Test Data Factory -# ============================================================================ -# The Test Data Factory pattern is used here to centralize the creation of -# test objects and mock instances. This approach provides several benefits: -# -# 1. Consistency: All test objects are created using the same factory methods, -# ensuring consistent structure across all tests. -# -# 2. Maintainability: If the structure of models changes, we only need to -# update the factory methods rather than every individual test. -# -# 3. Reusability: Factory methods can be reused across multiple test classes, -# reducing code duplication. -# -# 4. Readability: Tests become more readable when they use descriptive factory -# method calls instead of complex object construction logic. -# -# ============================================================================ - - -class MetadataTestDataFactory: - """ - Factory class for creating test data and mock objects for metadata service tests. - - This factory provides static methods to create mock objects for: - - DatasetMetadata instances - - DatasetMetadataBinding instances - - Dataset instances - - Document instances - - MetadataArgs and MetadataOperationData entities - - User and tenant context - - The factory methods help maintain consistency across tests and reduce - code duplication when setting up test scenarios. - """ - - @staticmethod - def create_metadata_mock( - metadata_id: str = "metadata-123", - dataset_id: str = "dataset-123", - tenant_id: str = "tenant-123", - name: str = "category", - metadata_type: str = "string", - created_by: str = "user-123", - **kwargs, - ) -> Mock: - """ - Create a mock DatasetMetadata with specified attributes. - - Args: - metadata_id: Unique identifier for the metadata field - dataset_id: ID of the dataset this metadata belongs to - tenant_id: Tenant identifier - name: Name of the metadata field - metadata_type: Type of metadata (string, number, date, etc.) - created_by: ID of the user who created the metadata - **kwargs: Additional attributes to set on the mock - - Returns: - Mock object configured as a DatasetMetadata instance - """ - metadata = Mock(spec=DatasetMetadata) - metadata.id = metadata_id - metadata.dataset_id = dataset_id - metadata.tenant_id = tenant_id - metadata.name = name - metadata.type = metadata_type - metadata.created_by = created_by - metadata.updated_by = None - metadata.updated_at = None - for key, value in kwargs.items(): - setattr(metadata, key, value) - return metadata - - @staticmethod - def create_metadata_binding_mock( - binding_id: str = "binding-123", - dataset_id: str = "dataset-123", - tenant_id: str = "tenant-123", - metadata_id: str = "metadata-123", - document_id: str = "document-123", - created_by: str = "user-123", - **kwargs, - ) -> Mock: - """ - Create a mock DatasetMetadataBinding with specified attributes. - - Args: - binding_id: Unique identifier for the binding - dataset_id: ID of the dataset - tenant_id: Tenant identifier - metadata_id: ID of the metadata field - document_id: ID of the document - created_by: ID of the user who created the binding - **kwargs: Additional attributes to set on the mock - - Returns: - Mock object configured as a DatasetMetadataBinding instance - """ - binding = Mock(spec=DatasetMetadataBinding) - binding.id = binding_id - binding.dataset_id = dataset_id - binding.tenant_id = tenant_id - binding.metadata_id = metadata_id - binding.document_id = document_id - binding.created_by = created_by - for key, value in kwargs.items(): - setattr(binding, key, value) - return binding - - @staticmethod - def create_dataset_mock( - dataset_id: str = "dataset-123", - tenant_id: str = "tenant-123", - built_in_field_enabled: bool = False, - doc_metadata: list | None = None, - **kwargs, - ) -> Mock: - """ - Create a mock Dataset with specified attributes. - - Args: - dataset_id: Unique identifier for the dataset - tenant_id: Tenant identifier - built_in_field_enabled: Whether built-in fields are enabled - doc_metadata: List of metadata field definitions - **kwargs: Additional attributes to set on the mock - - Returns: - Mock object configured as a Dataset instance - """ - dataset = Mock(spec=Dataset) - dataset.id = dataset_id - dataset.tenant_id = tenant_id - dataset.built_in_field_enabled = built_in_field_enabled - dataset.doc_metadata = doc_metadata or [] - for key, value in kwargs.items(): - setattr(dataset, key, value) - return dataset - - @staticmethod - def create_document_mock( - document_id: str = "document-123", - dataset_id: str = "dataset-123", - name: str = "Test Document", - doc_metadata: dict | None = None, - uploader: str = "user-123", - data_source_type: str = "upload_file", - **kwargs, - ) -> Mock: - """ - Create a mock Document with specified attributes. - - Args: - document_id: Unique identifier for the document - dataset_id: ID of the dataset this document belongs to - name: Name of the document - doc_metadata: Dictionary of metadata values - uploader: ID of the user who uploaded the document - data_source_type: Type of data source - **kwargs: Additional attributes to set on the mock - - Returns: - Mock object configured as a Document instance - """ - document = Mock() - document.id = document_id - document.dataset_id = dataset_id - document.name = name - document.doc_metadata = doc_metadata or {} - document.uploader = uploader - document.data_source_type = data_source_type - - # Mock datetime objects for upload_date and last_update_date - - document.upload_date = Mock() - document.upload_date.timestamp.return_value = 1234567890.0 - document.last_update_date = Mock() - document.last_update_date.timestamp.return_value = 1234567890.0 - - for key, value in kwargs.items(): - setattr(document, key, value) - return document - - @staticmethod - def create_metadata_args_mock( - name: str = "category", - metadata_type: str = "string", - ) -> Mock: - """ - Create a mock MetadataArgs entity. - - Args: - name: Name of the metadata field - metadata_type: Type of metadata - - Returns: - Mock object configured as a MetadataArgs instance - """ - metadata_args = Mock(spec=MetadataArgs) - metadata_args.name = name - metadata_args.type = metadata_type - return metadata_args - - @staticmethod - def create_metadata_value_mock( - metadata_id: str = "metadata-123", - name: str = "category", - value: str = "test", - ) -> Mock: - """ - Create a mock MetadataValue entity. - - Args: - metadata_id: ID of the metadata field - name: Name of the metadata field - value: Value of the metadata - - Returns: - Mock object configured as a MetadataValue instance - """ - metadata_value = Mock(spec=MetadataValue) - metadata_value.id = metadata_id - metadata_value.name = name - metadata_value.value = value - return metadata_value - - -# ============================================================================ -# Tests for create_metadata -# ============================================================================ - - -class TestMetadataServiceCreateMetadata: - """ - Comprehensive unit tests for MetadataService.create_metadata method. - - This test class covers the metadata field creation functionality, - including validation, duplicate checking, and database operations. - - The create_metadata method: - 1. Validates metadata name length (max 255 characters) - 2. Checks for duplicate metadata names within the dataset - 3. Checks for conflicts with built-in field names - 4. Creates a new DatasetMetadata instance - 5. Adds it to the database session and commits - 6. Returns the created metadata - - Test scenarios include: - - Successful creation with valid data - - Name length validation - - Duplicate name detection - - Built-in field name conflicts - - Database transaction handling - """ - - @pytest.fixture - def mock_db_session(self): - """ - Mock database session for testing database operations. - - Provides a mocked database session that can be used to verify: - - Query construction and execution - - Add operations for new metadata - - Commit operations for transaction completion - """ - with patch("services.metadata_service.db.session") as mock_db: - yield mock_db - - @pytest.fixture - def mock_current_user(self): - """ - Mock current user and tenant context. - - Provides mocked current_account_with_tenant function that returns - a user and tenant ID for testing authentication and authorization. - """ - with patch("services.metadata_service.current_account_with_tenant") as mock_get_user: - mock_user = Mock() - mock_user.id = "user-123" - mock_tenant_id = "tenant-123" - mock_get_user.return_value = (mock_user, mock_tenant_id) - yield mock_get_user - - def test_create_metadata_success(self, mock_db_session, mock_current_user): - """ - Test successful creation of a metadata field. - - Verifies that when all validation passes, a new metadata field - is created and persisted to the database. - - This test ensures: - - Metadata name validation passes - - No duplicate name exists - - No built-in field conflict - - New metadata is added to database - - Transaction is committed - - Created metadata is returned - """ - # Arrange - dataset_id = "dataset-123" - metadata_args = MetadataTestDataFactory.create_metadata_args_mock(name="category", metadata_type="string") - - # Mock query to return None (no existing metadata with same name) - mock_db_session.scalar.return_value = None - - # Mock BuiltInField enum iteration - with patch("services.metadata_service.BuiltInField") as mock_builtin: - mock_builtin.__iter__ = Mock(return_value=iter([])) - - # Act - result = MetadataService.create_metadata(dataset_id, metadata_args) - - # Assert - assert result is not None - assert isinstance(result, DatasetMetadata) - - # Verify metadata was added and committed - mock_db_session.add.assert_called_once() - mock_db_session.commit.assert_called_once() - - def test_create_metadata_name_too_long_error(self, mock_db_session, mock_current_user): - """ - Test error handling when metadata name exceeds 255 characters. - - Verifies that when a metadata name is longer than 255 characters, - a ValueError is raised with an appropriate message. - - This test ensures: - - Name length validation is enforced - - Error message is clear and descriptive - - No database operations are performed - """ - # Arrange - dataset_id = "dataset-123" - long_name = "a" * 256 # 256 characters (exceeds limit) - metadata_args = MetadataTestDataFactory.create_metadata_args_mock(name=long_name, metadata_type="string") - - # Act & Assert - with pytest.raises(ValueError, match="Metadata name cannot exceed 255 characters"): - MetadataService.create_metadata(dataset_id, metadata_args) - - # Verify no database operations were performed - mock_db_session.add.assert_not_called() - mock_db_session.commit.assert_not_called() - - def test_create_metadata_duplicate_name_error(self, mock_db_session, mock_current_user): - """ - Test error handling when metadata name already exists. - - Verifies that when a metadata field with the same name already exists - in the dataset, a ValueError is raised. - - This test ensures: - - Duplicate name detection works correctly - - Error message is clear - - No new metadata is created - """ - # Arrange - dataset_id = "dataset-123" - metadata_args = MetadataTestDataFactory.create_metadata_args_mock(name="category", metadata_type="string") - - # Mock existing metadata with same name - existing_metadata = MetadataTestDataFactory.create_metadata_mock(name="category") - mock_db_session.scalar.return_value = existing_metadata - - # Act & Assert - with pytest.raises(ValueError, match="Metadata name already exists"): - MetadataService.create_metadata(dataset_id, metadata_args) - - # Verify no new metadata was added - mock_db_session.add.assert_not_called() - mock_db_session.commit.assert_not_called() - - def test_create_metadata_builtin_field_conflict_error(self, mock_db_session, mock_current_user): - """ - Test error handling when metadata name conflicts with built-in field. - - Verifies that when a metadata name matches a built-in field name, - a ValueError is raised. - - This test ensures: - - Built-in field name conflicts are detected - - Error message is clear - - No new metadata is created - """ - # Arrange - dataset_id = "dataset-123" - metadata_args = MetadataTestDataFactory.create_metadata_args_mock( - name=BuiltInField.document_name, metadata_type="string" - ) - - # Mock query to return None (no duplicate in database) - mock_db_session.scalar.return_value = None - - # Mock BuiltInField to include the conflicting name - with patch("services.metadata_service.BuiltInField") as mock_builtin: - mock_field = Mock() - mock_field.value = BuiltInField.document_name - mock_builtin.__iter__ = Mock(return_value=iter([mock_field])) - - # Act & Assert - with pytest.raises(ValueError, match="Metadata name already exists in Built-in fields"): - MetadataService.create_metadata(dataset_id, metadata_args) - - # Verify no new metadata was added - mock_db_session.add.assert_not_called() - mock_db_session.commit.assert_not_called() - - -# ============================================================================ -# Tests for update_metadata_name -# ============================================================================ - - -class TestMetadataServiceUpdateMetadataName: - """ - Comprehensive unit tests for MetadataService.update_metadata_name method. - - This test class covers the metadata field name update functionality, - including validation, duplicate checking, and document metadata updates. - - The update_metadata_name method: - 1. Validates new name length (max 255 characters) - 2. Checks for duplicate names - 3. Checks for built-in field conflicts - 4. Acquires a lock for the dataset - 5. Updates the metadata name - 6. Updates all related document metadata - 7. Releases the lock - 8. Returns the updated metadata - - Test scenarios include: - - Successful name update - - Name length validation - - Duplicate name detection - - Built-in field conflicts - - Lock management - - Document metadata updates - """ - - @pytest.fixture - def mock_db_session(self): - """Mock database session for testing.""" - with patch("services.metadata_service.db.session") as mock_db: - yield mock_db - - @pytest.fixture - def mock_current_user(self): - """Mock current user and tenant context.""" - with patch("services.metadata_service.current_account_with_tenant") as mock_get_user: - mock_user = Mock() - mock_user.id = "user-123" - mock_tenant_id = "tenant-123" - mock_get_user.return_value = (mock_user, mock_tenant_id) - yield mock_get_user - - @pytest.fixture - def mock_redis_client(self): - """Mock Redis client for lock management.""" - with patch("services.metadata_service.redis_client") as mock_redis: - mock_redis.get.return_value = None # No existing lock - mock_redis.set.return_value = True - mock_redis.delete.return_value = True - yield mock_redis - - def test_update_metadata_name_success(self, mock_db_session, mock_current_user, mock_redis_client): - """ - Test successful update of metadata field name. - - Verifies that when all validation passes, the metadata name is - updated and all related document metadata is updated accordingly. - - This test ensures: - - Name validation passes - - Lock is acquired and released - - Metadata name is updated - - Related document metadata is updated - - Transaction is committed - """ - # Arrange - dataset_id = "dataset-123" - metadata_id = "metadata-123" - new_name = "updated_category" - - existing_metadata = MetadataTestDataFactory.create_metadata_mock(metadata_id=metadata_id, name="category") - - # Mock scalar calls: first for duplicate check (None), second for metadata retrieval - mock_db_session.scalar.side_effect = [None, existing_metadata] - - # Mock no metadata bindings (no documents to update) - mock_db_session.scalars.return_value.all.return_value = [] - - # Mock BuiltInField enum - with patch("services.metadata_service.BuiltInField") as mock_builtin: - mock_builtin.__iter__ = Mock(return_value=iter([])) - - # Act - result = MetadataService.update_metadata_name(dataset_id, metadata_id, new_name) - - # Assert - assert result is not None - assert result.name == new_name - - # Verify lock was acquired and released - mock_redis_client.get.assert_called() - mock_redis_client.set.assert_called() - mock_redis_client.delete.assert_called() - - # Verify metadata was updated and committed - mock_db_session.commit.assert_called() - - def test_update_metadata_name_not_found_error(self, mock_db_session, mock_current_user, mock_redis_client): - """ - Test error handling when metadata is not found. - - Verifies that when the metadata ID doesn't exist, a ValueError - is raised with an appropriate message. - - This test ensures: - - Not found error is handled correctly - - Lock is properly released even on error - - No updates are committed - """ - # Arrange - dataset_id = "dataset-123" - metadata_id = "non-existent-metadata" - new_name = "updated_category" - - # Mock scalar calls: first for duplicate check (None), second for metadata retrieval (None = not found) - mock_db_session.scalar.side_effect = [None, None] - - # Mock BuiltInField enum - with patch("services.metadata_service.BuiltInField") as mock_builtin: - mock_builtin.__iter__ = Mock(return_value=iter([])) - - # Act & Assert - with pytest.raises(ValueError, match="Metadata not found"): - MetadataService.update_metadata_name(dataset_id, metadata_id, new_name) - - # Verify lock was released - mock_redis_client.delete.assert_called() - - -# ============================================================================ -# Tests for delete_metadata -# ============================================================================ - - -class TestMetadataServiceDeleteMetadata: - """ - Comprehensive unit tests for MetadataService.delete_metadata method. - - This test class covers the metadata field deletion functionality, - including document metadata cleanup and lock management. - - The delete_metadata method: - 1. Acquires a lock for the dataset - 2. Retrieves the metadata to delete - 3. Deletes the metadata from the database - 4. Removes metadata from all related documents - 5. Releases the lock - 6. Returns the deleted metadata - - Test scenarios include: - - Successful deletion - - Not found error handling - - Document metadata cleanup - - Lock management - """ - - @pytest.fixture - def mock_db_session(self): - """Mock database session for testing.""" - with patch("services.metadata_service.db.session") as mock_db: - yield mock_db - - @pytest.fixture - def mock_redis_client(self): - """Mock Redis client for lock management.""" - with patch("services.metadata_service.redis_client") as mock_redis: - mock_redis.get.return_value = None - mock_redis.set.return_value = True - mock_redis.delete.return_value = True - yield mock_redis - - def test_delete_metadata_success(self, mock_db_session, mock_redis_client): - """ - Test successful deletion of a metadata field. - - Verifies that when the metadata exists, it is deleted and all - related document metadata is cleaned up. - - This test ensures: - - Lock is acquired and released - - Metadata is deleted from database - - Related document metadata is removed - - Transaction is committed - """ - # Arrange - dataset_id = "dataset-123" - metadata_id = "metadata-123" - - existing_metadata = MetadataTestDataFactory.create_metadata_mock(metadata_id=metadata_id, name="category") - - # Mock metadata retrieval - mock_db_session.scalar.return_value = existing_metadata - - # Mock no metadata bindings (no documents to update) - mock_db_session.scalars.return_value.all.return_value = [] - - # Act - result = MetadataService.delete_metadata(dataset_id, metadata_id) - - # Assert - assert result == existing_metadata - - # Verify lock was acquired and released - mock_redis_client.get.assert_called() - mock_redis_client.set.assert_called() - mock_redis_client.delete.assert_called() - - # Verify metadata was deleted and committed - mock_db_session.delete.assert_called_once_with(existing_metadata) - mock_db_session.commit.assert_called() - - def test_delete_metadata_not_found_error(self, mock_db_session, mock_redis_client): - """ - Test error handling when metadata is not found. - - Verifies that when the metadata ID doesn't exist, a ValueError - is raised and the lock is properly released. - - This test ensures: - - Not found error is handled correctly - - Lock is released even on error - - No deletion is performed - """ - # Arrange - dataset_id = "dataset-123" - metadata_id = "non-existent-metadata" - - # Mock metadata retrieval to return None - mock_db_session.scalar.return_value = None - - # Act & Assert - with pytest.raises(ValueError, match="Metadata not found"): - MetadataService.delete_metadata(dataset_id, metadata_id) - - # Verify lock was released - mock_redis_client.delete.assert_called() - - # Verify no deletion was performed - mock_db_session.delete.assert_not_called() - - -# ============================================================================ -# Tests for get_built_in_fields -# ============================================================================ - - -class TestMetadataServiceGetBuiltInFields: - """ - Comprehensive unit tests for MetadataService.get_built_in_fields method. - - This test class covers the built-in field retrieval functionality. - - The get_built_in_fields method: - 1. Returns a list of built-in field definitions - 2. Each definition includes name and type - - Test scenarios include: - - Successful retrieval of built-in fields - - Correct field definitions - """ - - def test_get_built_in_fields_success(self): - """ - Test successful retrieval of built-in fields. - - Verifies that the method returns the correct list of built-in - field definitions with proper structure. - - This test ensures: - - All built-in fields are returned - - Each field has name and type - - Field definitions are correct - """ - # Act - result = MetadataService.get_built_in_fields() - - # Assert - assert isinstance(result, list) - assert len(result) > 0 - - # Verify each field has required properties - for field in result: - assert "name" in field - assert "type" in field - assert isinstance(field["name"], str) - assert isinstance(field["type"], str) - - # Verify specific built-in fields are present - field_names = [field["name"] for field in result] - assert BuiltInField.document_name in field_names - assert BuiltInField.uploader in field_names - - -# ============================================================================ -# Tests for knowledge_base_metadata_lock_check -# ============================================================================ - - -class TestMetadataServiceLockCheck: - """ - Comprehensive unit tests for MetadataService.knowledge_base_metadata_lock_check method. - - This test class covers the lock management functionality for preventing - concurrent metadata operations. - - The knowledge_base_metadata_lock_check method: - 1. Checks if a lock exists for the dataset or document - 2. Raises ValueError if lock exists (operation in progress) - 3. Sets a lock with expiration time (3600 seconds) - 4. Supports both dataset-level and document-level locks - - Test scenarios include: - - Successful lock acquisition - - Lock conflict detection - - Dataset-level locks - - Document-level locks - """ - - @pytest.fixture - def mock_redis_client(self): - """Mock Redis client for lock management.""" - with patch("services.metadata_service.redis_client") as mock_redis: - yield mock_redis - - def test_lock_check_dataset_success(self, mock_redis_client): - """ - Test successful lock acquisition for dataset operations. - - Verifies that when no lock exists, a new lock is acquired - for the dataset. - - This test ensures: - - Lock check passes when no lock exists - - Lock is set with correct key and expiration - - No error is raised - """ - # Arrange - dataset_id = "dataset-123" - mock_redis_client.get.return_value = None # No existing lock - - # Act (should not raise) - MetadataService.knowledge_base_metadata_lock_check(dataset_id, None) - - # Assert - mock_redis_client.get.assert_called_once_with(f"dataset_metadata_lock_{dataset_id}") - mock_redis_client.set.assert_called_once_with(f"dataset_metadata_lock_{dataset_id}", 1, ex=3600) - - def test_lock_check_dataset_conflict_error(self, mock_redis_client): - """ - Test error handling when dataset lock already exists. - - Verifies that when a lock exists for the dataset, a ValueError - is raised with an appropriate message. - - This test ensures: - - Lock conflict is detected - - Error message is clear - - No new lock is set - """ - # Arrange - dataset_id = "dataset-123" - mock_redis_client.get.return_value = "1" # Lock exists - - # Act & Assert - with pytest.raises(ValueError, match="Another knowledge base metadata operation is running"): - MetadataService.knowledge_base_metadata_lock_check(dataset_id, None) - - # Verify lock was checked but not set - mock_redis_client.get.assert_called_once() - mock_redis_client.set.assert_not_called() - - def test_lock_check_document_success(self, mock_redis_client): - """ - Test successful lock acquisition for document operations. - - Verifies that when no lock exists, a new lock is acquired - for the document. - - This test ensures: - - Lock check passes when no lock exists - - Lock is set with correct key and expiration - - No error is raised - """ - # Arrange - document_id = "document-123" - mock_redis_client.get.return_value = None # No existing lock - - # Act (should not raise) - MetadataService.knowledge_base_metadata_lock_check(None, document_id) - - # Assert - mock_redis_client.get.assert_called_once_with(f"document_metadata_lock_{document_id}") - mock_redis_client.set.assert_called_once_with(f"document_metadata_lock_{document_id}", 1, ex=3600) - - -# ============================================================================ -# Tests for get_dataset_metadatas -# ============================================================================ - - -class TestMetadataServiceGetDatasetMetadatas: - """ - Comprehensive unit tests for MetadataService.get_dataset_metadatas method. - - This test class covers the metadata retrieval functionality for datasets. - - The get_dataset_metadatas method: - 1. Retrieves all metadata fields for a dataset - 2. Excludes built-in fields from the list - 3. Includes usage count for each metadata field - 4. Returns built-in field enabled status - - Test scenarios include: - - Successful retrieval with metadata fields - - Empty metadata list - - Built-in field filtering - - Usage count calculation - """ - - @pytest.fixture - def mock_db_session(self): - """Mock database session for testing.""" - with patch("services.metadata_service.db.session") as mock_db: - yield mock_db - - def test_get_dataset_metadatas_success(self, mock_db_session): - """ - Test successful retrieval of dataset metadata fields. - - Verifies that all metadata fields are returned with correct - structure and usage counts. - - This test ensures: - - All metadata fields are included - - Built-in fields are excluded - - Usage counts are calculated correctly - - Built-in field status is included - """ - # Arrange - dataset = MetadataTestDataFactory.create_dataset_mock( - dataset_id="dataset-123", - built_in_field_enabled=True, - doc_metadata=[ - {"id": "metadata-1", "name": "category", "type": "string"}, - {"id": "metadata-2", "name": "priority", "type": "number"}, - {"id": "built-in", "name": "document_name", "type": "string"}, - ], - ) - - # Mock usage count queries - mock_db_session.scalar.return_value = 5 # 5 documents use this metadata - - # Act - result = MetadataService.get_dataset_metadatas(dataset) - - # Assert - assert "doc_metadata" in result - assert "built_in_field_enabled" in result - assert result["built_in_field_enabled"] is True - - # Verify built-in fields are excluded - metadata_ids = [meta["id"] for meta in result["doc_metadata"]] - assert "built-in" not in metadata_ids - - # Verify all custom metadata fields are included - assert len(result["doc_metadata"]) == 2 - - # Verify usage counts are included - for meta in result["doc_metadata"]: - assert "count" in meta - assert meta["count"] == 5 - - -# ============================================================================ -# Additional Documentation and Notes -# ============================================================================ -# -# This test suite covers the core metadata CRUD operations and basic -# filtering functionality. Additional test scenarios that could be added: -# -# 1. enable_built_in_field / disable_built_in_field: -# - Testing built-in field enablement -# - Testing built-in field disablement -# - Testing document metadata updates when enabling/disabling -# -# 2. update_documents_metadata: -# - Testing partial updates -# - Testing full updates -# - Testing metadata binding creation -# - Testing built-in field updates -# -# 3. Metadata Filtering and Querying: -# - Testing metadata-based document filtering -# - Testing complex metadata queries -# - Testing metadata value retrieval -# -# These scenarios are not currently implemented but could be added if needed -# based on real-world usage patterns or discovered edge cases. -# -# ============================================================================ From 9069c01f9ceae3b934181de1943292b45266d5b4 Mon Sep 17 00:00:00 2001 From: NVIDIAN Date: Fri, 10 Apr 2026 21:01:03 -0700 Subject: [PATCH 011/147] refactor: replace inline api.model with register_schema_models in billing (#34928) Co-authored-by: ai-hpc --- api/controllers/console/billing/billing.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/api/controllers/console/billing/billing.py b/api/controllers/console/billing/billing.py index 23c01eedb1..45de338559 100644 --- a/api/controllers/console/billing/billing.py +++ b/api/controllers/console/billing/billing.py @@ -2,18 +2,17 @@ import base64 from typing import Literal from flask import request -from flask_restx import Resource, fields +from flask_restx import Resource from pydantic import BaseModel, Field from werkzeug.exceptions import BadRequest +from controllers.common.schema import register_schema_models from controllers.console import console_ns from controllers.console.wraps import account_initialization_required, only_edition_cloud, setup_required from enums.cloud_plan import CloudPlan from libs.login import current_account_with_tenant, login_required from services.billing_service import BillingService -DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}" - class SubscriptionQuery(BaseModel): plan: Literal[CloudPlan.PROFESSIONAL, CloudPlan.TEAM] = Field(..., description="Subscription plan") @@ -24,8 +23,7 @@ class PartnerTenantsPayload(BaseModel): click_id: str = Field(..., description="Click Id from partner referral link") -for model in (SubscriptionQuery, PartnerTenantsPayload): - console_ns.schema_model(model.__name__, model.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)) +register_schema_models(console_ns, SubscriptionQuery, PartnerTenantsPayload) @console_ns.route("/billing/subscription") @@ -58,12 +56,7 @@ class PartnerTenants(Resource): @console_ns.doc("sync_partner_tenants_bindings") @console_ns.doc(description="Sync partner tenants bindings") @console_ns.doc(params={"partner_key": "Partner key"}) - @console_ns.expect( - console_ns.model( - "SyncPartnerTenantsBindingsRequest", - {"click_id": fields.String(required=True, description="Click Id from partner referral link")}, - ) - ) + @console_ns.expect(console_ns.models[PartnerTenantsPayload.__name__]) @console_ns.response(200, "Tenants synced to partner successfully") @console_ns.response(400, "Invalid partner information") @setup_required From d5104a4268c94cf6b7f2c418d19aaade559a7250 Mon Sep 17 00:00:00 2001 From: BitToby <218712309+bittoby@users.noreply.github.com> Date: Sat, 11 Apr 2026 06:29:39 +0200 Subject: [PATCH 012/147] test: remove dataset permission mock tests superseded by testcontainers (#34936) --- .../services/dataset_permission_service.py | 825 ------------------ 1 file changed, 825 deletions(-) delete mode 100644 api/tests/unit_tests/services/dataset_permission_service.py diff --git a/api/tests/unit_tests/services/dataset_permission_service.py b/api/tests/unit_tests/services/dataset_permission_service.py deleted file mode 100644 index e098e90455..0000000000 --- a/api/tests/unit_tests/services/dataset_permission_service.py +++ /dev/null @@ -1,825 +0,0 @@ -""" -Comprehensive unit tests for DatasetPermissionService and DatasetService permission methods. - -This module contains extensive unit tests for dataset permission management, -including partial member list operations, permission validation, and permission -enum handling. - -The DatasetPermissionService provides methods for: -- Retrieving partial member permissions (get_dataset_partial_member_list) -- Updating partial member lists (update_partial_member_list) -- Validating permissions before operations (check_permission) -- Clearing partial member lists (clear_partial_member_list) - -The DatasetService provides permission checking methods: -- check_dataset_permission - validates user access to dataset -- check_dataset_operator_permission - validates operator permissions - -These operations are critical for dataset access control and security, ensuring -that users can only access datasets they have permission to view or modify. - -This test suite ensures: -- Correct retrieval of partial member lists -- Proper update of partial member permissions -- Accurate permission validation logic -- Proper handling of permission enums (only_me, all_team_members, partial_members) -- Security boundaries are maintained -- Error conditions are handled correctly - -================================================================================ -ARCHITECTURE OVERVIEW -================================================================================ - -The Dataset permission system is a multi-layered access control mechanism -that provides fine-grained control over who can access and modify datasets. - -1. Permission Levels: - - only_me: Only the dataset creator can access - - all_team_members: All members of the tenant can access - - partial_members: Only specific users listed in DatasetPermission can access - -2. Permission Storage: - - Dataset.permission: Stores the permission level enum - - DatasetPermission: Stores individual user permissions for partial_members - - Each DatasetPermission record links a dataset to a user account - -3. Permission Validation: - - Tenant-level checks: Users must be in the same tenant - - Role-based checks: OWNER role bypasses some restrictions - - Explicit permission checks: For partial_members, explicit DatasetPermission - records are required - -4. Permission Operations: - - Partial member list management: Add/remove users from partial access - - Permission validation: Check before allowing operations - - Permission clearing: Remove all partial members when changing permission level - -================================================================================ -TESTING STRATEGY -================================================================================ - -This test suite follows a comprehensive testing strategy that covers: - -1. Partial Member List Operations: - - Retrieving member lists - - Adding new members - - Updating existing members - - Removing members - - Empty list handling - -2. Permission Validation: - - Dataset editor permissions - - Dataset operator restrictions - - Permission enum validation - - Partial member list validation - - Tenant isolation - -3. Permission Enum Handling: - - only_me permission behavior - - all_team_members permission behavior - - partial_members permission behavior - - Permission transitions - - Edge cases for each enum value - -4. Security and Access Control: - - Tenant boundary enforcement - - Role-based access control - - Creator privilege validation - - Explicit permission requirement - -5. Error Handling: - - Invalid permission changes - - Missing required data - - Database transaction failures - - Permission denial scenarios - -================================================================================ -""" - -from unittest.mock import Mock, create_autospec, patch - -import pytest - -from models import Account, TenantAccountRole -from models.dataset import ( - Dataset, - DatasetPermission, - DatasetPermissionEnum, -) -from services.dataset_service import DatasetPermissionService, DatasetService -from services.errors.account import NoPermissionError - -# ============================================================================ -# Test Data Factory -# ============================================================================ -# The Test Data Factory pattern is used here to centralize the creation of -# test objects and mock instances. This approach provides several benefits: -# -# 1. Consistency: All test objects are created using the same factory methods, -# ensuring consistent structure across all tests. -# -# 2. Maintainability: If the structure of models or services changes, we only -# need to update the factory methods rather than every individual test. -# -# 3. Reusability: Factory methods can be reused across multiple test classes, -# reducing code duplication. -# -# 4. Readability: Tests become more readable when they use descriptive factory -# method calls instead of complex object construction logic. -# -# ============================================================================ - - -class DatasetPermissionTestDataFactory: - """ - Factory class for creating test data and mock objects for dataset permission tests. - - This factory provides static methods to create mock objects for: - - Dataset instances with various permission configurations - - User/Account instances with different roles and permissions - - DatasetPermission instances - - Permission enum values - - Database query results - - The factory methods help maintain consistency across tests and reduce - code duplication when setting up test scenarios. - """ - - @staticmethod - def create_dataset_mock( - dataset_id: str = "dataset-123", - tenant_id: str = "tenant-123", - permission: DatasetPermissionEnum = DatasetPermissionEnum.ONLY_ME, - created_by: str = "user-123", - name: str = "Test Dataset", - **kwargs, - ) -> Mock: - """ - Create a mock Dataset with specified attributes. - - Args: - dataset_id: Unique identifier for the dataset - tenant_id: Tenant identifier - permission: Permission level enum - created_by: ID of user who created the dataset - name: Dataset name - **kwargs: Additional attributes to set on the mock - - Returns: - Mock object configured as a Dataset instance - """ - dataset = Mock(spec=Dataset) - dataset.id = dataset_id - dataset.tenant_id = tenant_id - dataset.permission = permission - dataset.created_by = created_by - dataset.name = name - for key, value in kwargs.items(): - setattr(dataset, key, value) - return dataset - - @staticmethod - def create_user_mock( - user_id: str = "user-123", - tenant_id: str = "tenant-123", - role: TenantAccountRole = TenantAccountRole.NORMAL, - is_dataset_editor: bool = True, - is_dataset_operator: bool = False, - **kwargs, - ) -> Mock: - """ - Create a mock user (Account) with specified attributes. - - Args: - user_id: Unique identifier for the user - tenant_id: Tenant identifier - role: User role (OWNER, ADMIN, NORMAL, DATASET_OPERATOR, etc.) - is_dataset_editor: Whether user has dataset editor permissions - is_dataset_operator: Whether user is a dataset operator - **kwargs: Additional attributes to set on the mock - - Returns: - Mock object configured as an Account instance - """ - user = create_autospec(Account, instance=True) - user.id = user_id - user.current_tenant_id = tenant_id - user.current_role = role - user.is_dataset_editor = is_dataset_editor - user.is_dataset_operator = is_dataset_operator - for key, value in kwargs.items(): - setattr(user, key, value) - return user - - @staticmethod - def create_dataset_permission_mock( - permission_id: str = "permission-123", - dataset_id: str = "dataset-123", - account_id: str = "user-456", - tenant_id: str = "tenant-123", - has_permission: bool = True, - **kwargs, - ) -> Mock: - """ - Create a mock DatasetPermission instance. - - Args: - permission_id: Unique identifier for the permission - dataset_id: Dataset ID - account_id: User account ID - tenant_id: Tenant identifier - has_permission: Whether permission is granted - **kwargs: Additional attributes to set on the mock - - Returns: - Mock object configured as a DatasetPermission instance - """ - permission = Mock(spec=DatasetPermission) - permission.id = permission_id - permission.dataset_id = dataset_id - permission.account_id = account_id - permission.tenant_id = tenant_id - permission.has_permission = has_permission - for key, value in kwargs.items(): - setattr(permission, key, value) - return permission - - @staticmethod - def create_user_list_mock(user_ids: list[str]) -> list[dict[str, str]]: - """ - Create a list of user dictionaries for partial member list operations. - - Args: - user_ids: List of user IDs to include - - Returns: - List of user dictionaries with "user_id" keys - """ - return [{"user_id": user_id} for user_id in user_ids] - - -# ============================================================================ -# Tests for check_permission -# ============================================================================ - - -class TestDatasetPermissionServiceCheckPermission: - """ - Comprehensive unit tests for DatasetPermissionService.check_permission method. - - This test class covers the permission validation logic that ensures - users have the appropriate permissions to modify dataset permissions. - - The check_permission method: - 1. Validates user is a dataset editor - 2. Checks if dataset operator is trying to change permissions - 3. Validates partial member list when setting to partial_members - 4. Ensures dataset operators cannot change permission levels - 5. Ensures dataset operators cannot modify partial member lists - - Test scenarios include: - - Valid permission changes by dataset editors - - Dataset operator restrictions - - Partial member list validation - - Missing dataset editor permissions - - Invalid permission changes - """ - - @pytest.fixture - def mock_get_partial_member_list(self): - """ - Mock get_dataset_partial_member_list method. - - Provides a mocked version of the get_dataset_partial_member_list - method for testing permission validation logic. - """ - with patch.object(DatasetPermissionService, "get_dataset_partial_member_list") as mock_get_list: - yield mock_get_list - - def test_check_permission_dataset_editor_success(self, mock_get_partial_member_list): - """ - Test successful permission check for dataset editor. - - Verifies that when a dataset editor (not operator) tries to - change permissions, the check passes. - - This test ensures: - - Dataset editors can change permissions - - No errors are raised for valid changes - - Partial member list validation is skipped for non-operators - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(is_dataset_editor=True, is_dataset_operator=False) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock(permission=DatasetPermissionEnum.ONLY_ME) - requested_permission = DatasetPermissionEnum.ALL_TEAM - requested_partial_member_list = None - - # Act (should not raise) - DatasetPermissionService.check_permission(user, dataset, requested_permission, requested_partial_member_list) - - # Assert - # Verify get_partial_member_list was not called (not needed for non-operators) - mock_get_partial_member_list.assert_not_called() - - def test_check_permission_not_dataset_editor_error(self): - """ - Test error when user is not a dataset editor. - - Verifies that when a user without dataset editor permissions - tries to change permissions, a NoPermissionError is raised. - - This test ensures: - - Non-editors cannot change permissions - - Error message is clear - - Error type is correct - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(is_dataset_editor=False) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock() - requested_permission = DatasetPermissionEnum.ALL_TEAM - requested_partial_member_list = None - - # Act & Assert - with pytest.raises(NoPermissionError, match="User does not have permission to edit this dataset"): - DatasetPermissionService.check_permission( - user, dataset, requested_permission, requested_partial_member_list - ) - - def test_check_permission_operator_cannot_change_permission_error(self): - """ - Test error when dataset operator tries to change permission level. - - Verifies that when a dataset operator tries to change the permission - level, a NoPermissionError is raised. - - This test ensures: - - Dataset operators cannot change permission levels - - Error message is clear - - Current permission is preserved - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(is_dataset_editor=True, is_dataset_operator=True) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock(permission=DatasetPermissionEnum.ONLY_ME) - requested_permission = DatasetPermissionEnum.ALL_TEAM # Trying to change - requested_partial_member_list = None - - # Act & Assert - with pytest.raises(NoPermissionError, match="Dataset operators cannot change the dataset permissions"): - DatasetPermissionService.check_permission( - user, dataset, requested_permission, requested_partial_member_list - ) - - def test_check_permission_operator_partial_members_missing_list_error(self, mock_get_partial_member_list): - """ - Test error when operator sets partial_members without providing list. - - Verifies that when a dataset operator tries to set permission to - partial_members without providing a member list, a ValueError is raised. - - This test ensures: - - Partial member list is required for partial_members permission - - Error message is clear - - Error type is correct - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(is_dataset_editor=True, is_dataset_operator=True) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock(permission=DatasetPermissionEnum.PARTIAL_TEAM) - requested_permission = "partial_members" - requested_partial_member_list = None # Missing list - - # Act & Assert - with pytest.raises(ValueError, match="Partial member list is required when setting to partial members"): - DatasetPermissionService.check_permission( - user, dataset, requested_permission, requested_partial_member_list - ) - - def test_check_permission_operator_cannot_modify_partial_list_error(self, mock_get_partial_member_list): - """ - Test error when operator tries to modify partial member list. - - Verifies that when a dataset operator tries to change the partial - member list, a ValueError is raised. - - This test ensures: - - Dataset operators cannot modify partial member lists - - Error message is clear - - Current member list is preserved - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(is_dataset_editor=True, is_dataset_operator=True) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock(permission=DatasetPermissionEnum.PARTIAL_TEAM) - requested_permission = "partial_members" - - # Current member list - current_member_list = ["user-456", "user-789"] - mock_get_partial_member_list.return_value = current_member_list - - # Requested member list (different from current) - requested_partial_member_list = DatasetPermissionTestDataFactory.create_user_list_mock( - ["user-456", "user-999"] # Different list - ) - - # Act & Assert - with pytest.raises(ValueError, match="Dataset operators cannot change the dataset permissions"): - DatasetPermissionService.check_permission( - user, dataset, requested_permission, requested_partial_member_list - ) - - def test_check_permission_operator_can_keep_same_partial_list(self, mock_get_partial_member_list): - """ - Test that operator can keep the same partial member list. - - Verifies that when a dataset operator keeps the same partial member - list, the check passes. - - This test ensures: - - Operators can keep existing partial member lists - - No errors are raised for unchanged lists - - Permission validation works correctly - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(is_dataset_editor=True, is_dataset_operator=True) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock(permission=DatasetPermissionEnum.PARTIAL_TEAM) - requested_permission = "partial_members" - - # Current member list - current_member_list = ["user-456", "user-789"] - mock_get_partial_member_list.return_value = current_member_list - - # Requested member list (same as current) - requested_partial_member_list = DatasetPermissionTestDataFactory.create_user_list_mock( - ["user-456", "user-789"] # Same list - ) - - # Act (should not raise) - DatasetPermissionService.check_permission(user, dataset, requested_permission, requested_partial_member_list) - - # Assert - # Verify get_partial_member_list was called to compare lists - mock_get_partial_member_list.assert_called_once_with(dataset.id) - - -# ============================================================================ -# Tests for DatasetService.check_dataset_permission -# ============================================================================ - - -class TestDatasetServiceCheckDatasetPermission: - """ - Comprehensive unit tests for DatasetService.check_dataset_permission method. - - This test class covers the dataset permission checking logic that validates - whether a user has access to a dataset based on permission enums. - - The check_dataset_permission method: - 1. Validates tenant match - 2. Checks OWNER role (bypasses some restrictions) - 3. Validates only_me permission (creator only) - 4. Validates partial_members permission (explicit permission required) - 5. Validates all_team_members permission (all tenant members) - - Test scenarios include: - - Tenant boundary enforcement - - OWNER role bypass - - only_me permission validation - - partial_members permission validation - - all_team_members permission validation - - Permission denial scenarios - """ - - @pytest.fixture - def mock_db_session(self): - """ - Mock database session for testing. - - Provides a mocked database session that can be used to verify - database queries for permission checks. - """ - with patch("services.dataset_service.db.session") as mock_db: - yield mock_db - - def test_check_dataset_permission_owner_bypass(self, mock_db_session): - """ - Test that OWNER role bypasses permission checks. - - Verifies that when a user has OWNER role, they can access any - dataset in their tenant regardless of permission level. - - This test ensures: - - OWNER role bypasses permission restrictions - - No database queries are needed for OWNER - - Access is granted automatically - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(role=TenantAccountRole.OWNER, tenant_id="tenant-123") - dataset = DatasetPermissionTestDataFactory.create_dataset_mock( - tenant_id="tenant-123", - permission=DatasetPermissionEnum.ONLY_ME, - created_by="other-user-123", # Not the current user - ) - - # Act (should not raise) - DatasetService.check_dataset_permission(dataset, user) - - # Assert - # Verify no permission queries were made (OWNER bypasses) - mock_db_session.query.assert_not_called() - - def test_check_dataset_permission_tenant_mismatch_error(self): - """ - Test error when user and dataset are in different tenants. - - Verifies that when a user tries to access a dataset from a different - tenant, a NoPermissionError is raised. - - This test ensures: - - Tenant boundary is enforced - - Error message is clear - - Error type is correct - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(tenant_id="tenant-123") - dataset = DatasetPermissionTestDataFactory.create_dataset_mock(tenant_id="tenant-456") # Different tenant - - # Act & Assert - with pytest.raises(NoPermissionError, match="You do not have permission to access this dataset"): - DatasetService.check_dataset_permission(dataset, user) - - def test_check_dataset_permission_only_me_creator_success(self): - """ - Test that creator can access only_me dataset. - - Verifies that when a user is the creator of an only_me dataset, - they can access it successfully. - - This test ensures: - - Creators can access their own only_me datasets - - No explicit permission record is needed - - Access is granted correctly - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(user_id="user-123", role=TenantAccountRole.NORMAL) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock( - tenant_id="tenant-123", - permission=DatasetPermissionEnum.ONLY_ME, - created_by="user-123", # User is the creator - ) - - # Act (should not raise) - DatasetService.check_dataset_permission(dataset, user) - - def test_check_dataset_permission_only_me_non_creator_error(self): - """ - Test error when non-creator tries to access only_me dataset. - - Verifies that when a user who is not the creator tries to access - an only_me dataset, a NoPermissionError is raised. - - This test ensures: - - Non-creators cannot access only_me datasets - - Error message is clear - - Error type is correct - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(user_id="user-123", role=TenantAccountRole.NORMAL) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock( - tenant_id="tenant-123", - permission=DatasetPermissionEnum.ONLY_ME, - created_by="other-user-456", # Different creator - ) - - # Act & Assert - with pytest.raises(NoPermissionError, match="You do not have permission to access this dataset"): - DatasetService.check_dataset_permission(dataset, user) - - def test_check_dataset_permission_partial_members_creator_success(self, mock_db_session): - """ - Test that creator can access partial_members dataset without explicit permission. - - Verifies that when a user is the creator of a partial_members dataset, - they can access it even without an explicit DatasetPermission record. - - This test ensures: - - Creators can access their own datasets - - No explicit permission record is needed for creators - - Access is granted correctly - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(user_id="user-123", role=TenantAccountRole.NORMAL) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock( - tenant_id="tenant-123", - permission=DatasetPermissionEnum.PARTIAL_TEAM, - created_by="user-123", # User is the creator - ) - - # Act (should not raise) - DatasetService.check_dataset_permission(dataset, user) - - # Assert - # Verify permission query was not executed (creator bypasses) - mock_db_session.query.assert_not_called() - - def test_check_dataset_permission_all_team_members_success(self): - """ - Test that any tenant member can access all_team_members dataset. - - Verifies that when a dataset has all_team_members permission, any - user in the same tenant can access it. - - This test ensures: - - All team members can access - - No explicit permission record is needed - - Access is granted correctly - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(user_id="user-123", role=TenantAccountRole.NORMAL) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock( - tenant_id="tenant-123", - permission=DatasetPermissionEnum.ALL_TEAM, - created_by="other-user-456", # Not the creator - ) - - # Act (should not raise) - DatasetService.check_dataset_permission(dataset, user) - - -# ============================================================================ -# Tests for DatasetService.check_dataset_operator_permission -# ============================================================================ - - -class TestDatasetServiceCheckDatasetOperatorPermission: - """ - Comprehensive unit tests for DatasetService.check_dataset_operator_permission method. - - This test class covers the dataset operator permission checking logic, - which validates whether a dataset operator has access to a dataset. - - The check_dataset_operator_permission method: - 1. Validates dataset exists - 2. Validates user exists - 3. Checks OWNER role (bypasses restrictions) - 4. Validates only_me permission (creator only) - 5. Validates partial_members permission (explicit permission required) - - Test scenarios include: - - Dataset not found error - - User not found error - - OWNER role bypass - - only_me permission validation - - partial_members permission validation - - Permission denial scenarios - """ - - @pytest.fixture - def mock_db_session(self): - """ - Mock database session for testing. - - Provides a mocked database session that can be used to verify - database queries for permission checks. - """ - with patch("services.dataset_service.db.session") as mock_db: - yield mock_db - - def test_check_dataset_operator_permission_dataset_not_found_error(self): - """ - Test error when dataset is None. - - Verifies that when dataset is None, a ValueError is raised. - - This test ensures: - - Dataset existence is validated - - Error message is clear - - Error type is correct - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock() - dataset = None - - # Act & Assert - with pytest.raises(ValueError, match="Dataset not found"): - DatasetService.check_dataset_operator_permission(user=user, dataset=dataset) - - def test_check_dataset_operator_permission_user_not_found_error(self): - """ - Test error when user is None. - - Verifies that when user is None, a ValueError is raised. - - This test ensures: - - User existence is validated - - Error message is clear - - Error type is correct - """ - # Arrange - user = None - dataset = DatasetPermissionTestDataFactory.create_dataset_mock() - - # Act & Assert - with pytest.raises(ValueError, match="User not found"): - DatasetService.check_dataset_operator_permission(user=user, dataset=dataset) - - def test_check_dataset_operator_permission_owner_bypass(self): - """ - Test that OWNER role bypasses permission checks. - - Verifies that when a user has OWNER role, they can access any - dataset in their tenant regardless of permission level. - - This test ensures: - - OWNER role bypasses permission restrictions - - No database queries are needed for OWNER - - Access is granted automatically - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(role=TenantAccountRole.OWNER, tenant_id="tenant-123") - dataset = DatasetPermissionTestDataFactory.create_dataset_mock( - tenant_id="tenant-123", - permission=DatasetPermissionEnum.ONLY_ME, - created_by="other-user-123", # Not the current user - ) - - # Act (should not raise) - DatasetService.check_dataset_operator_permission(user=user, dataset=dataset) - - def test_check_dataset_operator_permission_only_me_creator_success(self): - """ - Test that creator can access only_me dataset. - - Verifies that when a user is the creator of an only_me dataset, - they can access it successfully. - - This test ensures: - - Creators can access their own only_me datasets - - No explicit permission record is needed - - Access is granted correctly - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(user_id="user-123", role=TenantAccountRole.NORMAL) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock( - tenant_id="tenant-123", - permission=DatasetPermissionEnum.ONLY_ME, - created_by="user-123", # User is the creator - ) - - # Act (should not raise) - DatasetService.check_dataset_operator_permission(user=user, dataset=dataset) - - def test_check_dataset_operator_permission_only_me_non_creator_error(self): - """ - Test error when non-creator tries to access only_me dataset. - - Verifies that when a user who is not the creator tries to access - an only_me dataset, a NoPermissionError is raised. - - This test ensures: - - Non-creators cannot access only_me datasets - - Error message is clear - - Error type is correct - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(user_id="user-123", role=TenantAccountRole.NORMAL) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock( - tenant_id="tenant-123", - permission=DatasetPermissionEnum.ONLY_ME, - created_by="other-user-456", # Different creator - ) - - # Act & Assert - with pytest.raises(NoPermissionError, match="You do not have permission to access this dataset"): - DatasetService.check_dataset_operator_permission(user=user, dataset=dataset) - - -# ============================================================================ -# Additional Documentation and Notes -# ============================================================================ -# -# This test suite covers the core permission management operations for datasets. -# Additional test scenarios that could be added: -# -# 1. Permission Enum Transitions: -# - Testing transitions between permission levels -# - Testing validation during transitions -# - Testing partial member list updates during transitions -# -# 2. Bulk Operations: -# - Testing bulk permission updates -# - Testing bulk partial member list updates -# - Testing performance with large member lists -# -# 3. Edge Cases: -# - Testing with very large partial member lists -# - Testing with special characters in user IDs -# - Testing with deleted users -# - Testing with inactive permissions -# -# 4. Integration Scenarios: -# - Testing permission changes followed by access attempts -# - Testing concurrent permission updates -# - Testing permission inheritance -# -# These scenarios are not currently implemented but could be added if needed -# based on real-world usage patterns or discovered edge cases. -# -# ============================================================================ From c960f7ae48ee37a7ed24bd6016dcc9786d646af6 Mon Sep 17 00:00:00 2001 From: yyh <92089059+lyzno1@users.noreply.github.com> Date: Sat, 11 Apr 2026 20:10:30 +0800 Subject: [PATCH 013/147] refactor: remove base ui i18n dependency (#34921) --- .../field/__tests__/number-input.spec.tsx | 2 +- .../base/ui/number-field/__tests__/index.spec.tsx | 12 ++++++------ web/app/components/base/ui/number-field/index.tsx | 12 +++++------- .../base/ui/toast/__tests__/index.spec.tsx | 14 +++++++------- web/app/components/base/ui/toast/index.tsx | 10 +++++----- web/i18n/ar-TN/common.json | 4 ---- web/i18n/de-DE/common.json | 4 ---- web/i18n/en-US/common.json | 4 ---- web/i18n/es-ES/common.json | 4 ---- web/i18n/fa-IR/common.json | 4 ---- web/i18n/fr-FR/common.json | 4 ---- web/i18n/hi-IN/common.json | 4 ---- web/i18n/id-ID/common.json | 4 ---- web/i18n/it-IT/common.json | 4 ---- web/i18n/ja-JP/common.json | 4 ---- web/i18n/ko-KR/common.json | 4 ---- web/i18n/nl-NL/common.json | 4 ---- web/i18n/pl-PL/common.json | 4 ---- web/i18n/pt-BR/common.json | 4 ---- web/i18n/ro-RO/common.json | 4 ---- web/i18n/ru-RU/common.json | 4 ---- web/i18n/sl-SI/common.json | 4 ---- web/i18n/th-TH/common.json | 4 ---- web/i18n/tr-TR/common.json | 4 ---- web/i18n/uk-UA/common.json | 4 ---- web/i18n/vi-VN/common.json | 4 ---- web/i18n/zh-Hans/common.json | 4 ---- web/i18n/zh-Hant/common.json | 4 ---- 28 files changed, 24 insertions(+), 118 deletions(-) diff --git a/web/app/components/base/form/components/field/__tests__/number-input.spec.tsx b/web/app/components/base/form/components/field/__tests__/number-input.spec.tsx index eb5b419d78..714c280008 100644 --- a/web/app/components/base/form/components/field/__tests__/number-input.spec.tsx +++ b/web/app/components/base/form/components/field/__tests__/number-input.spec.tsx @@ -27,7 +27,7 @@ describe('NumberInputField', () => { it('should update value when users click increment', () => { render() - fireEvent.click(screen.getByRole('button', { name: 'common.operation.increment' })) + fireEvent.click(screen.getByRole('button', { name: 'Increment value' })) expect(mockField.handleChange).toHaveBeenCalledWith(3) }) diff --git a/web/app/components/base/ui/number-field/__tests__/index.spec.tsx b/web/app/components/base/ui/number-field/__tests__/index.spec.tsx index 4cc07bc8eb..f988e2b312 100644 --- a/web/app/components/base/ui/number-field/__tests__/index.spec.tsx +++ b/web/app/components/base/ui/number-field/__tests__/index.spec.tsx @@ -172,13 +172,13 @@ describe('NumberField wrapper', () => { // Increment and decrement buttons should preserve accessible naming, icon fallbacks, and spacing variants. describe('Control buttons', () => { - it('should provide localized aria labels and default icons when labels are not provided', () => { + it('should provide english fallback aria labels and default icons when labels are not provided', () => { renderNumberField({ controlsProps: {}, }) - const increment = screen.getByRole('button', { name: 'common.operation.increment' }) - const decrement = screen.getByRole('button', { name: 'common.operation.decrement' }) + const increment = screen.getByRole('button', { name: 'Increment value' }) + const decrement = screen.getByRole('button', { name: 'Decrement value' }) expect(increment.querySelector('.i-ri-arrow-up-s-line')).toBeInTheDocument() expect(decrement.querySelector('.i-ri-arrow-down-s-line')).toBeInTheDocument() @@ -217,11 +217,11 @@ describe('NumberField wrapper', () => { }, }) - expect(screen.getByRole('button', { name: 'common.operation.increment' })).toBeInTheDocument() - expect(screen.getByRole('button', { name: 'common.operation.decrement' })).toBeInTheDocument() + expect(screen.getByRole('button', { name: 'Increment value' })).toBeInTheDocument() + expect(screen.getByRole('button', { name: 'Decrement value' })).toBeInTheDocument() }) - it('should rely on aria-labelledby when provided instead of injecting a translated aria-label', () => { + it('should rely on aria-labelledby when provided instead of injecting a fallback aria-label', () => { render( <> Increment from label diff --git a/web/app/components/base/ui/number-field/index.tsx b/web/app/components/base/ui/number-field/index.tsx index 97f1cc7d31..7d4c43b815 100644 --- a/web/app/components/base/ui/number-field/index.tsx +++ b/web/app/components/base/ui/number-field/index.tsx @@ -4,7 +4,6 @@ import type { VariantProps } from 'class-variance-authority' import { NumberField as BaseNumberField } from '@base-ui/react/number-field' import { cva } from 'class-variance-authority' import * as React from 'react' -import { useTranslation } from 'react-i18next' import { cn } from '@/utils/classnames' export const NumberField = BaseNumberField.Root @@ -188,18 +187,19 @@ type NumberFieldButtonVariantProps = Omit< export type NumberFieldButtonProps = React.ComponentPropsWithoutRef & NumberFieldButtonVariantProps +const incrementAriaLabel = 'Increment value' +const decrementAriaLabel = 'Decrement value' + export function NumberFieldIncrement({ className, children, size = 'regular', ...props }: NumberFieldButtonProps) { - const { t } = useTranslation() - return ( {children ??