diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 91b1efb3d7..49ca98624a 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -1623,85 +1623,177 @@ class DocumentService: Raises: DocumentIndexingError: If document is being indexed or not in correct state + ValueError: If action is invalid """ if not document_ids: return + # Early validation of action parameter + valid_actions = ["enable", "disable", "archive", "un_archive"] + if action not in valid_actions: + raise ValueError(f"Invalid action: {action}. Must be one of {valid_actions}") + + documents_to_update = [] + + # First pass: validate all documents and prepare updates for document_id in document_ids: document = DocumentService.get_document(dataset.id, document_id) - if not document: continue + # Check if document is being indexed indexing_cache_key = f"document_{document.id}_indexing" cache_result = redis_client.get(indexing_cache_key) if cache_result is not None: raise DocumentIndexingError(f"Document:{document.name} is being indexed, please try again later") - if action == "enable": - if document.enabled: - continue - document.enabled = True - document.disabled_at = None - document.disabled_by = None - document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) + # Prepare update based on action + update_info = DocumentService._prepare_document_status_update(document, action, user) + if update_info: + documents_to_update.append(update_info) + + # Second pass: apply all updates in a single transaction + if documents_to_update: + try: + for update_info in documents_to_update: + document = update_info["document"] + updates = update_info["updates"] + + # Apply updates to the document + for field, value in updates.items(): + setattr(document, field, value) + + db.session.add(document) + + # Batch commit all changes db.session.commit() + except Exception as e: + # Rollback on any error + db.session.rollback() + raise e + # Execute async tasks and set Redis cache after successful commit + # propagation_error is used to capture any errors for submitting async task execution + propagation_error = None + for update_info in documents_to_update: + try: + # Execute async tasks after successful commit + if update_info["async_task"]: + task_info = update_info["async_task"] + task_func = task_info["function"] + task_args = task_info["args"] + task_func.delay(*task_args) + except Exception as e: + # Log the error but do not rollback the transaction + logging.exception(f"Error executing async task for document {update_info['document'].id}") + # don't raise the error immediately, but capture it for later + propagation_error = e + try: + # Set Redis cache if needed after successful commit + if update_info["set_cache"]: + document = update_info["document"] + indexing_cache_key = f"document_{document.id}_indexing" + redis_client.setex(indexing_cache_key, 600, 1) + except Exception as e: + # Log the error but do not rollback the transaction + logging.exception(f"Error setting cache for document {update_info['document'].id}") + # Raise any propagation error after all updates + if propagation_error: + raise propagation_error - # Set cache to prevent indexing the same document multiple times - redis_client.setex(indexing_cache_key, 600, 1) + @staticmethod + def _prepare_document_status_update(document, action: str, user): + """ + Prepare document status update information. - add_document_to_index_task.delay(document_id) + Args: + document: Document object to update + action: Action to perform + user: Current user - elif action == "disable": - if not document.completed_at or document.indexing_status != "completed": - raise DocumentIndexingError(f"Document: {document.name} is not completed.") - if not document.enabled: - continue + Returns: + dict: Update information or None if no update needed + """ + now = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) - document.enabled = False - document.disabled_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) - document.disabled_by = user.id - document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) - db.session.commit() + if action == "enable": + return DocumentService._prepare_enable_update(document, now) + elif action == "disable": + return DocumentService._prepare_disable_update(document, user, now) + elif action == "archive": + return DocumentService._prepare_archive_update(document, user, now) + elif action == "un_archive": + return DocumentService._prepare_unarchive_update(document, now) - # Set cache to prevent indexing the same document multiple times - redis_client.setex(indexing_cache_key, 600, 1) + return None - remove_document_from_index_task.delay(document_id) + @staticmethod + def _prepare_enable_update(document, now): + """Prepare updates for enabling a document.""" + if document.enabled: + return None - elif action == "archive": - if document.archived: - continue + return { + "document": document, + "updates": {"enabled": True, "disabled_at": None, "disabled_by": None, "updated_at": now}, + "async_task": {"function": add_document_to_index_task, "args": [document.id]}, + "set_cache": True, + } - document.archived = True - document.archived_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) - document.archived_by = user.id - document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) - db.session.commit() + @staticmethod + def _prepare_disable_update(document, user, now): + """Prepare updates for disabling a document.""" + if not document.completed_at or document.indexing_status != "completed": + raise DocumentIndexingError(f"Document: {document.name} is not completed.") - if document.enabled: - # Set cache to prevent indexing the same document multiple times - redis_client.setex(indexing_cache_key, 600, 1) + if not document.enabled: + return None - remove_document_from_index_task.delay(document_id) + return { + "document": document, + "updates": {"enabled": False, "disabled_at": now, "disabled_by": user.id, "updated_at": now}, + "async_task": {"function": remove_document_from_index_task, "args": [document.id]}, + "set_cache": True, + } - elif action == "un_archive": - if not document.archived: - continue - document.archived = False - document.archived_at = None - document.archived_by = None - document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) - db.session.commit() + @staticmethod + def _prepare_archive_update(document, user, now): + """Prepare updates for archiving a document.""" + if document.archived: + return None - # Only re-index if the document is currently enabled - if document.enabled: - # Set cache to prevent indexing the same document multiple times - redis_client.setex(indexing_cache_key, 600, 1) - add_document_to_index_task.delay(document_id) + update_info = { + "document": document, + "updates": {"archived": True, "archived_at": now, "archived_by": user.id, "updated_at": now}, + "async_task": None, + "set_cache": False, + } - else: - raise ValueError(f"Invalid action: {action}") + # Only set async task and cache if document is currently enabled + if document.enabled: + update_info["async_task"] = {"function": remove_document_from_index_task, "args": [document.id]} + update_info["set_cache"] = True + + return update_info + + @staticmethod + def _prepare_unarchive_update(document, now): + """Prepare updates for unarchiving a document.""" + if not document.archived: + return None + + update_info = { + "document": document, + "updates": {"archived": False, "archived_at": None, "archived_by": None, "updated_at": now}, + "async_task": None, + "set_cache": False, + } + + # Only re-index if the document is currently enabled + if document.enabled: + update_info["async_task"] = {"function": add_document_to_index_task, "args": [document.id]} + update_info["set_cache"] = True + + return update_info class SegmentService: diff --git a/api/tests/unit_tests/conftest.py b/api/tests/unit_tests/conftest.py index e09acc4c39..077ffe3408 100644 --- a/api/tests/unit_tests/conftest.py +++ b/api/tests/unit_tests/conftest.py @@ -1,4 +1,5 @@ import os +from unittest.mock import MagicMock, patch import pytest from flask import Flask @@ -11,6 +12,24 @@ PROJECT_DIR = os.path.abspath(os.path.join(ABS_PATH, os.pardir, os.pardir)) CACHED_APP = Flask(__name__) +# set global mock for Redis client +redis_mock = MagicMock() +redis_mock.get = MagicMock(return_value=None) +redis_mock.setex = MagicMock() +redis_mock.setnx = MagicMock() +redis_mock.delete = MagicMock() +redis_mock.lock = MagicMock() +redis_mock.exists = MagicMock(return_value=False) +redis_mock.set = MagicMock() +redis_mock.expire = MagicMock() +redis_mock.hgetall = MagicMock(return_value={}) +redis_mock.hdel = MagicMock() +redis_mock.incr = MagicMock(return_value=1) + +# apply the mock to the Redis client in the Flask app +redis_patcher = patch("extensions.ext_redis.redis_client", redis_mock) +redis_patcher.start() + @pytest.fixture def app() -> Flask: @@ -21,3 +40,19 @@ def app() -> Flask: def _provide_app_context(app: Flask): with app.app_context(): yield + + +@pytest.fixture(autouse=True) +def reset_redis_mock(): + """reset the Redis mock before each test""" + redis_mock.reset_mock() + redis_mock.get.return_value = None + redis_mock.setex.return_value = None + redis_mock.setnx.return_value = None + redis_mock.delete.return_value = None + redis_mock.exists.return_value = False + redis_mock.set.return_value = None + redis_mock.expire.return_value = None + redis_mock.hgetall.return_value = {} + redis_mock.hdel.return_value = None + redis_mock.incr.return_value = 1 diff --git a/api/tests/unit_tests/services/test_dataset_service.py b/api/tests/unit_tests/services/test_dataset_service.py new file mode 100644 index 0000000000..f22500cfe4 --- /dev/null +++ b/api/tests/unit_tests/services/test_dataset_service.py @@ -0,0 +1,1238 @@ +import datetime +import unittest + +# Mock redis_client before importing dataset_service +from unittest.mock import Mock, call, patch + +import pytest + +from models.dataset import Dataset, Document +from services.dataset_service import DocumentService +from services.errors.document import DocumentIndexingError +from tests.unit_tests.conftest import redis_mock + + +class TestDatasetServiceBatchUpdateDocumentStatus(unittest.TestCase): + """ + Comprehensive unit tests for DocumentService.batch_update_document_status method. + + This test suite covers all supported actions (enable, disable, archive, un_archive), + error conditions, edge cases, and validates proper interaction with Redis cache, + database operations, and async task triggers. + """ + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.add_document_to_index_task") + @patch("services.dataset_service.DocumentService.get_document") + @patch("services.dataset_service.datetime") + def test_batch_update_enable_documents_success(self, mock_datetime, mock_get_doc, mock_add_task, mock_db): + """ + Test successful enabling of disabled documents. + + Verifies that: + 1. Only disabled documents are processed (already enabled documents are skipped) + 2. Document attributes are updated correctly (enabled=True, metadata cleared) + 3. Database changes are committed for each document + 4. Redis cache keys are set to prevent concurrent indexing + 5. Async indexing task is triggered for each enabled document + 6. Timestamp fields are properly updated + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create mock disabled document + mock_disabled_doc_1 = Mock(spec=Document) + mock_disabled_doc_1.id = "doc-1" + mock_disabled_doc_1.name = "disabled_document.pdf" + mock_disabled_doc_1.enabled = False + mock_disabled_doc_1.archived = False + mock_disabled_doc_1.indexing_status = "completed" + mock_disabled_doc_1.completed_at = datetime.datetime.now() + + mock_disabled_doc_2 = Mock(spec=Document) + mock_disabled_doc_2.id = "doc-2" + mock_disabled_doc_2.name = "disabled_document.pdf" + mock_disabled_doc_2.enabled = False + mock_disabled_doc_2.archived = False + mock_disabled_doc_2.indexing_status = "completed" + mock_disabled_doc_2.completed_at = datetime.datetime.now() + + # Set up mock return values + current_time = datetime.datetime(2023, 1, 1, 12, 0, 0) + mock_datetime.datetime.now.return_value = current_time + mock_datetime.UTC = datetime.UTC + + # Mock document retrieval to return disabled documents + mock_get_doc.side_effect = [mock_disabled_doc_1, mock_disabled_doc_2] + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Call the method to enable documents + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-1", "doc-2"], action="enable", user=mock_user + ) + + # Verify document attributes were updated correctly + for mock_doc in [mock_disabled_doc_1, mock_disabled_doc_2]: + # Check that document was enabled + assert mock_doc.enabled == True + # Check that disable metadata was cleared + assert mock_doc.disabled_at is None + assert mock_doc.disabled_by is None + # Check that update timestamp was set + assert mock_doc.updated_at == current_time.replace(tzinfo=None) + + # Verify Redis cache operations + expected_cache_calls = [call("document_doc-1_indexing"), call("document_doc-2_indexing")] + redis_mock.get.assert_has_calls(expected_cache_calls) + + # Verify Redis cache was set to prevent concurrent indexing (600 seconds) + expected_setex_calls = [call("document_doc-1_indexing", 600, 1), call("document_doc-2_indexing", 600, 1)] + redis_mock.setex.assert_has_calls(expected_setex_calls) + + # Verify async tasks were triggered for indexing + expected_task_calls = [call("doc-1"), call("doc-2")] + mock_add_task.delay.assert_has_calls(expected_task_calls) + + # Verify database add counts (one add for one document) + assert mock_db.add.call_count == 2 + # Verify database commits (one commit for the batch operation) + assert mock_db.commit.call_count == 1 + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.remove_document_from_index_task") + @patch("services.dataset_service.DocumentService.get_document") + @patch("services.dataset_service.datetime") + def test_batch_update_disable_documents_success(self, mock_datetime, mock_get_doc, mock_remove_task, mock_db): + """ + Test successful disabling of enabled and completed documents. + + Verifies that: + 1. Only completed and enabled documents can be disabled + 2. Document attributes are updated correctly (enabled=False, disable metadata set) + 3. User ID is recorded in disabled_by field + 4. Database changes are committed for each document + 5. Redis cache keys are set to prevent concurrent indexing + 6. Async task is triggered to remove documents from index + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create mock enabled document + mock_enabled_doc_1 = Mock(spec=Document) + mock_enabled_doc_1.id = "doc-1" + mock_enabled_doc_1.name = "enabled_document.pdf" + mock_enabled_doc_1.enabled = True + mock_enabled_doc_1.archived = False + mock_enabled_doc_1.indexing_status = "completed" + mock_enabled_doc_1.completed_at = datetime.datetime.now() + + mock_enabled_doc_2 = Mock(spec=Document) + mock_enabled_doc_2.id = "doc-2" + mock_enabled_doc_2.name = "enabled_document.pdf" + mock_enabled_doc_2.enabled = True + mock_enabled_doc_2.archived = False + mock_enabled_doc_2.indexing_status = "completed" + mock_enabled_doc_2.completed_at = datetime.datetime.now() + + # Set up mock return values + current_time = datetime.datetime(2023, 1, 1, 12, 0, 0) + mock_datetime.datetime.now.return_value = current_time + mock_datetime.UTC = datetime.UTC + + # Mock document retrieval to return enabled, completed documents + mock_get_doc.side_effect = [mock_enabled_doc_1, mock_enabled_doc_2] + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Call the method to disable documents + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-1", "doc-2"], action="disable", user=mock_user + ) + + # Verify document attributes were updated correctly + for mock_doc in [mock_enabled_doc_1, mock_enabled_doc_2]: + # Check that document was disabled + assert mock_doc.enabled == False + # Check that disable metadata was set correctly + assert mock_doc.disabled_at == current_time.replace(tzinfo=None) + assert mock_doc.disabled_by == mock_user.id + # Check that update timestamp was set + assert mock_doc.updated_at == current_time.replace(tzinfo=None) + + # Verify Redis cache operations for indexing prevention + expected_setex_calls = [call("document_doc-1_indexing", 600, 1), call("document_doc-2_indexing", 600, 1)] + redis_mock.setex.assert_has_calls(expected_setex_calls) + + # Verify async tasks were triggered to remove from index + expected_task_calls = [call("doc-1"), call("doc-2")] + mock_remove_task.delay.assert_has_calls(expected_task_calls) + + # Verify database add counts (one add for one document) + assert mock_db.add.call_count == 2 + # Verify database commits (totally 1 for any batch operation) + assert mock_db.commit.call_count == 1 + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.remove_document_from_index_task") + @patch("services.dataset_service.DocumentService.get_document") + @patch("services.dataset_service.datetime") + def test_batch_update_archive_documents_success(self, mock_datetime, mock_get_doc, mock_remove_task, mock_db): + """ + Test successful archiving of unarchived documents. + + Verifies that: + 1. Only unarchived documents are processed (already archived are skipped) + 2. Document attributes are updated correctly (archived=True, archive metadata set) + 3. User ID is recorded in archived_by field + 4. If documents are enabled, they are removed from the index + 5. Redis cache keys are set only for enabled documents being archived + 6. Database changes are committed for each document + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create unarchived enabled document + unarchived_doc = Mock(spec=Document) + # Manually set attributes to ensure they can be modified + unarchived_doc.id = "doc-1" + unarchived_doc.name = "unarchived_document.pdf" + unarchived_doc.enabled = True + unarchived_doc.archived = False + + # Set up mock return values + current_time = datetime.datetime(2023, 1, 1, 12, 0, 0) + mock_datetime.datetime.now.return_value = current_time + mock_datetime.UTC = datetime.UTC + + mock_get_doc.return_value = unarchived_doc + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Call the method to archive documents + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-1"], action="archive", user=mock_user + ) + + # Verify document attributes were updated correctly + assert unarchived_doc.archived == True + assert unarchived_doc.archived_at == current_time.replace(tzinfo=None) + assert unarchived_doc.archived_by == mock_user.id + assert unarchived_doc.updated_at == current_time.replace(tzinfo=None) + + # Verify Redis cache was set (because document was enabled) + redis_mock.setex.assert_called_once_with("document_doc-1_indexing", 600, 1) + + # Verify async task was triggered to remove from index (because enabled) + mock_remove_task.delay.assert_called_once_with("doc-1") + + # Verify database add + mock_db.add.assert_called_once() + # Verify database commit + mock_db.commit.assert_called_once() + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.add_document_to_index_task") + @patch("services.dataset_service.DocumentService.get_document") + @patch("services.dataset_service.datetime") + def test_batch_update_unarchive_documents_success(self, mock_datetime, mock_get_doc, mock_add_task, mock_db): + """ + Test successful unarchiving of archived documents. + + Verifies that: + 1. Only archived documents are processed (already unarchived are skipped) + 2. Document attributes are updated correctly (archived=False, archive metadata cleared) + 3. If documents are enabled, they are added back to the index + 4. Redis cache keys are set only for enabled documents being unarchived + 5. Database changes are committed for each document + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create mock archived document + mock_archived_doc = Mock(spec=Document) + mock_archived_doc.id = "doc-3" + mock_archived_doc.name = "archived_document.pdf" + mock_archived_doc.enabled = True + mock_archived_doc.archived = True + mock_archived_doc.indexing_status = "completed" + mock_archived_doc.completed_at = datetime.datetime.now() + + # Set up mock return values + current_time = datetime.datetime(2023, 1, 1, 12, 0, 0) + mock_datetime.datetime.now.return_value = current_time + mock_datetime.UTC = datetime.UTC + + mock_get_doc.return_value = mock_archived_doc + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Call the method to unarchive documents + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-3"], action="un_archive", user=mock_user + ) + + # Verify document attributes were updated correctly + assert mock_archived_doc.archived == False + assert mock_archived_doc.archived_at is None + assert mock_archived_doc.archived_by is None + assert mock_archived_doc.updated_at == current_time.replace(tzinfo=None) + + # Verify Redis cache was set (because document is enabled) + redis_mock.setex.assert_called_once_with("document_doc-3_indexing", 600, 1) + + # Verify async task was triggered to add back to index (because enabled) + mock_add_task.delay.assert_called_once_with("doc-3") + + # Verify database add + mock_db.add.assert_called_once() + # Verify database commit + mock_db.commit.assert_called_once() + + @patch("services.dataset_service.DocumentService.get_document") + def test_batch_update_document_indexing_error_redis_cache_hit(self, mock_get_doc): + """ + Test that DocumentIndexingError is raised when documents are currently being indexed. + + Verifies that: + 1. The method checks Redis cache for active indexing operations + 2. DocumentIndexingError is raised if any document is being indexed + 3. Error message includes the document name for user feedback + 4. No further processing occurs when indexing is detected + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create mock enabled document + mock_enabled_doc = Mock(spec=Document) + mock_enabled_doc.id = "doc-1" + mock_enabled_doc.name = "enabled_document.pdf" + mock_enabled_doc.enabled = True + mock_enabled_doc.archived = False + mock_enabled_doc.indexing_status = "completed" + mock_enabled_doc.completed_at = datetime.datetime.now() + + # Set up mock to indicate document is being indexed + mock_get_doc.return_value = mock_enabled_doc + + # Reset module-level Redis mock, set to indexing status + redis_mock.reset_mock() + redis_mock.get.return_value = "indexing" + + # Verify that DocumentIndexingError is raised + with pytest.raises(DocumentIndexingError) as exc_info: + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-1"], action="enable", user=mock_user + ) + + # Verify error message contains document name + assert "enabled_document.pdf" in str(exc_info.value) + assert "is being indexed" in str(exc_info.value) + + # Verify Redis cache was checked + redis_mock.get.assert_called_once_with("document_doc-1_indexing") + + @patch("services.dataset_service.DocumentService.get_document") + def test_batch_update_disable_non_completed_document_error(self, mock_get_doc): + """ + Test that DocumentIndexingError is raised when trying to disable non-completed documents. + + Verifies that: + 1. Only completed documents can be disabled + 2. DocumentIndexingError is raised for non-completed documents + 3. Error message indicates the document is not completed + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create a document that's not completed + non_completed_doc = Mock(spec=Document) + # Manually set attributes to ensure they can be modified + non_completed_doc.id = "doc-1" + non_completed_doc.name = "indexing_document.pdf" + non_completed_doc.enabled = True + non_completed_doc.indexing_status = "indexing" # Not completed + non_completed_doc.completed_at = None # Not completed + + mock_get_doc.return_value = non_completed_doc + + # Verify that DocumentIndexingError is raised + with pytest.raises(DocumentIndexingError) as exc_info: + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-1"], action="disable", user=mock_user + ) + + # Verify error message indicates document is not completed + assert "is not completed" in str(exc_info.value) + + @patch("services.dataset_service.DocumentService.get_document") + def test_batch_update_empty_document_list(self, mock_get_doc): + """ + Test batch operations with an empty document ID list. + + Verifies that: + 1. The method handles empty input gracefully + 2. No document operations are performed with empty input + 3. No errors are raised with empty input + 4. Method returns early without processing + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Call method with empty document list + result = DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=[], action="enable", user=mock_user + ) + + # Verify no document lookups were performed + mock_get_doc.assert_not_called() + + # Verify method returns None (early return) + assert result is None + + @patch("services.dataset_service.DocumentService.get_document") + def test_batch_update_document_not_found_skipped(self, mock_get_doc): + """ + Test behavior when some documents don't exist in the database. + + Verifies that: + 1. Non-existent documents are gracefully skipped + 2. Processing continues for existing documents + 3. No errors are raised for missing document IDs + 4. Method completes successfully despite missing documents + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Mock document service to return None (document not found) + mock_get_doc.return_value = None + + # Call method with non-existent document ID + # This should not raise an error, just skip the missing document + try: + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["non-existent-doc"], action="enable", user=mock_user + ) + except Exception as e: + pytest.fail(f"Method should not raise exception for missing documents: {e}") + + # Verify document lookup was attempted + mock_get_doc.assert_called_once_with(mock_dataset.id, "non-existent-doc") + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.DocumentService.get_document") + def test_batch_update_enable_already_enabled_document_skipped(self, mock_get_doc, mock_db): + """ + Test enabling documents that are already enabled. + + Verifies that: + 1. Already enabled documents are skipped (no unnecessary operations) + 2. No database commits occur for already enabled documents + 3. No Redis cache operations occur for skipped documents + 4. No async tasks are triggered for skipped documents + 5. Method completes successfully + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create mock enabled document + mock_enabled_doc = Mock(spec=Document) + mock_enabled_doc.id = "doc-1" + mock_enabled_doc.name = "enabled_document.pdf" + mock_enabled_doc.enabled = True + mock_enabled_doc.archived = False + mock_enabled_doc.indexing_status = "completed" + mock_enabled_doc.completed_at = datetime.datetime.now() + + # Mock document that is already enabled + mock_get_doc.return_value = mock_enabled_doc # Already enabled + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Attempt to enable already enabled document + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-1"], action="enable", user=mock_user + ) + + # Verify no database operations occurred (document was skipped) + mock_db.commit.assert_not_called() + + # Verify no Redis setex operations occurred (document was skipped) + redis_mock.setex.assert_not_called() + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.DocumentService.get_document") + def test_batch_update_archive_already_archived_document_skipped(self, mock_get_doc, mock_db): + """ + Test archiving documents that are already archived. + + Verifies that: + 1. Already archived documents are skipped (no unnecessary operations) + 2. No database commits occur for already archived documents + 3. No Redis cache operations occur for skipped documents + 4. No async tasks are triggered for skipped documents + 5. Method completes successfully + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create mock archived document + mock_archived_doc = Mock(spec=Document) + mock_archived_doc.id = "doc-3" + mock_archived_doc.name = "archived_document.pdf" + mock_archived_doc.enabled = True + mock_archived_doc.archived = True + mock_archived_doc.indexing_status = "completed" + mock_archived_doc.completed_at = datetime.datetime.now() + + # Mock document that is already archived + mock_get_doc.return_value = mock_archived_doc # Already archived + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Attempt to archive already archived document + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-3"], action="archive", user=mock_user + ) + + # Verify no database operations occurred (document was skipped) + mock_db.commit.assert_not_called() + + # Verify no Redis setex operations occurred (document was skipped) + redis_mock.setex.assert_not_called() + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.add_document_to_index_task") + @patch("services.dataset_service.remove_document_from_index_task") + @patch("services.dataset_service.DocumentService.get_document") + @patch("services.dataset_service.datetime") + def test_batch_update_mixed_document_states_and_actions( + self, mock_datetime, mock_get_doc, mock_remove_task, mock_add_task, mock_db + ): + """ + Test batch operations on documents with mixed states and various scenarios. + + Verifies that: + 1. Each document is processed according to its current state + 2. Some documents may be skipped while others are processed + 3. Different async tasks are triggered based on document states + 4. Method handles mixed scenarios gracefully + 5. Database commits occur only for documents that were actually modified + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create mock documents with different states + mock_disabled_doc = Mock(spec=Document) + mock_disabled_doc.id = "doc-1" + mock_disabled_doc.name = "disabled_document.pdf" + mock_disabled_doc.enabled = False + mock_disabled_doc.archived = False + mock_disabled_doc.indexing_status = "completed" + mock_disabled_doc.completed_at = datetime.datetime.now() + + mock_enabled_doc = Mock(spec=Document) + mock_enabled_doc.id = "doc-2" + mock_enabled_doc.name = "enabled_document.pdf" + mock_enabled_doc.enabled = True + mock_enabled_doc.archived = False + mock_enabled_doc.indexing_status = "completed" + mock_enabled_doc.completed_at = datetime.datetime.now() + + mock_archived_doc = Mock(spec=Document) + mock_archived_doc.id = "doc-3" + mock_archived_doc.name = "archived_document.pdf" + mock_archived_doc.enabled = True + mock_archived_doc.archived = True + mock_archived_doc.indexing_status = "completed" + mock_archived_doc.completed_at = datetime.datetime.now() + + # Set up mixed document states + current_time = datetime.datetime(2023, 1, 1, 12, 0, 0) + mock_datetime.datetime.now.return_value = current_time + mock_datetime.UTC = datetime.UTC + + # Mix of different document states + documents = [ + mock_disabled_doc, # Will be enabled + mock_enabled_doc, # Already enabled, will be skipped + mock_archived_doc, # Archived but enabled, will be skipped for enable action + ] + + mock_get_doc.side_effect = documents + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Perform enable operation on mixed state documents + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-1", "doc-2", "doc-3"], action="enable", user=mock_user + ) + + # Verify only the disabled document was processed + # (enabled and archived documents should be skipped for enable action) + + # Only one add should occur (for the disabled document that was enabled) + mock_db.add.assert_called_once() + # Only one commit should occur + mock_db.commit.assert_called_once() + + # Only one Redis setex should occur (for the document that was enabled) + redis_mock.setex.assert_called_once_with("document_doc-1_indexing", 600, 1) + + # Only one async task should be triggered (for the document that was enabled) + mock_add_task.delay.assert_called_once_with("doc-1") + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.remove_document_from_index_task") + @patch("services.dataset_service.DocumentService.get_document") + @patch("services.dataset_service.datetime") + def test_batch_update_archive_disabled_document_no_index_removal( + self, mock_datetime, mock_get_doc, mock_remove_task, mock_db + ): + """ + Test archiving disabled documents (should not trigger index removal). + + Verifies that: + 1. Disabled documents can be archived + 2. Archive metadata is set correctly + 3. No index removal task is triggered (because document is disabled) + 4. No Redis cache key is set (because document is disabled) + 5. Database commit still occurs + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Set up disabled, unarchived document + current_time = datetime.datetime(2023, 1, 1, 12, 0, 0) + mock_datetime.datetime.now.return_value = current_time + mock_datetime.UTC = datetime.UTC + + disabled_unarchived_doc = Mock(spec=Document) + # Manually set attributes to ensure they can be modified + disabled_unarchived_doc.id = "doc-1" + disabled_unarchived_doc.name = "disabled_document.pdf" + disabled_unarchived_doc.enabled = False # Disabled + disabled_unarchived_doc.archived = False # Not archived + + mock_get_doc.return_value = disabled_unarchived_doc + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Archive the disabled document + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-1"], action="archive", user=mock_user + ) + + # Verify document was archived + assert disabled_unarchived_doc.archived == True + assert disabled_unarchived_doc.archived_at == current_time.replace(tzinfo=None) + assert disabled_unarchived_doc.archived_by == mock_user.id + + # Verify no Redis cache was set (document is disabled) + redis_mock.setex.assert_not_called() + + # Verify no index removal task was triggered (document is disabled) + mock_remove_task.delay.assert_not_called() + + # Verify database add still occurred + mock_db.add.assert_called_once() + # Verify database commit still occurred + mock_db.commit.assert_called_once() + + @patch("services.dataset_service.DocumentService.get_document") + def test_batch_update_invalid_action_error(self, mock_get_doc): + """ + Test that ValueError is raised when an invalid action is provided. + + Verifies that: + 1. Invalid actions are rejected with ValueError + 2. Error message includes the invalid action name + 3. No document processing occurs with invalid actions + 4. Method fails fast on invalid input + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create mock document + mock_doc = Mock(spec=Document) + mock_doc.id = "doc-1" + mock_doc.name = "test_document.pdf" + mock_doc.enabled = True + mock_doc.archived = False + + mock_get_doc.return_value = mock_doc + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Test with invalid action + invalid_action = "invalid_action" + with pytest.raises(ValueError) as exc_info: + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-1"], action=invalid_action, user=mock_user + ) + + # Verify error message contains the invalid action + assert invalid_action in str(exc_info.value) + assert "Invalid action" in str(exc_info.value) + + # Verify no Redis operations occurred + redis_mock.setex.assert_not_called() + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.add_document_to_index_task") + @patch("services.dataset_service.DocumentService.get_document") + @patch("services.dataset_service.datetime") + def test_batch_update_disable_already_disabled_document_skipped( + self, mock_datetime, mock_get_doc, mock_add_task, mock_db + ): + """ + Test disabling documents that are already disabled. + + Verifies that: + 1. Already disabled documents are skipped (no unnecessary operations) + 2. No database commits occur for already disabled documents + 3. No Redis cache operations occur for skipped documents + 4. No async tasks are triggered for skipped documents + 5. Method completes successfully + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create mock disabled document + mock_disabled_doc = Mock(spec=Document) + mock_disabled_doc.id = "doc-1" + mock_disabled_doc.name = "disabled_document.pdf" + mock_disabled_doc.enabled = False # Already disabled + mock_disabled_doc.archived = False + mock_disabled_doc.indexing_status = "completed" + mock_disabled_doc.completed_at = datetime.datetime.now() + + # Mock document that is already disabled + mock_get_doc.return_value = mock_disabled_doc + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Attempt to disable already disabled document + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-1"], action="disable", user=mock_user + ) + + # Verify no database operations occurred (document was skipped) + mock_db.commit.assert_not_called() + + # Verify no Redis setex operations occurred (document was skipped) + redis_mock.setex.assert_not_called() + + # Verify no async tasks were triggered (document was skipped) + mock_add_task.delay.assert_not_called() + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.add_document_to_index_task") + @patch("services.dataset_service.DocumentService.get_document") + @patch("services.dataset_service.datetime") + def test_batch_update_unarchive_already_unarchived_document_skipped( + self, mock_datetime, mock_get_doc, mock_add_task, mock_db + ): + """ + Test unarchiving documents that are already unarchived. + + Verifies that: + 1. Already unarchived documents are skipped (no unnecessary operations) + 2. No database commits occur for already unarchived documents + 3. No Redis cache operations occur for skipped documents + 4. No async tasks are triggered for skipped documents + 5. Method completes successfully + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create mock unarchived document + mock_unarchived_doc = Mock(spec=Document) + mock_unarchived_doc.id = "doc-1" + mock_unarchived_doc.name = "unarchived_document.pdf" + mock_unarchived_doc.enabled = True + mock_unarchived_doc.archived = False # Already unarchived + mock_unarchived_doc.indexing_status = "completed" + mock_unarchived_doc.completed_at = datetime.datetime.now() + + # Mock document that is already unarchived + mock_get_doc.return_value = mock_unarchived_doc + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Attempt to unarchive already unarchived document + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-1"], action="un_archive", user=mock_user + ) + + # Verify no database operations occurred (document was skipped) + mock_db.commit.assert_not_called() + + # Verify no Redis setex operations occurred (document was skipped) + redis_mock.setex.assert_not_called() + + # Verify no async tasks were triggered (document was skipped) + mock_add_task.delay.assert_not_called() + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.add_document_to_index_task") + @patch("services.dataset_service.DocumentService.get_document") + @patch("services.dataset_service.datetime") + def test_batch_update_unarchive_disabled_document_no_index_addition( + self, mock_datetime, mock_get_doc, mock_add_task, mock_db + ): + """ + Test unarchiving disabled documents (should not trigger index addition). + + Verifies that: + 1. Disabled documents can be unarchived + 2. Unarchive metadata is cleared correctly + 3. No index addition task is triggered (because document is disabled) + 4. No Redis cache key is set (because document is disabled) + 5. Database commit still occurs + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create mock archived but disabled document + mock_archived_disabled_doc = Mock(spec=Document) + mock_archived_disabled_doc.id = "doc-1" + mock_archived_disabled_doc.name = "archived_disabled_document.pdf" + mock_archived_disabled_doc.enabled = False # Disabled + mock_archived_disabled_doc.archived = True # Archived + mock_archived_disabled_doc.indexing_status = "completed" + mock_archived_disabled_doc.completed_at = datetime.datetime.now() + + # Set up mock return values + current_time = datetime.datetime(2023, 1, 1, 12, 0, 0) + mock_datetime.datetime.now.return_value = current_time + mock_datetime.UTC = datetime.UTC + + mock_get_doc.return_value = mock_archived_disabled_doc + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Unarchive the disabled document + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-1"], action="un_archive", user=mock_user + ) + + # Verify document was unarchived + assert mock_archived_disabled_doc.archived == False + assert mock_archived_disabled_doc.archived_at is None + assert mock_archived_disabled_doc.archived_by is None + assert mock_archived_disabled_doc.updated_at == current_time.replace(tzinfo=None) + + # Verify no Redis cache was set (document is disabled) + redis_mock.setex.assert_not_called() + + # Verify no index addition task was triggered (document is disabled) + mock_add_task.delay.assert_not_called() + + # Verify database add still occurred + mock_db.add.assert_called_once() + # Verify database commit still occurred + mock_db.commit.assert_called_once() + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.add_document_to_index_task") + @patch("services.dataset_service.DocumentService.get_document") + @patch("services.dataset_service.datetime") + def test_batch_update_async_task_error_handling(self, mock_datetime, mock_get_doc, mock_add_task, mock_db): + """ + Test handling of async task errors during batch operations. + + Verifies that: + 1. Async task errors are properly handled + 2. Database operations complete successfully + 3. Redis cache operations complete successfully + 4. Method continues processing despite async task errors + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create mock disabled document + mock_disabled_doc = Mock(spec=Document) + mock_disabled_doc.id = "doc-1" + mock_disabled_doc.name = "disabled_document.pdf" + mock_disabled_doc.enabled = False + mock_disabled_doc.archived = False + mock_disabled_doc.indexing_status = "completed" + mock_disabled_doc.completed_at = datetime.datetime.now() + + # Set up mock return values + current_time = datetime.datetime(2023, 1, 1, 12, 0, 0) + mock_datetime.datetime.now.return_value = current_time + mock_datetime.UTC = datetime.UTC + + mock_get_doc.return_value = mock_disabled_doc + + # Mock async task to raise an exception + mock_add_task.delay.side_effect = Exception("Celery task error") + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Verify that async task error is propagated + with pytest.raises(Exception) as exc_info: + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=["doc-1"], action="enable", user=mock_user + ) + + # Verify error message + assert "Celery task error" in str(exc_info.value) + + # Verify database operations completed successfully + mock_db.add.assert_called_once() + mock_db.commit.assert_called_once() + + # Verify Redis cache was set successfully + redis_mock.setex.assert_called_once_with("document_doc-1_indexing", 600, 1) + + # Verify document was updated + assert mock_disabled_doc.enabled == True + assert mock_disabled_doc.disabled_at is None + assert mock_disabled_doc.disabled_by is None + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.add_document_to_index_task") + @patch("services.dataset_service.DocumentService.get_document") + @patch("services.dataset_service.datetime") + def test_batch_update_large_document_list_performance(self, mock_datetime, mock_get_doc, mock_add_task, mock_db): + """ + Test batch operations with a large number of documents. + + Verifies that: + 1. Method can handle large document lists efficiently + 2. All documents are processed correctly + 3. Database commits occur for each document + 4. Redis cache operations occur for each document + 5. Async tasks are triggered for each document + 6. Performance remains consistent with large inputs + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create large list of document IDs + document_ids = [f"doc-{i}" for i in range(1, 101)] # 100 documents + + # Create mock documents + mock_documents = [] + for i in range(1, 101): + mock_doc = Mock(spec=Document) + mock_doc.id = f"doc-{i}" + mock_doc.name = f"document_{i}.pdf" + mock_doc.enabled = False # All disabled, will be enabled + mock_doc.archived = False + mock_doc.indexing_status = "completed" + mock_doc.completed_at = datetime.datetime.now() + mock_documents.append(mock_doc) + + # Set up mock return values + current_time = datetime.datetime(2023, 1, 1, 12, 0, 0) + mock_datetime.datetime.now.return_value = current_time + mock_datetime.UTC = datetime.UTC + + mock_get_doc.side_effect = mock_documents + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Perform batch enable operation + DocumentService.batch_update_document_status( + dataset=mock_dataset, document_ids=document_ids, action="enable", user=mock_user + ) + + # Verify all documents were processed + assert mock_get_doc.call_count == 100 + + # Verify all documents were updated + for mock_doc in mock_documents: + assert mock_doc.enabled == True + assert mock_doc.disabled_at is None + assert mock_doc.disabled_by is None + assert mock_doc.updated_at == current_time.replace(tzinfo=None) + + # Verify database commits, one add for one document + assert mock_db.add.call_count == 100 + # Verify database commits, one commit for the batch operation + assert mock_db.commit.call_count == 1 + + # Verify Redis cache operations occurred for each document + assert redis_mock.setex.call_count == 100 + + # Verify async tasks were triggered for each document + assert mock_add_task.delay.call_count == 100 + + # Verify correct Redis cache keys were set + expected_redis_calls = [call(f"document_doc-{i}_indexing", 600, 1) for i in range(1, 101)] + redis_mock.setex.assert_has_calls(expected_redis_calls) + + # Verify correct async task calls + expected_task_calls = [call(f"doc-{i}") for i in range(1, 101)] + mock_add_task.delay.assert_has_calls(expected_task_calls) + + @patch("extensions.ext_database.db.session") + @patch("services.dataset_service.add_document_to_index_task") + @patch("services.dataset_service.DocumentService.get_document") + @patch("services.dataset_service.datetime") + def test_batch_update_mixed_document_states_complex_scenario( + self, mock_datetime, mock_get_doc, mock_add_task, mock_db + ): + """ + Test complex batch operations with documents in various states. + + Verifies that: + 1. Each document is processed according to its current state + 2. Some documents are skipped while others are processed + 3. Different actions trigger different async tasks + 4. Database commits occur only for modified documents + 5. Redis cache operations occur only for relevant documents + 6. Method handles complex mixed scenarios correctly + """ + # Create mock dataset + mock_dataset = Mock(spec=Dataset) + mock_dataset.id = "dataset-123" + mock_dataset.tenant_id = "tenant-456" + + # Create mock user + mock_user = Mock() + mock_user.id = "user-789" + + # Create documents in various states + current_time = datetime.datetime(2023, 1, 1, 12, 0, 0) + mock_datetime.datetime.now.return_value = current_time + mock_datetime.UTC = datetime.UTC + + # Document 1: Disabled, will be enabled + doc1 = Mock(spec=Document) + doc1.id = "doc-1" + doc1.name = "disabled_doc.pdf" + doc1.enabled = False + doc1.archived = False + doc1.indexing_status = "completed" + doc1.completed_at = datetime.datetime.now() + + # Document 2: Already enabled, will be skipped + doc2 = Mock(spec=Document) + doc2.id = "doc-2" + doc2.name = "enabled_doc.pdf" + doc2.enabled = True + doc2.archived = False + doc2.indexing_status = "completed" + doc2.completed_at = datetime.datetime.now() + + # Document 3: Enabled and completed, will be disabled + doc3 = Mock(spec=Document) + doc3.id = "doc-3" + doc3.name = "enabled_completed_doc.pdf" + doc3.enabled = True + doc3.archived = False + doc3.indexing_status = "completed" + doc3.completed_at = datetime.datetime.now() + + # Document 4: Unarchived, will be archived + doc4 = Mock(spec=Document) + doc4.id = "doc-4" + doc4.name = "unarchived_doc.pdf" + doc4.enabled = True + doc4.archived = False + doc4.indexing_status = "completed" + doc4.completed_at = datetime.datetime.now() + + # Document 5: Archived, will be unarchived + doc5 = Mock(spec=Document) + doc5.id = "doc-5" + doc5.name = "archived_doc.pdf" + doc5.enabled = True + doc5.archived = True + doc5.indexing_status = "completed" + doc5.completed_at = datetime.datetime.now() + + # Document 6: Non-existent, will be skipped + doc6 = None + + mock_get_doc.side_effect = [doc1, doc2, doc3, doc4, doc5, doc6] + + # Reset module-level Redis mock + redis_mock.reset_mock() + redis_mock.get.return_value = None + + # Perform mixed batch operations + DocumentService.batch_update_document_status( + dataset=mock_dataset, + document_ids=["doc-1", "doc-2", "doc-3", "doc-4", "doc-5", "doc-6"], + action="enable", # This will only affect doc1 and doc3 (doc3 will be enabled then disabled) + user=mock_user, + ) + + # Verify document 1 was enabled + assert doc1.enabled == True + assert doc1.disabled_at is None + assert doc1.disabled_by is None + + # Verify document 2 was skipped (already enabled) + assert doc2.enabled == True # No change + + # Verify document 3 was skipped (already enabled) + assert doc3.enabled == True + + # Verify document 4 was skipped (not affected by enable action) + assert doc4.enabled == True # No change + + # Verify document 5 was skipped (not affected by enable action) + assert doc5.enabled == True # No change + + # Verify database commits occurred for processed documents + # Only doc1 should be added (doc2, doc3, doc4, doc5 were skipped, doc6 doesn't exist) + assert mock_db.add.call_count == 1 + assert mock_db.commit.call_count == 1 + + # Verify Redis cache operations occurred for processed documents + # Only doc1 should have Redis operations + assert redis_mock.setex.call_count == 1 + + # Verify async tasks were triggered for processed documents + # Only doc1 should trigger tasks + assert mock_add_task.delay.call_count == 1 + + # Verify correct Redis cache keys were set + expected_redis_calls = [call("document_doc-1_indexing", 600, 1)] + redis_mock.setex.assert_has_calls(expected_redis_calls) + + # Verify correct async task calls + expected_task_calls = [call("doc-1")] + mock_add_task.delay.assert_has_calls(expected_task_calls)