diff --git a/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py index 68e485107c..f1530bcac6 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py @@ -256,7 +256,7 @@ class TestAddDocumentToIndexTask: """ # Arrange: Use non-existent document ID fake = Faker() - non_existent_id = fake.uuid4() + non_existent_id = str(fake.uuid4()) # Act: Execute the task with non-existent document add_document_to_index_task(non_existent_id) @@ -282,7 +282,7 @@ class TestAddDocumentToIndexTask: - Redis cache key not affected """ # Arrange: Create test data with invalid indexing status - dataset, document = self._create_test_dataset_and_document( + _, document = self._create_test_dataset_and_document( db_session_with_containers, mock_external_service_dependencies ) @@ -417,15 +417,15 @@ class TestAddDocumentToIndexTask: # Verify redis cache was cleared assert redis_client.exists(indexing_cache_key) == 0 - def test_add_document_to_index_with_no_segments_to_process( + def test_add_document_to_index_with_already_enabled_segments( self, db_session_with_containers, mock_external_service_dependencies ): """ - Test document indexing when no segments need processing. + Test document indexing when segments are already enabled. This test verifies: - - Proper handling when all segments are already enabled - - Index processing still occurs but with empty documents list + - Segments with status="completed" are processed regardless of enabled status + - Index processing occurs with all completed segments - Auto disable log deletion still occurs - Redis cache is cleared """ @@ -465,15 +465,16 @@ class TestAddDocumentToIndexTask: # Act: Execute the task add_document_to_index_task(document.id) - # Assert: Verify index processing occurred but with empty documents list + # Assert: Verify index processing occurred with all completed segments mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX) mock_external_service_dependencies["index_processor"].load.assert_called_once() - # Verify the load method was called with empty documents list + # Verify the load method was called with all completed segments + # (implementation doesn't filter by enabled status, only by status="completed") call_args = mock_external_service_dependencies["index_processor"].load.call_args assert call_args is not None documents = call_args[0][1] # Second argument should be documents list - assert len(documents) == 0 # No segments to process + assert len(documents) == 3 # All completed segments are processed # Verify redis cache was cleared assert redis_client.exists(indexing_cache_key) == 0 @@ -499,7 +500,7 @@ class TestAddDocumentToIndexTask: # Create some auto disable log entries fake = Faker() auto_disable_logs = [] - for i in range(2): + for _ in range(2): log_entry = DatasetAutoDisableLog( id=fake.uuid4(), tenant_id=document.tenant_id, @@ -595,9 +596,11 @@ class TestAddDocumentToIndexTask: Test segment filtering with various edge cases. This test verifies: - - Only segments with enabled=False and status="completed" are processed + - Only segments with status="completed" are processed (regardless of enabled status) + - Segments with status!="completed" are NOT processed - Segments are ordered by position correctly - Mixed segment states are handled properly + - All segments are updated to enabled=True after processing - Redis cache key deletion """ # Arrange: Create test data @@ -628,7 +631,8 @@ class TestAddDocumentToIndexTask: db.session.add(segment1) segments.append(segment1) - # Segment 2: Should NOT be processed (enabled=True, status="completed") + # Segment 2: Should be processed (enabled=True, status="completed") + # Note: Implementation doesn't filter by enabled status, only by status="completed" segment2 = DocumentSegment( id=fake.uuid4(), tenant_id=document.tenant_id, @@ -640,7 +644,7 @@ class TestAddDocumentToIndexTask: tokens=len(fake.text(max_nb_chars=200).split()) * 2, index_node_id="node_1", index_node_hash="hash_1", - enabled=True, # Already enabled + enabled=True, # Already enabled, but will still be processed status="completed", created_by=document.created_by, ) @@ -702,11 +706,14 @@ class TestAddDocumentToIndexTask: call_args = mock_external_service_dependencies["index_processor"].load.call_args assert call_args is not None documents = call_args[0][1] # Second argument should be documents list - assert len(documents) == 2 # Only 2 segments should be processed + assert len(documents) == 3 # 3 segments with status="completed" should be processed # Verify correct segments were processed (by position order) - assert documents[0].metadata["doc_id"] == "node_0" # position 0 - assert documents[1].metadata["doc_id"] == "node_3" # position 3 + # Segments 1, 2, 4 should be processed (positions 0, 1, 3) + # Segment 3 is skipped (position 2, status="processing") + assert documents[0].metadata["doc_id"] == "node_0" # segment1, position 0 + assert documents[1].metadata["doc_id"] == "node_1" # segment2, position 1 + assert documents[2].metadata["doc_id"] == "node_3" # segment4, position 3 # Verify database state changes db.session.refresh(document) @@ -717,7 +724,7 @@ class TestAddDocumentToIndexTask: # All segments should be enabled because the task updates ALL segments for the document assert segment1.enabled is True - assert segment2.enabled is True # Was already enabled, now updated to True + assert segment2.enabled is True # Was already enabled, stays True assert segment3.enabled is True # Was not processed but still updated to True assert segment4.enabled is True