fix document index test (#28113)

This commit is contained in:
Jyong 2025-11-12 16:00:10 +08:00 committed by CodingOnStar
parent c76f370f97
commit 76be5c054b

View File

@ -256,7 +256,7 @@ class TestAddDocumentToIndexTask:
""" """
# Arrange: Use non-existent document ID # Arrange: Use non-existent document ID
fake = Faker() fake = Faker()
non_existent_id = fake.uuid4() non_existent_id = str(fake.uuid4())
# Act: Execute the task with non-existent document # Act: Execute the task with non-existent document
add_document_to_index_task(non_existent_id) add_document_to_index_task(non_existent_id)
@ -282,7 +282,7 @@ class TestAddDocumentToIndexTask:
- Redis cache key not affected - Redis cache key not affected
""" """
# Arrange: Create test data with invalid indexing status # Arrange: Create test data with invalid indexing status
dataset, document = self._create_test_dataset_and_document( _, document = self._create_test_dataset_and_document(
db_session_with_containers, mock_external_service_dependencies db_session_with_containers, mock_external_service_dependencies
) )
@ -417,15 +417,15 @@ class TestAddDocumentToIndexTask:
# Verify redis cache was cleared # Verify redis cache was cleared
assert redis_client.exists(indexing_cache_key) == 0 assert redis_client.exists(indexing_cache_key) == 0
def test_add_document_to_index_with_no_segments_to_process( def test_add_document_to_index_with_already_enabled_segments(
self, db_session_with_containers, mock_external_service_dependencies self, db_session_with_containers, mock_external_service_dependencies
): ):
""" """
Test document indexing when no segments need processing. Test document indexing when segments are already enabled.
This test verifies: This test verifies:
- Proper handling when all segments are already enabled - Segments with status="completed" are processed regardless of enabled status
- Index processing still occurs but with empty documents list - Index processing occurs with all completed segments
- Auto disable log deletion still occurs - Auto disable log deletion still occurs
- Redis cache is cleared - Redis cache is cleared
""" """
@ -465,15 +465,16 @@ class TestAddDocumentToIndexTask:
# Act: Execute the task # Act: Execute the task
add_document_to_index_task(document.id) add_document_to_index_task(document.id)
# Assert: Verify index processing occurred but with empty documents list # Assert: Verify index processing occurred with all completed segments
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX) mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
mock_external_service_dependencies["index_processor"].load.assert_called_once() mock_external_service_dependencies["index_processor"].load.assert_called_once()
# Verify the load method was called with empty documents list # Verify the load method was called with all completed segments
# (implementation doesn't filter by enabled status, only by status="completed")
call_args = mock_external_service_dependencies["index_processor"].load.call_args call_args = mock_external_service_dependencies["index_processor"].load.call_args
assert call_args is not None assert call_args is not None
documents = call_args[0][1] # Second argument should be documents list documents = call_args[0][1] # Second argument should be documents list
assert len(documents) == 0 # No segments to process assert len(documents) == 3 # All completed segments are processed
# Verify redis cache was cleared # Verify redis cache was cleared
assert redis_client.exists(indexing_cache_key) == 0 assert redis_client.exists(indexing_cache_key) == 0
@ -499,7 +500,7 @@ class TestAddDocumentToIndexTask:
# Create some auto disable log entries # Create some auto disable log entries
fake = Faker() fake = Faker()
auto_disable_logs = [] auto_disable_logs = []
for i in range(2): for _ in range(2):
log_entry = DatasetAutoDisableLog( log_entry = DatasetAutoDisableLog(
id=fake.uuid4(), id=fake.uuid4(),
tenant_id=document.tenant_id, tenant_id=document.tenant_id,
@ -595,9 +596,11 @@ class TestAddDocumentToIndexTask:
Test segment filtering with various edge cases. Test segment filtering with various edge cases.
This test verifies: This test verifies:
- Only segments with enabled=False and status="completed" are processed - Only segments with status="completed" are processed (regardless of enabled status)
- Segments with status!="completed" are NOT processed
- Segments are ordered by position correctly - Segments are ordered by position correctly
- Mixed segment states are handled properly - Mixed segment states are handled properly
- All segments are updated to enabled=True after processing
- Redis cache key deletion - Redis cache key deletion
""" """
# Arrange: Create test data # Arrange: Create test data
@ -628,7 +631,8 @@ class TestAddDocumentToIndexTask:
db.session.add(segment1) db.session.add(segment1)
segments.append(segment1) segments.append(segment1)
# Segment 2: Should NOT be processed (enabled=True, status="completed") # Segment 2: Should be processed (enabled=True, status="completed")
# Note: Implementation doesn't filter by enabled status, only by status="completed"
segment2 = DocumentSegment( segment2 = DocumentSegment(
id=fake.uuid4(), id=fake.uuid4(),
tenant_id=document.tenant_id, tenant_id=document.tenant_id,
@ -640,7 +644,7 @@ class TestAddDocumentToIndexTask:
tokens=len(fake.text(max_nb_chars=200).split()) * 2, tokens=len(fake.text(max_nb_chars=200).split()) * 2,
index_node_id="node_1", index_node_id="node_1",
index_node_hash="hash_1", index_node_hash="hash_1",
enabled=True, # Already enabled enabled=True, # Already enabled, but will still be processed
status="completed", status="completed",
created_by=document.created_by, created_by=document.created_by,
) )
@ -702,11 +706,14 @@ class TestAddDocumentToIndexTask:
call_args = mock_external_service_dependencies["index_processor"].load.call_args call_args = mock_external_service_dependencies["index_processor"].load.call_args
assert call_args is not None assert call_args is not None
documents = call_args[0][1] # Second argument should be documents list documents = call_args[0][1] # Second argument should be documents list
assert len(documents) == 2 # Only 2 segments should be processed assert len(documents) == 3 # 3 segments with status="completed" should be processed
# Verify correct segments were processed (by position order) # Verify correct segments were processed (by position order)
assert documents[0].metadata["doc_id"] == "node_0" # position 0 # Segments 1, 2, 4 should be processed (positions 0, 1, 3)
assert documents[1].metadata["doc_id"] == "node_3" # position 3 # Segment 3 is skipped (position 2, status="processing")
assert documents[0].metadata["doc_id"] == "node_0" # segment1, position 0
assert documents[1].metadata["doc_id"] == "node_1" # segment2, position 1
assert documents[2].metadata["doc_id"] == "node_3" # segment4, position 3
# Verify database state changes # Verify database state changes
db.session.refresh(document) db.session.refresh(document)
@ -717,7 +724,7 @@ class TestAddDocumentToIndexTask:
# All segments should be enabled because the task updates ALL segments for the document # All segments should be enabled because the task updates ALL segments for the document
assert segment1.enabled is True assert segment1.enabled is True
assert segment2.enabled is True # Was already enabled, now updated to True assert segment2.enabled is True # Was already enabled, stays True
assert segment3.enabled is True # Was not processed but still updated to True assert segment3.enabled is True # Was not processed but still updated to True
assert segment4.enabled is True assert segment4.enabled is True