mirror of https://github.com/langgenius/dify.git
fix document index test (#28113)
This commit is contained in:
parent
123bb3ec08
commit
47dc26f011
|
|
@ -256,7 +256,7 @@ class TestAddDocumentToIndexTask:
|
|||
"""
|
||||
# Arrange: Use non-existent document ID
|
||||
fake = Faker()
|
||||
non_existent_id = fake.uuid4()
|
||||
non_existent_id = str(fake.uuid4())
|
||||
|
||||
# Act: Execute the task with non-existent document
|
||||
add_document_to_index_task(non_existent_id)
|
||||
|
|
@ -282,7 +282,7 @@ class TestAddDocumentToIndexTask:
|
|||
- Redis cache key not affected
|
||||
"""
|
||||
# Arrange: Create test data with invalid indexing status
|
||||
dataset, document = self._create_test_dataset_and_document(
|
||||
_, document = self._create_test_dataset_and_document(
|
||||
db_session_with_containers, mock_external_service_dependencies
|
||||
)
|
||||
|
||||
|
|
@ -417,15 +417,15 @@ class TestAddDocumentToIndexTask:
|
|||
# Verify redis cache was cleared
|
||||
assert redis_client.exists(indexing_cache_key) == 0
|
||||
|
||||
def test_add_document_to_index_with_no_segments_to_process(
|
||||
def test_add_document_to_index_with_already_enabled_segments(
|
||||
self, db_session_with_containers, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test document indexing when no segments need processing.
|
||||
Test document indexing when segments are already enabled.
|
||||
|
||||
This test verifies:
|
||||
- Proper handling when all segments are already enabled
|
||||
- Index processing still occurs but with empty documents list
|
||||
- Segments with status="completed" are processed regardless of enabled status
|
||||
- Index processing occurs with all completed segments
|
||||
- Auto disable log deletion still occurs
|
||||
- Redis cache is cleared
|
||||
"""
|
||||
|
|
@ -465,15 +465,16 @@ class TestAddDocumentToIndexTask:
|
|||
# Act: Execute the task
|
||||
add_document_to_index_task(document.id)
|
||||
|
||||
# Assert: Verify index processing occurred but with empty documents list
|
||||
# Assert: Verify index processing occurred with all completed segments
|
||||
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
|
||||
mock_external_service_dependencies["index_processor"].load.assert_called_once()
|
||||
|
||||
# Verify the load method was called with empty documents list
|
||||
# Verify the load method was called with all completed segments
|
||||
# (implementation doesn't filter by enabled status, only by status="completed")
|
||||
call_args = mock_external_service_dependencies["index_processor"].load.call_args
|
||||
assert call_args is not None
|
||||
documents = call_args[0][1] # Second argument should be documents list
|
||||
assert len(documents) == 0 # No segments to process
|
||||
assert len(documents) == 3 # All completed segments are processed
|
||||
|
||||
# Verify redis cache was cleared
|
||||
assert redis_client.exists(indexing_cache_key) == 0
|
||||
|
|
@ -499,7 +500,7 @@ class TestAddDocumentToIndexTask:
|
|||
# Create some auto disable log entries
|
||||
fake = Faker()
|
||||
auto_disable_logs = []
|
||||
for i in range(2):
|
||||
for _ in range(2):
|
||||
log_entry = DatasetAutoDisableLog(
|
||||
id=fake.uuid4(),
|
||||
tenant_id=document.tenant_id,
|
||||
|
|
@ -595,9 +596,11 @@ class TestAddDocumentToIndexTask:
|
|||
Test segment filtering with various edge cases.
|
||||
|
||||
This test verifies:
|
||||
- Only segments with enabled=False and status="completed" are processed
|
||||
- Only segments with status="completed" are processed (regardless of enabled status)
|
||||
- Segments with status!="completed" are NOT processed
|
||||
- Segments are ordered by position correctly
|
||||
- Mixed segment states are handled properly
|
||||
- All segments are updated to enabled=True after processing
|
||||
- Redis cache key deletion
|
||||
"""
|
||||
# Arrange: Create test data
|
||||
|
|
@ -628,7 +631,8 @@ class TestAddDocumentToIndexTask:
|
|||
db.session.add(segment1)
|
||||
segments.append(segment1)
|
||||
|
||||
# Segment 2: Should NOT be processed (enabled=True, status="completed")
|
||||
# Segment 2: Should be processed (enabled=True, status="completed")
|
||||
# Note: Implementation doesn't filter by enabled status, only by status="completed"
|
||||
segment2 = DocumentSegment(
|
||||
id=fake.uuid4(),
|
||||
tenant_id=document.tenant_id,
|
||||
|
|
@ -640,7 +644,7 @@ class TestAddDocumentToIndexTask:
|
|||
tokens=len(fake.text(max_nb_chars=200).split()) * 2,
|
||||
index_node_id="node_1",
|
||||
index_node_hash="hash_1",
|
||||
enabled=True, # Already enabled
|
||||
enabled=True, # Already enabled, but will still be processed
|
||||
status="completed",
|
||||
created_by=document.created_by,
|
||||
)
|
||||
|
|
@ -702,11 +706,14 @@ class TestAddDocumentToIndexTask:
|
|||
call_args = mock_external_service_dependencies["index_processor"].load.call_args
|
||||
assert call_args is not None
|
||||
documents = call_args[0][1] # Second argument should be documents list
|
||||
assert len(documents) == 2 # Only 2 segments should be processed
|
||||
assert len(documents) == 3 # 3 segments with status="completed" should be processed
|
||||
|
||||
# Verify correct segments were processed (by position order)
|
||||
assert documents[0].metadata["doc_id"] == "node_0" # position 0
|
||||
assert documents[1].metadata["doc_id"] == "node_3" # position 3
|
||||
# Segments 1, 2, 4 should be processed (positions 0, 1, 3)
|
||||
# Segment 3 is skipped (position 2, status="processing")
|
||||
assert documents[0].metadata["doc_id"] == "node_0" # segment1, position 0
|
||||
assert documents[1].metadata["doc_id"] == "node_1" # segment2, position 1
|
||||
assert documents[2].metadata["doc_id"] == "node_3" # segment4, position 3
|
||||
|
||||
# Verify database state changes
|
||||
db.session.refresh(document)
|
||||
|
|
@ -717,7 +724,7 @@ class TestAddDocumentToIndexTask:
|
|||
|
||||
# All segments should be enabled because the task updates ALL segments for the document
|
||||
assert segment1.enabled is True
|
||||
assert segment2.enabled is True # Was already enabled, now updated to True
|
||||
assert segment2.enabled is True # Was already enabled, stays True
|
||||
assert segment3.enabled is True # Was not processed but still updated to True
|
||||
assert segment4.enabled is True
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue