mirror of
https://github.com/langgenius/dify.git
synced 2026-04-16 02:16:57 +08:00
test: migrate clean_dataset integration tests to SQLAlchemy 2.0 APIs (#35146)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
62bb830338
commit
173e0d6f35
@ -16,6 +16,7 @@ from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from faker import Faker
|
||||
from sqlalchemy import delete, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
|
||||
@ -52,18 +53,18 @@ class TestCleanDatasetTask:
|
||||
from extensions.ext_redis import redis_client
|
||||
|
||||
# Clear all test data using the provided session fixture
|
||||
db_session_with_containers.query(DatasetMetadataBinding).delete()
|
||||
db_session_with_containers.query(DatasetMetadata).delete()
|
||||
db_session_with_containers.query(AppDatasetJoin).delete()
|
||||
db_session_with_containers.query(DatasetQuery).delete()
|
||||
db_session_with_containers.query(DatasetProcessRule).delete()
|
||||
db_session_with_containers.query(DocumentSegment).delete()
|
||||
db_session_with_containers.query(Document).delete()
|
||||
db_session_with_containers.query(Dataset).delete()
|
||||
db_session_with_containers.query(UploadFile).delete()
|
||||
db_session_with_containers.query(TenantAccountJoin).delete()
|
||||
db_session_with_containers.query(Tenant).delete()
|
||||
db_session_with_containers.query(Account).delete()
|
||||
db_session_with_containers.execute(delete(DatasetMetadataBinding))
|
||||
db_session_with_containers.execute(delete(DatasetMetadata))
|
||||
db_session_with_containers.execute(delete(AppDatasetJoin))
|
||||
db_session_with_containers.execute(delete(DatasetQuery))
|
||||
db_session_with_containers.execute(delete(DatasetProcessRule))
|
||||
db_session_with_containers.execute(delete(DocumentSegment))
|
||||
db_session_with_containers.execute(delete(Document))
|
||||
db_session_with_containers.execute(delete(Dataset))
|
||||
db_session_with_containers.execute(delete(UploadFile))
|
||||
db_session_with_containers.execute(delete(TenantAccountJoin))
|
||||
db_session_with_containers.execute(delete(Tenant))
|
||||
db_session_with_containers.execute(delete(Account))
|
||||
db_session_with_containers.commit()
|
||||
|
||||
# Clear Redis cache
|
||||
@ -302,28 +303,40 @@ class TestCleanDatasetTask:
|
||||
|
||||
# Verify results
|
||||
# Check that dataset-related data was cleaned up
|
||||
documents = db_session_with_containers.query(Document).filter_by(dataset_id=dataset.id).all()
|
||||
documents = db_session_with_containers.scalars(select(Document).where(Document.dataset_id == dataset.id)).all()
|
||||
assert len(documents) == 0
|
||||
|
||||
segments = db_session_with_containers.query(DocumentSegment).filter_by(dataset_id=dataset.id).all()
|
||||
segments = db_session_with_containers.scalars(
|
||||
select(DocumentSegment).where(DocumentSegment.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(segments) == 0
|
||||
|
||||
# Check that metadata and bindings were cleaned up
|
||||
metadata = db_session_with_containers.query(DatasetMetadata).filter_by(dataset_id=dataset.id).all()
|
||||
metadata = db_session_with_containers.scalars(
|
||||
select(DatasetMetadata).where(DatasetMetadata.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(metadata) == 0
|
||||
|
||||
bindings = db_session_with_containers.query(DatasetMetadataBinding).filter_by(dataset_id=dataset.id).all()
|
||||
bindings = db_session_with_containers.scalars(
|
||||
select(DatasetMetadataBinding).where(DatasetMetadataBinding.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(bindings) == 0
|
||||
|
||||
# Check that process rules and queries were cleaned up
|
||||
process_rules = db_session_with_containers.query(DatasetProcessRule).filter_by(dataset_id=dataset.id).all()
|
||||
process_rules = db_session_with_containers.scalars(
|
||||
select(DatasetProcessRule).where(DatasetProcessRule.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(process_rules) == 0
|
||||
|
||||
queries = db_session_with_containers.query(DatasetQuery).filter_by(dataset_id=dataset.id).all()
|
||||
queries = db_session_with_containers.scalars(
|
||||
select(DatasetQuery).where(DatasetQuery.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(queries) == 0
|
||||
|
||||
# Check that app dataset joins were cleaned up
|
||||
app_joins = db_session_with_containers.query(AppDatasetJoin).filter_by(dataset_id=dataset.id).all()
|
||||
app_joins = db_session_with_containers.scalars(
|
||||
select(AppDatasetJoin).where(AppDatasetJoin.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(app_joins) == 0
|
||||
|
||||
# Verify index processor was called
|
||||
@ -414,24 +427,32 @@ class TestCleanDatasetTask:
|
||||
|
||||
# Verify results
|
||||
# Check that all documents were deleted
|
||||
remaining_documents = db_session_with_containers.query(Document).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_documents = db_session_with_containers.scalars(
|
||||
select(Document).where(Document.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_documents) == 0
|
||||
|
||||
# Check that all segments were deleted
|
||||
remaining_segments = db_session_with_containers.query(DocumentSegment).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_segments = db_session_with_containers.scalars(
|
||||
select(DocumentSegment).where(DocumentSegment.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_segments) == 0
|
||||
|
||||
# Check that all upload files were deleted
|
||||
remaining_files = db_session_with_containers.query(UploadFile).where(UploadFile.id.in_(upload_file_ids)).all()
|
||||
remaining_files = db_session_with_containers.scalars(
|
||||
select(UploadFile).where(UploadFile.id.in_(upload_file_ids))
|
||||
).all()
|
||||
assert len(remaining_files) == 0
|
||||
|
||||
# Check that metadata and bindings were cleaned up
|
||||
remaining_metadata = db_session_with_containers.query(DatasetMetadata).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_metadata = db_session_with_containers.scalars(
|
||||
select(DatasetMetadata).where(DatasetMetadata.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_metadata) == 0
|
||||
|
||||
remaining_bindings = (
|
||||
db_session_with_containers.query(DatasetMetadataBinding).filter_by(dataset_id=dataset.id).all()
|
||||
)
|
||||
remaining_bindings = db_session_with_containers.scalars(
|
||||
select(DatasetMetadataBinding).where(DatasetMetadataBinding.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_bindings) == 0
|
||||
|
||||
# Verify index processor was called
|
||||
@ -485,12 +506,14 @@ class TestCleanDatasetTask:
|
||||
|
||||
# Check that all data was cleaned up
|
||||
|
||||
remaining_documents = db_session_with_containers.query(Document).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_documents = db_session_with_containers.scalars(
|
||||
select(Document).where(Document.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_documents) == 0
|
||||
|
||||
remaining_segments = (
|
||||
db_session_with_containers.query(DocumentSegment).filter_by(dataset_id=dataset.id).all()
|
||||
)
|
||||
remaining_segments = db_session_with_containers.scalars(
|
||||
select(DocumentSegment).where(DocumentSegment.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_segments) == 0
|
||||
|
||||
# Recreate data for next test case
|
||||
@ -538,11 +561,15 @@ class TestCleanDatasetTask:
|
||||
# Verify results - even with vector cleanup failure, documents and segments should be deleted
|
||||
|
||||
# Check that documents were still deleted despite vector cleanup failure
|
||||
remaining_documents = db_session_with_containers.query(Document).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_documents = db_session_with_containers.scalars(
|
||||
select(Document).where(Document.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_documents) == 0
|
||||
|
||||
# Check that segments were still deleted despite vector cleanup failure
|
||||
remaining_segments = db_session_with_containers.query(DocumentSegment).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_segments = db_session_with_containers.scalars(
|
||||
select(DocumentSegment).where(DocumentSegment.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_segments) == 0
|
||||
|
||||
# Verify that index processor was called and failed
|
||||
@ -622,18 +649,22 @@ class TestCleanDatasetTask:
|
||||
|
||||
# Verify results
|
||||
# Check that all documents were deleted
|
||||
remaining_documents = db_session_with_containers.query(Document).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_documents = db_session_with_containers.scalars(
|
||||
select(Document).where(Document.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_documents) == 0
|
||||
|
||||
# Check that all segments were deleted
|
||||
remaining_segments = db_session_with_containers.query(DocumentSegment).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_segments = db_session_with_containers.scalars(
|
||||
select(DocumentSegment).where(DocumentSegment.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_segments) == 0
|
||||
|
||||
# Check that all image files were deleted from database
|
||||
image_file_ids = [f.id for f in image_files]
|
||||
remaining_image_files = (
|
||||
db_session_with_containers.query(UploadFile).where(UploadFile.id.in_(image_file_ids)).all()
|
||||
)
|
||||
remaining_image_files = db_session_with_containers.scalars(
|
||||
select(UploadFile).where(UploadFile.id.in_(image_file_ids))
|
||||
).all()
|
||||
assert len(remaining_image_files) == 0
|
||||
|
||||
# Verify that storage.delete was called for each image file
|
||||
@ -738,24 +769,32 @@ class TestCleanDatasetTask:
|
||||
|
||||
# Verify results
|
||||
# Check that all documents were deleted
|
||||
remaining_documents = db_session_with_containers.query(Document).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_documents = db_session_with_containers.scalars(
|
||||
select(Document).where(Document.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_documents) == 0
|
||||
|
||||
# Check that all segments were deleted
|
||||
remaining_segments = db_session_with_containers.query(DocumentSegment).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_segments = db_session_with_containers.scalars(
|
||||
select(DocumentSegment).where(DocumentSegment.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_segments) == 0
|
||||
|
||||
# Check that all upload files were deleted
|
||||
remaining_files = db_session_with_containers.query(UploadFile).where(UploadFile.id.in_(upload_file_ids)).all()
|
||||
remaining_files = db_session_with_containers.scalars(
|
||||
select(UploadFile).where(UploadFile.id.in_(upload_file_ids))
|
||||
).all()
|
||||
assert len(remaining_files) == 0
|
||||
|
||||
# Check that all metadata and bindings were deleted
|
||||
remaining_metadata = db_session_with_containers.query(DatasetMetadata).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_metadata = db_session_with_containers.scalars(
|
||||
select(DatasetMetadata).where(DatasetMetadata.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_metadata) == 0
|
||||
|
||||
remaining_bindings = (
|
||||
db_session_with_containers.query(DatasetMetadataBinding).filter_by(dataset_id=dataset.id).all()
|
||||
)
|
||||
remaining_bindings = db_session_with_containers.scalars(
|
||||
select(DatasetMetadataBinding).where(DatasetMetadataBinding.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_bindings) == 0
|
||||
|
||||
# Verify performance expectations
|
||||
@ -826,7 +865,9 @@ class TestCleanDatasetTask:
|
||||
# Check whether the upload file was deleted from the database despite the storage failure
|
||||
# Note: When storage operations fail, the upload file may not be deleted
|
||||
# This demonstrates that the cleanup process continues even with storage errors
|
||||
remaining_files = db_session_with_containers.query(UploadFile).filter_by(id=upload_file.id).all()
|
||||
remaining_files = db_session_with_containers.scalars(
|
||||
select(UploadFile).where(UploadFile.id == upload_file.id)
|
||||
).all()
|
||||
# The upload file should still be deleted from the database even if storage cleanup fails
|
||||
# However, this depends on the specific implementation of clean_dataset_task
|
||||
if len(remaining_files) > 0:
|
||||
@ -976,19 +1017,27 @@ class TestCleanDatasetTask:
|
||||
|
||||
# Verify results
|
||||
# Check that all documents were deleted
|
||||
remaining_documents = db_session_with_containers.query(Document).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_documents = db_session_with_containers.scalars(
|
||||
select(Document).where(Document.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_documents) == 0
|
||||
|
||||
# Check that all segments were deleted
|
||||
remaining_segments = db_session_with_containers.query(DocumentSegment).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_segments = db_session_with_containers.scalars(
|
||||
select(DocumentSegment).where(DocumentSegment.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_segments) == 0
|
||||
|
||||
# Check that all upload files were deleted
|
||||
remaining_files = db_session_with_containers.query(UploadFile).filter_by(id=upload_file_id).all()
|
||||
remaining_files = db_session_with_containers.scalars(
|
||||
select(UploadFile).where(UploadFile.id == upload_file_id)
|
||||
).all()
|
||||
assert len(remaining_files) == 0
|
||||
|
||||
# Check that all metadata was deleted
|
||||
remaining_metadata = db_session_with_containers.query(DatasetMetadata).filter_by(dataset_id=dataset.id).all()
|
||||
remaining_metadata = db_session_with_containers.scalars(
|
||||
select(DatasetMetadata).where(DatasetMetadata.dataset_id == dataset.id)
|
||||
).all()
|
||||
assert len(remaining_metadata) == 0
|
||||
|
||||
# Verify that storage.delete was called
|
||||
|
||||
Loading…
Reference in New Issue
Block a user