mirror of
https://github.com/langgenius/dify.git
synced 2026-04-27 11:06:46 +08:00
refactor: select in metadata_service (#34479)
This commit is contained in:
parent
894826771a
commit
d243de26ec
@ -1,6 +1,8 @@
|
|||||||
import copy
|
import copy
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from sqlalchemy import delete, func, select
|
||||||
|
|
||||||
from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
|
from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from extensions.ext_redis import redis_client
|
from extensions.ext_redis import redis_client
|
||||||
@ -25,10 +27,14 @@ class MetadataService:
|
|||||||
raise ValueError("Metadata name cannot exceed 255 characters.")
|
raise ValueError("Metadata name cannot exceed 255 characters.")
|
||||||
current_user, current_tenant_id = current_account_with_tenant()
|
current_user, current_tenant_id = current_account_with_tenant()
|
||||||
# check if metadata name already exists
|
# check if metadata name already exists
|
||||||
if (
|
if db.session.scalar(
|
||||||
db.session.query(DatasetMetadata)
|
select(DatasetMetadata)
|
||||||
.filter_by(tenant_id=current_tenant_id, dataset_id=dataset_id, name=metadata_args.name)
|
.where(
|
||||||
.first()
|
DatasetMetadata.tenant_id == current_tenant_id,
|
||||||
|
DatasetMetadata.dataset_id == dataset_id,
|
||||||
|
DatasetMetadata.name == metadata_args.name,
|
||||||
|
)
|
||||||
|
.limit(1)
|
||||||
):
|
):
|
||||||
raise ValueError("Metadata name already exists.")
|
raise ValueError("Metadata name already exists.")
|
||||||
for field in BuiltInField:
|
for field in BuiltInField:
|
||||||
@ -54,10 +60,14 @@ class MetadataService:
|
|||||||
lock_key = f"dataset_metadata_lock_{dataset_id}"
|
lock_key = f"dataset_metadata_lock_{dataset_id}"
|
||||||
# check if metadata name already exists
|
# check if metadata name already exists
|
||||||
current_user, current_tenant_id = current_account_with_tenant()
|
current_user, current_tenant_id = current_account_with_tenant()
|
||||||
if (
|
if db.session.scalar(
|
||||||
db.session.query(DatasetMetadata)
|
select(DatasetMetadata)
|
||||||
.filter_by(tenant_id=current_tenant_id, dataset_id=dataset_id, name=name)
|
.where(
|
||||||
.first()
|
DatasetMetadata.tenant_id == current_tenant_id,
|
||||||
|
DatasetMetadata.dataset_id == dataset_id,
|
||||||
|
DatasetMetadata.name == name,
|
||||||
|
)
|
||||||
|
.limit(1)
|
||||||
):
|
):
|
||||||
raise ValueError("Metadata name already exists.")
|
raise ValueError("Metadata name already exists.")
|
||||||
for field in BuiltInField:
|
for field in BuiltInField:
|
||||||
@ -65,7 +75,11 @@ class MetadataService:
|
|||||||
raise ValueError("Metadata name already exists in Built-in fields.")
|
raise ValueError("Metadata name already exists in Built-in fields.")
|
||||||
try:
|
try:
|
||||||
MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
|
MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
|
||||||
metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id, dataset_id=dataset_id).first()
|
metadata = db.session.scalar(
|
||||||
|
select(DatasetMetadata)
|
||||||
|
.where(DatasetMetadata.id == metadata_id, DatasetMetadata.dataset_id == dataset_id)
|
||||||
|
.limit(1)
|
||||||
|
)
|
||||||
if metadata is None:
|
if metadata is None:
|
||||||
raise ValueError("Metadata not found.")
|
raise ValueError("Metadata not found.")
|
||||||
old_name = metadata.name
|
old_name = metadata.name
|
||||||
@ -74,9 +88,9 @@ class MetadataService:
|
|||||||
metadata.updated_at = naive_utc_now()
|
metadata.updated_at = naive_utc_now()
|
||||||
|
|
||||||
# update related documents
|
# update related documents
|
||||||
dataset_metadata_bindings = (
|
dataset_metadata_bindings = db.session.scalars(
|
||||||
db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all()
|
select(DatasetMetadataBinding).where(DatasetMetadataBinding.metadata_id == metadata_id)
|
||||||
)
|
).all()
|
||||||
if dataset_metadata_bindings:
|
if dataset_metadata_bindings:
|
||||||
document_ids = [binding.document_id for binding in dataset_metadata_bindings]
|
document_ids = [binding.document_id for binding in dataset_metadata_bindings]
|
||||||
documents = DocumentService.get_document_by_ids(document_ids)
|
documents = DocumentService.get_document_by_ids(document_ids)
|
||||||
@ -101,15 +115,19 @@ class MetadataService:
|
|||||||
lock_key = f"dataset_metadata_lock_{dataset_id}"
|
lock_key = f"dataset_metadata_lock_{dataset_id}"
|
||||||
try:
|
try:
|
||||||
MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
|
MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
|
||||||
metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id, dataset_id=dataset_id).first()
|
metadata = db.session.scalar(
|
||||||
|
select(DatasetMetadata)
|
||||||
|
.where(DatasetMetadata.id == metadata_id, DatasetMetadata.dataset_id == dataset_id)
|
||||||
|
.limit(1)
|
||||||
|
)
|
||||||
if metadata is None:
|
if metadata is None:
|
||||||
raise ValueError("Metadata not found.")
|
raise ValueError("Metadata not found.")
|
||||||
db.session.delete(metadata)
|
db.session.delete(metadata)
|
||||||
|
|
||||||
# deal related documents
|
# deal related documents
|
||||||
dataset_metadata_bindings = (
|
dataset_metadata_bindings = db.session.scalars(
|
||||||
db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all()
|
select(DatasetMetadataBinding).where(DatasetMetadataBinding.metadata_id == metadata_id)
|
||||||
)
|
).all()
|
||||||
if dataset_metadata_bindings:
|
if dataset_metadata_bindings:
|
||||||
document_ids = [binding.document_id for binding in dataset_metadata_bindings]
|
document_ids = [binding.document_id for binding in dataset_metadata_bindings]
|
||||||
documents = DocumentService.get_document_by_ids(document_ids)
|
documents = DocumentService.get_document_by_ids(document_ids)
|
||||||
@ -224,16 +242,23 @@ class MetadataService:
|
|||||||
|
|
||||||
# deal metadata binding (in the same transaction as the doc_metadata update)
|
# deal metadata binding (in the same transaction as the doc_metadata update)
|
||||||
if not operation.partial_update:
|
if not operation.partial_update:
|
||||||
db.session.query(DatasetMetadataBinding).filter_by(document_id=operation.document_id).delete()
|
db.session.execute(
|
||||||
|
delete(DatasetMetadataBinding).where(
|
||||||
|
DatasetMetadataBinding.document_id == operation.document_id
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
current_user, current_tenant_id = current_account_with_tenant()
|
current_user, current_tenant_id = current_account_with_tenant()
|
||||||
for metadata_value in operation.metadata_list:
|
for metadata_value in operation.metadata_list:
|
||||||
# check if binding already exists
|
# check if binding already exists
|
||||||
if operation.partial_update:
|
if operation.partial_update:
|
||||||
existing_binding = (
|
existing_binding = db.session.scalar(
|
||||||
db.session.query(DatasetMetadataBinding)
|
select(DatasetMetadataBinding)
|
||||||
.filter_by(document_id=operation.document_id, metadata_id=metadata_value.id)
|
.where(
|
||||||
.first()
|
DatasetMetadataBinding.document_id == operation.document_id,
|
||||||
|
DatasetMetadataBinding.metadata_id == metadata_value.id,
|
||||||
|
)
|
||||||
|
.limit(1)
|
||||||
)
|
)
|
||||||
if existing_binding:
|
if existing_binding:
|
||||||
continue
|
continue
|
||||||
@ -275,9 +300,13 @@ class MetadataService:
|
|||||||
"id": item.get("id"),
|
"id": item.get("id"),
|
||||||
"name": item.get("name"),
|
"name": item.get("name"),
|
||||||
"type": item.get("type"),
|
"type": item.get("type"),
|
||||||
"count": db.session.query(DatasetMetadataBinding)
|
"count": db.session.scalar(
|
||||||
.filter_by(metadata_id=item.get("id"), dataset_id=dataset.id)
|
select(func.count(DatasetMetadataBinding.id)).where(
|
||||||
.count(),
|
DatasetMetadataBinding.metadata_id == item.get("id"),
|
||||||
|
DatasetMetadataBinding.dataset_id == dataset.id,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
or 0,
|
||||||
}
|
}
|
||||||
for item in dataset.doc_metadata or []
|
for item in dataset.doc_metadata or []
|
||||||
if item.get("id") != "built-in"
|
if item.get("id") != "built-in"
|
||||||
|
|||||||
@ -401,10 +401,7 @@ class TestMetadataServiceCreateMetadata:
|
|||||||
metadata_args = MetadataTestDataFactory.create_metadata_args_mock(name="category", metadata_type="string")
|
metadata_args = MetadataTestDataFactory.create_metadata_args_mock(name="category", metadata_type="string")
|
||||||
|
|
||||||
# Mock query to return None (no existing metadata with same name)
|
# Mock query to return None (no existing metadata with same name)
|
||||||
mock_query = Mock()
|
mock_db_session.scalar.return_value = None
|
||||||
mock_query.filter_by.return_value = mock_query
|
|
||||||
mock_query.first.return_value = None
|
|
||||||
mock_db_session.query.return_value = mock_query
|
|
||||||
|
|
||||||
# Mock BuiltInField enum iteration
|
# Mock BuiltInField enum iteration
|
||||||
with patch("services.metadata_service.BuiltInField") as mock_builtin:
|
with patch("services.metadata_service.BuiltInField") as mock_builtin:
|
||||||
@ -417,10 +414,6 @@ class TestMetadataServiceCreateMetadata:
|
|||||||
assert result is not None
|
assert result is not None
|
||||||
assert isinstance(result, DatasetMetadata)
|
assert isinstance(result, DatasetMetadata)
|
||||||
|
|
||||||
# Verify query was made to check for duplicates
|
|
||||||
mock_db_session.query.assert_called()
|
|
||||||
mock_query.filter_by.assert_called()
|
|
||||||
|
|
||||||
# Verify metadata was added and committed
|
# Verify metadata was added and committed
|
||||||
mock_db_session.add.assert_called_once()
|
mock_db_session.add.assert_called_once()
|
||||||
mock_db_session.commit.assert_called_once()
|
mock_db_session.commit.assert_called_once()
|
||||||
@ -468,10 +461,7 @@ class TestMetadataServiceCreateMetadata:
|
|||||||
|
|
||||||
# Mock existing metadata with same name
|
# Mock existing metadata with same name
|
||||||
existing_metadata = MetadataTestDataFactory.create_metadata_mock(name="category")
|
existing_metadata = MetadataTestDataFactory.create_metadata_mock(name="category")
|
||||||
mock_query = Mock()
|
mock_db_session.scalar.return_value = existing_metadata
|
||||||
mock_query.filter_by.return_value = mock_query
|
|
||||||
mock_query.first.return_value = existing_metadata
|
|
||||||
mock_db_session.query.return_value = mock_query
|
|
||||||
|
|
||||||
# Act & Assert
|
# Act & Assert
|
||||||
with pytest.raises(ValueError, match="Metadata name already exists"):
|
with pytest.raises(ValueError, match="Metadata name already exists"):
|
||||||
@ -500,10 +490,7 @@ class TestMetadataServiceCreateMetadata:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Mock query to return None (no duplicate in database)
|
# Mock query to return None (no duplicate in database)
|
||||||
mock_query = Mock()
|
mock_db_session.scalar.return_value = None
|
||||||
mock_query.filter_by.return_value = mock_query
|
|
||||||
mock_query.first.return_value = None
|
|
||||||
mock_db_session.query.return_value = mock_query
|
|
||||||
|
|
||||||
# Mock BuiltInField to include the conflicting name
|
# Mock BuiltInField to include the conflicting name
|
||||||
with patch("services.metadata_service.BuiltInField") as mock_builtin:
|
with patch("services.metadata_service.BuiltInField") as mock_builtin:
|
||||||
@ -597,27 +584,11 @@ class TestMetadataServiceUpdateMetadataName:
|
|||||||
|
|
||||||
existing_metadata = MetadataTestDataFactory.create_metadata_mock(metadata_id=metadata_id, name="category")
|
existing_metadata = MetadataTestDataFactory.create_metadata_mock(metadata_id=metadata_id, name="category")
|
||||||
|
|
||||||
# Mock query for duplicate check (no duplicate)
|
# Mock scalar calls: first for duplicate check (None), second for metadata retrieval
|
||||||
mock_query = Mock()
|
mock_db_session.scalar.side_effect = [None, existing_metadata]
|
||||||
mock_query.filter_by.return_value = mock_query
|
|
||||||
mock_query.first.return_value = None
|
|
||||||
mock_db_session.query.return_value = mock_query
|
|
||||||
|
|
||||||
# Mock metadata retrieval
|
|
||||||
def query_side_effect(model):
|
|
||||||
if model == DatasetMetadata:
|
|
||||||
mock_meta_query = Mock()
|
|
||||||
mock_meta_query.filter_by.return_value = mock_meta_query
|
|
||||||
mock_meta_query.first.return_value = existing_metadata
|
|
||||||
return mock_meta_query
|
|
||||||
return mock_query
|
|
||||||
|
|
||||||
mock_db_session.query.side_effect = query_side_effect
|
|
||||||
|
|
||||||
# Mock no metadata bindings (no documents to update)
|
# Mock no metadata bindings (no documents to update)
|
||||||
mock_binding_query = Mock()
|
mock_db_session.scalars.return_value.all.return_value = []
|
||||||
mock_binding_query.filter_by.return_value = mock_binding_query
|
|
||||||
mock_binding_query.all.return_value = []
|
|
||||||
|
|
||||||
# Mock BuiltInField enum
|
# Mock BuiltInField enum
|
||||||
with patch("services.metadata_service.BuiltInField") as mock_builtin:
|
with patch("services.metadata_service.BuiltInField") as mock_builtin:
|
||||||
@ -655,22 +626,8 @@ class TestMetadataServiceUpdateMetadataName:
|
|||||||
metadata_id = "non-existent-metadata"
|
metadata_id = "non-existent-metadata"
|
||||||
new_name = "updated_category"
|
new_name = "updated_category"
|
||||||
|
|
||||||
# Mock query for duplicate check (no duplicate)
|
# Mock scalar calls: first for duplicate check (None), second for metadata retrieval (None = not found)
|
||||||
mock_query = Mock()
|
mock_db_session.scalar.side_effect = [None, None]
|
||||||
mock_query.filter_by.return_value = mock_query
|
|
||||||
mock_query.first.return_value = None
|
|
||||||
mock_db_session.query.return_value = mock_query
|
|
||||||
|
|
||||||
# Mock metadata retrieval to return None
|
|
||||||
def query_side_effect(model):
|
|
||||||
if model == DatasetMetadata:
|
|
||||||
mock_meta_query = Mock()
|
|
||||||
mock_meta_query.filter_by.return_value = mock_meta_query
|
|
||||||
mock_meta_query.first.return_value = None # Not found
|
|
||||||
return mock_meta_query
|
|
||||||
return mock_query
|
|
||||||
|
|
||||||
mock_db_session.query.side_effect = query_side_effect
|
|
||||||
|
|
||||||
# Mock BuiltInField enum
|
# Mock BuiltInField enum
|
||||||
with patch("services.metadata_service.BuiltInField") as mock_builtin:
|
with patch("services.metadata_service.BuiltInField") as mock_builtin:
|
||||||
@ -746,15 +703,10 @@ class TestMetadataServiceDeleteMetadata:
|
|||||||
existing_metadata = MetadataTestDataFactory.create_metadata_mock(metadata_id=metadata_id, name="category")
|
existing_metadata = MetadataTestDataFactory.create_metadata_mock(metadata_id=metadata_id, name="category")
|
||||||
|
|
||||||
# Mock metadata retrieval
|
# Mock metadata retrieval
|
||||||
mock_query = Mock()
|
mock_db_session.scalar.return_value = existing_metadata
|
||||||
mock_query.filter_by.return_value = mock_query
|
|
||||||
mock_query.first.return_value = existing_metadata
|
|
||||||
mock_db_session.query.return_value = mock_query
|
|
||||||
|
|
||||||
# Mock no metadata bindings (no documents to update)
|
# Mock no metadata bindings (no documents to update)
|
||||||
mock_binding_query = Mock()
|
mock_db_session.scalars.return_value.all.return_value = []
|
||||||
mock_binding_query.filter_by.return_value = mock_binding_query
|
|
||||||
mock_binding_query.all.return_value = []
|
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
result = MetadataService.delete_metadata(dataset_id, metadata_id)
|
result = MetadataService.delete_metadata(dataset_id, metadata_id)
|
||||||
@ -788,10 +740,7 @@ class TestMetadataServiceDeleteMetadata:
|
|||||||
metadata_id = "non-existent-metadata"
|
metadata_id = "non-existent-metadata"
|
||||||
|
|
||||||
# Mock metadata retrieval to return None
|
# Mock metadata retrieval to return None
|
||||||
mock_query = Mock()
|
mock_db_session.scalar.return_value = None
|
||||||
mock_query.filter_by.return_value = mock_query
|
|
||||||
mock_query.first.return_value = None
|
|
||||||
mock_db_session.query.return_value = mock_query
|
|
||||||
|
|
||||||
# Act & Assert
|
# Act & Assert
|
||||||
with pytest.raises(ValueError, match="Metadata not found"):
|
with pytest.raises(ValueError, match="Metadata not found"):
|
||||||
@ -1013,10 +962,7 @@ class TestMetadataServiceGetDatasetMetadatas:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Mock usage count queries
|
# Mock usage count queries
|
||||||
mock_query = Mock()
|
mock_db_session.scalar.return_value = 5 # 5 documents use this metadata
|
||||||
mock_query.filter_by.return_value = mock_query
|
|
||||||
mock_query.count.return_value = 5 # 5 documents use this metadata
|
|
||||||
mock_db_session.query.return_value = mock_query
|
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
result = MetadataService.get_dataset_metadatas(dataset)
|
result = MetadataService.get_dataset_metadatas(dataset)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user