diff --git a/api/services/metadata_service.py b/api/services/metadata_service.py index 3329ac349c..859fc1902b 100644 --- a/api/services/metadata_service.py +++ b/api/services/metadata_service.py @@ -220,8 +220,8 @@ class MetadataService: doc_metadata[BuiltInField.source] = MetadataDataSource[document.data_source_type] document.doc_metadata = doc_metadata db.session.add(document) - db.session.commit() - # deal metadata binding + + # deal metadata binding (in the same transaction as the doc_metadata update) if not operation.partial_update: db.session.query(DatasetMetadataBinding).filter_by(document_id=operation.document_id).delete() @@ -247,7 +247,9 @@ class MetadataService: db.session.add(dataset_metadata_binding) db.session.commit() except Exception: + db.session.rollback() logger.exception("Update documents metadata failed") + raise finally: redis_client.delete(lock_key) diff --git a/api/tests/test_containers_integration_tests/services/test_metadata_service.py b/api/tests/test_containers_integration_tests/services/test_metadata_service.py index c8ced3f3a5..e04725627b 100644 --- a/api/tests/test_containers_integration_tests/services/test_metadata_service.py +++ b/api/tests/test_containers_integration_tests/services/test_metadata_service.py @@ -914,9 +914,6 @@ class TestMetadataService: metadata_args = MetadataArgs(type="string", name="test_metadata") metadata = MetadataService.create_metadata(dataset.id, metadata_args) - # Mock DocumentService.get_document to return None (document not found) - mock_external_service_dependencies["document_service"].get_document.return_value = None - # Create metadata operation data from services.entities.knowledge_entities.knowledge_entities import ( DocumentMetadataOperation, @@ -926,16 +923,17 @@ class TestMetadataService: metadata_detail = MetadataDetail(id=metadata.id, name=metadata.name, value="test_value") - operation = DocumentMetadataOperation(document_id="non-existent-document-id", metadata_list=[metadata_detail]) + # Use a valid UUID format that does not exist in the database + operation = DocumentMetadataOperation( + document_id="00000000-0000-0000-0000-000000000000", metadata_list=[metadata_detail] + ) operation_data = MetadataOperationData(operation_data=[operation]) - # Act: Execute the method under test - # The method should handle the error gracefully and continue - MetadataService.update_documents_metadata(dataset, operation_data) - - # Assert: Verify the method completes without raising exceptions - # The main functionality (error handling) is verified + # Act & Assert: The method should raise ValueError("Document not found.") + # because the exception is now re-raised after rollback + with pytest.raises(ValueError, match="Document not found"): + MetadataService.update_documents_metadata(dataset, operation_data) def test_knowledge_base_metadata_lock_check_dataset_id( self, db_session_with_containers, mock_external_service_dependencies diff --git a/api/tests/unit_tests/services/test_metadata_partial_update.py b/api/tests/unit_tests/services/test_metadata_partial_update.py index 00162c10e4..60252784bc 100644 --- a/api/tests/unit_tests/services/test_metadata_partial_update.py +++ b/api/tests/unit_tests/services/test_metadata_partial_update.py @@ -1,6 +1,8 @@ import unittest from unittest.mock import MagicMock, patch +import pytest + from models.dataset import Dataset, Document from services.entities.knowledge_entities.knowledge_entities import ( DocumentMetadataOperation, @@ -148,6 +150,38 @@ class TestMetadataPartialUpdate(unittest.TestCase): # If it were added, there would be 2 calls. If skipped, 1 call. assert mock_db.session.add.call_count == 1 + @patch("services.metadata_service.db") + @patch("services.metadata_service.DocumentService") + @patch("services.metadata_service.current_account_with_tenant") + @patch("services.metadata_service.redis_client") + def test_rollback_called_on_commit_failure(self, mock_redis, mock_current_account, mock_document_service, mock_db): + """When db.session.commit() raises, rollback must be called and the exception must propagate.""" + # Setup mocks + mock_redis.get.return_value = None + mock_document_service.get_document.return_value = self.document + mock_current_account.return_value = (MagicMock(id="user_id"), "tenant_id") + mock_db.session.query.return_value.filter_by.return_value.first.return_value = None + + # Make commit raise an exception + mock_db.session.commit.side_effect = RuntimeError("database connection lost") + + operation = DocumentMetadataOperation( + document_id="doc_id", + metadata_list=[MetadataDetail(id="meta_id", name="key", value="value")], + partial_update=True, + ) + metadata_args = MetadataOperationData(operation_data=[operation]) + + # Act & Assert: the exception must propagate + with pytest.raises(RuntimeError, match="database connection lost"): + MetadataService.update_documents_metadata(self.dataset, metadata_args) + + # Verify rollback was called + mock_db.session.rollback.assert_called_once() + + # Verify the lock key was cleaned up despite the failure + mock_redis.delete.assert_called_with("document_metadata_lock_doc_id") + if __name__ == "__main__": unittest.main() diff --git a/web/app/components/datasets/metadata/hooks/__tests__/use-batch-edit-document-metadata.spec.ts b/web/app/components/datasets/metadata/hooks/__tests__/use-batch-edit-document-metadata.spec.ts index bdcd2004d7..30ff2aa2aa 100644 --- a/web/app/components/datasets/metadata/hooks/__tests__/use-batch-edit-document-metadata.spec.ts +++ b/web/app/components/datasets/metadata/hooks/__tests__/use-batch-edit-document-metadata.spec.ts @@ -22,6 +22,7 @@ type MetadataItemWithEdit = { type: DataType value: string | number | null isMultipleValue?: boolean + isUpdated?: boolean updateType?: UpdateType } @@ -615,6 +616,91 @@ describe('useBatchEditDocumentMetadata', () => { }) }) + describe('toCleanMetadataItem sanitization', () => { + it('should strip extra fields (isMultipleValue, updateType, isUpdated) from metadata items sent to backend', async () => { + const docListSingleDoc: DocListItem[] = [ + { + id: 'doc-1', + doc_metadata: [ + { id: '1', name: 'field_one', type: DataType.string, value: 'Old Value' }, + ], + }, + ] + + const { result } = renderHook(() => + useBatchEditDocumentMetadata({ + ...defaultProps, + docList: docListSingleDoc as Parameters[0]['docList'], + }), + ) + + const editedList: MetadataItemWithEdit[] = [ + { + id: '1', + name: 'field_one', + type: DataType.string, + value: 'New Value', + isMultipleValue: true, + isUpdated: true, + updateType: UpdateType.changeValue, + }, + ] + + await act(async () => { + await result.current.handleSave(editedList, [], false) + }) + + const callArgs = mockMutateAsync.mock.calls[0][0] + const sentItem = callArgs.metadata_list[0].metadata_list[0] + + // Only id, name, type, value should be present + expect(Object.keys(sentItem).sort()).toEqual(['id', 'name', 'type', 'value'].sort()) + expect(sentItem).not.toHaveProperty('isMultipleValue') + expect(sentItem).not.toHaveProperty('updateType') + expect(sentItem).not.toHaveProperty('isUpdated') + }) + + it('should coerce undefined value to null in metadata items sent to backend', async () => { + const docListSingleDoc: DocListItem[] = [ + { + id: 'doc-1', + doc_metadata: [ + { id: '1', name: 'field_one', type: DataType.string, value: 'Value' }, + ], + }, + ] + + const { result } = renderHook(() => + useBatchEditDocumentMetadata({ + ...defaultProps, + docList: docListSingleDoc as Parameters[0]['docList'], + }), + ) + + // Pass an item with value explicitly set to undefined (via cast) + const editedList: MetadataItemWithEdit[] = [ + { + id: '1', + name: 'field_one', + type: DataType.string, + value: undefined as unknown as null, + updateType: UpdateType.changeValue, + }, + ] + + await act(async () => { + await result.current.handleSave(editedList, [], false) + }) + + const callArgs = mockMutateAsync.mock.calls[0][0] + const sentItem = callArgs.metadata_list[0].metadata_list[0] + + // value should be null, not undefined + expect(sentItem.value).toBeNull() + expect(sentItem.value).not.toBeUndefined() + }) + }) + describe('Edge Cases', () => { it('should handle empty docList', () => { const { result } = renderHook(() => diff --git a/web/app/components/datasets/metadata/hooks/use-batch-edit-document-metadata.ts b/web/app/components/datasets/metadata/hooks/use-batch-edit-document-metadata.ts index f3243ca6b6..84e6496859 100644 --- a/web/app/components/datasets/metadata/hooks/use-batch-edit-document-metadata.ts +++ b/web/app/components/datasets/metadata/hooks/use-batch-edit-document-metadata.ts @@ -71,6 +71,13 @@ const useBatchEditDocumentMetadata = ({ return res }, [metaDataList]) + const toCleanMetadataItem = (item: MetadataItemWithValue | MetadataItemWithEdit | MetadataItemInBatchEdit): MetadataItemWithValue => ({ + id: item.id, + name: item.name, + type: item.type, + value: item.value ?? null, + }) + const formateToBackendList = (editedList: MetadataItemWithEdit[], addedList: MetadataItemInBatchEdit[], isApplyToAllSelectDocument: boolean) => { const updatedList = editedList.filter((editedItem) => { return editedItem.updateType === UpdateType.changeValue @@ -92,24 +99,19 @@ const useBatchEditDocumentMetadata = ({ .filter((item) => { return !removedList.find(removedItem => removedItem.id === item.id) }) - .map(item => ({ - id: item.id, - name: item.name, - type: item.type, - value: item.value, - })) + .map(toCleanMetadataItem) if (isApplyToAllSelectDocument) { // add missing metadata item updatedList.forEach((editedItem) => { if (!newMetadataList.find(i => i.id === editedItem.id) && !editedItem.isMultipleValue) - newMetadataList.push(editedItem) + newMetadataList.push(toCleanMetadataItem(editedItem)) }) } newMetadataList = newMetadataList.map((item) => { const editedItem = updatedList.find(i => i.id === item.id) if (editedItem) - return editedItem + return toCleanMetadataItem(editedItem) return item })