From 2a8bdc2373531937f03fddf9b77f5f7ada35ae81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=86=E8=90=8C=E9=97=B7=E6=B2=B9=E7=93=B6?= Date: Wed, 3 Jun 2026 15:10:55 +0800 Subject: [PATCH] fix: pydantic_core._pydantic_core.ValidationError: 2 validation errors for DatasetDetailResponse (#36753) --- api/models/dataset.py | 11 ++++++- .../console/datasets/test_datasets.py | 33 +++++++++++++++++++ .../unit_tests/models/test_dataset_models.py | 25 ++++++++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/api/models/dataset.py b/api/models/dataset.py index 0f26752dd3..5c7a58d53f 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -322,6 +322,12 @@ class Dataset(Base): @property def retrieval_model_dict(self): + """Return a normalized retrieval model payload for API responses. + + Older rows may only persist a partial retrieval model dict. Merge the + stored value over the current defaults so response validation still sees + the required baseline fields. + """ default_retrieval_model = { "search_method": RetrievalMethod.SEMANTIC_SEARCH, "reranking_enable": False, @@ -329,7 +335,10 @@ class Dataset(Base): "top_k": 2, "score_threshold_enabled": False, } - return self.retrieval_model or default_retrieval_model + if not self.retrieval_model: + return default_retrieval_model + + return {**default_retrieval_model, **self.retrieval_model} @property def tags(self): diff --git a/api/tests/unit_tests/controllers/console/datasets/test_datasets.py b/api/tests/unit_tests/controllers/console/datasets/test_datasets.py index d9f3270bd0..ff1a664515 100644 --- a/api/tests/unit_tests/controllers/console/datasets/test_datasets.py +++ b/api/tests/unit_tests/controllers/console/datasets/test_datasets.py @@ -294,6 +294,39 @@ class TestDatasetList: assert status == 200 assert resp["data"][0]["retrieval_model_dict"]["weights"]["weight_type"] is None + def test_get_merges_partial_retrieval_model_defaults(self, app: Flask): + api = DatasetListApi() + method = unwrap(api.get) + + current_user = self._mock_user() + datasets = [make_dataset(retrieval_model={"top_k": 4, "score_threshold_enabled": False})] + + with app.test_request_context("/datasets"): + with ( + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_datasets", + return_value=(datasets, 1), + ), + patch.object( + ProviderManager, + "get_configurations", + return_value=MagicMock(get_models=lambda **_: []), + ), + ): + resp, status = method(api) + + assert status == 200 + retrieval_model = resp["data"][0]["retrieval_model_dict"] + assert retrieval_model["search_method"] == "semantic_search" + assert retrieval_model["reranking_enable"] is False + assert retrieval_model["top_k"] == 4 + assert retrieval_model["score_threshold_enabled"] is False + def test_embedding_available_false(self, app: Flask): api = DatasetListApi() method = unwrap(api.get) diff --git a/api/tests/unit_tests/models/test_dataset_models.py b/api/tests/unit_tests/models/test_dataset_models.py index f4ccfb4191..44a4e6af98 100644 --- a/api/tests/unit_tests/models/test_dataset_models.py +++ b/api/tests/unit_tests/models/test_dataset_models.py @@ -219,6 +219,31 @@ class TestDatasetModelValidation: assert result["reranking_enable"] is False assert result["score_threshold_enabled"] is False + def test_dataset_retrieval_model_dict_property_merges_partial_values(self): + """Test retrieval_model_dict property fills in missing legacy keys.""" + # Arrange + dataset = Dataset( + tenant_id=str(uuid4()), + name="Test Dataset", + data_source_type=DataSourceType.UPLOAD_FILE, + created_by=str(uuid4()), + retrieval_model={ + "top_k": 4, + "score_threshold_enabled": True, + "score_threshold": 0.42, + }, + ) + + # Act + result = dataset.retrieval_model_dict + + # Assert + assert result["search_method"] == "semantic_search" + assert result["reranking_enable"] is False + assert result["top_k"] == 4 + assert result["score_threshold_enabled"] is True + assert result["score_threshold"] == 0.42 + def test_dataset_gen_collection_name_by_id(self): """Test static method for generating collection name.""" # Arrange