fix: pydantic_core._pydantic_core.ValidationError: 2 validation errors for DatasetDetailResponse (#36753)

This commit is contained in:
呆萌闷油瓶 2026-06-03 15:10:55 +08:00 committed by GitHub
parent ee6a07d13c
commit 2a8bdc2373
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 68 additions and 1 deletions

View File

@ -322,6 +322,12 @@ class Dataset(Base):
@property
def retrieval_model_dict(self):
"""Return a normalized retrieval model payload for API responses.
Older rows may only persist a partial retrieval model dict. Merge the
stored value over the current defaults so response validation still sees
the required baseline fields.
"""
default_retrieval_model = {
"search_method": RetrievalMethod.SEMANTIC_SEARCH,
"reranking_enable": False,
@ -329,7 +335,10 @@ class Dataset(Base):
"top_k": 2,
"score_threshold_enabled": False,
}
return self.retrieval_model or default_retrieval_model
if not self.retrieval_model:
return default_retrieval_model
return {**default_retrieval_model, **self.retrieval_model}
@property
def tags(self):

View File

@ -294,6 +294,39 @@ class TestDatasetList:
assert status == 200
assert resp["data"][0]["retrieval_model_dict"]["weights"]["weight_type"] is None
def test_get_merges_partial_retrieval_model_defaults(self, app: Flask):
    """Check that the list endpoint returns a complete retrieval model for a partial row.

    The dataset under test stores only ``top_k`` and
    ``score_threshold_enabled``; the response must still expose
    ``search_method`` and ``reranking_enable`` while keeping the stored
    ``top_k`` value.
    """
    list_api = DatasetListApi()
    get_handler = unwrap(list_api.get)

    account = self._mock_user()
    # Only a subset of the retrieval model keys is persisted on this row.
    partial_datasets = [make_dataset(retrieval_model={"top_k": 4, "score_threshold_enabled": False})]
    # Provider configurations stub whose get_models() always yields no models.
    configurations = MagicMock(get_models=lambda **_: [])

    # Context managers are entered left to right: request context first,
    # then the three patches.
    with (
        app.test_request_context("/datasets"),
        patch(
            "controllers.console.datasets.datasets.current_account_with_tenant",
            return_value=(account, "tenant-1"),
        ),
        patch.object(DatasetService, "get_datasets", return_value=(partial_datasets, 1)),
        patch.object(ProviderManager, "get_configurations", return_value=configurations),
    ):
        body, status_code = get_handler(list_api)

    assert status_code == 200

    merged = body["data"][0]["retrieval_model_dict"]
    # Keys absent from the stored payload.
    assert merged["search_method"] == "semantic_search"
    assert merged["reranking_enable"] is False
    # Keys present in the stored payload.
    assert merged["top_k"] == 4
    assert merged["score_threshold_enabled"] is False
def test_embedding_available_false(self, app: Flask):
api = DatasetListApi()
method = unwrap(api.get)

View File

@ -219,6 +219,31 @@ class TestDatasetModelValidation:
assert result["reranking_enable"] is False
assert result["score_threshold_enabled"] is False
def test_dataset_retrieval_model_dict_property_merges_partial_values(self):
    """Check that retrieval_model_dict completes a partially stored retrieval model."""
    # Arrange: persist only three keys, leaving out search_method and
    # reranking_enable.
    stored_model = {
        "top_k": 4,
        "score_threshold_enabled": True,
        "score_threshold": 0.42,
    }
    dataset = Dataset(
        tenant_id=str(uuid4()),
        name="Test Dataset",
        data_source_type=DataSourceType.UPLOAD_FILE,
        created_by=str(uuid4()),
        retrieval_model=stored_model,
    )

    # Act
    merged = dataset.retrieval_model_dict

    # Assert: keys missing from the stored payload are filled in...
    assert merged["search_method"] == "semantic_search"
    assert merged["reranking_enable"] is False
    # ...and the stored values are returned unchanged.
    assert merged["top_k"] == 4
    assert merged["score_threshold_enabled"] is True
    assert merged["score_threshold"] == 0.42
def test_dataset_gen_collection_name_by_id(self):
"""Test static method for generating collection name."""
# Arrange