From 12b00c546d41a18cf4294593196028cdf732d32e Mon Sep 17 00:00:00 2001
From: FFXN <31929997+FFXN@users.noreply.github.com>
Date: Thu, 30 Apr 2026 10:37:32 +0800
Subject: [PATCH] feat: evaluation (#35714)

Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Yansong Zhang <916125788@qq.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: hj24
Co-authored-by: hj24
Co-authored-by: Joel
Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>
Co-authored-by: CodingOnStar
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
---
NOTE(review): this patch arrived with its line breaks collapsed; the line
structure below was rebuilt so that every hunk matches its @@ line counts.
Indentation of unchanged context lines is inferred from Python syntax, not
recorded in the input; confirm against the target tree before applying.
The added imports in test_datasets.py are kept in the order recorded here,
which is not alphabetical.

 api/controllers/console/app/workflow.py       |  2 +-
 api/controllers/console/datasets/datasets.py  |  2 +-
 .../controllers/console/app/test_workflow.py  | 51 +++++++++++++++++++
 .../console/datasets/test_datasets.py         | 37 ++++++++++++++
 4 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/api/controllers/console/app/workflow.py b/api/controllers/console/app/workflow.py
index 16cad35f1c..598e41c1c3 100644
--- a/api/controllers/console/app/workflow.py
+++ b/api/controllers/console/app/workflow.py
@@ -11,7 +11,7 @@ from graphon.file import helpers as file_helpers
 from graphon.graph_engine.manager import GraphEngineManager
 from graphon.model_runtime.utils.encoders import jsonable_encoder
 from pydantic import BaseModel, Field, ValidationError, field_validator
-from sqlalchemy.orm import sessionmaker
+from sqlalchemy.orm import Session, sessionmaker
 from werkzeug.exceptions import BadRequest, Forbidden, InternalServerError, NotFound
 
 import services
diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py
index f1b900e7e6..25a7286b43 100644
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -1377,7 +1377,7 @@ class DatasetEvaluationMetricsApi(Resource):
             raise Forbidden(str(e))
 
         return {
-            "metrics": EvaluationService.get_supported_metrics(EvaluationCategory.KNOWLEDGE_BASE)
+            "metrics": EvaluationService.get_supported_metrics(EvaluationCategory.RETRIEVAL)
         }
 
 
diff --git a/api/tests/unit_tests/controllers/console/app/test_workflow.py b/api/tests/unit_tests/controllers/console/app/test_workflow.py
index e91c0a0597..8024c675e1 100644
--- a/api/tests/unit_tests/controllers/console/app/test_workflow.py
+++ b/api/tests/unit_tests/controllers/console/app/test_workflow.py
@@ -259,6 +259,57 @@ def test_restore_published_workflow_to_draft_returns_400_for_invalid_structure(
     assert exc.value.description == "invalid workflow graph"
 
 
+def test_publish_evaluation_workflow_success(app, monkeypatch: pytest.MonkeyPatch) -> None:
+    workflow = SimpleNamespace(created_at=datetime(2024, 1, 1), id="wf-1")
+    user = SimpleNamespace(id="account-1")
+    app_model = SimpleNamespace(id="app-1")
+
+    class _FakeSession:
+        def __enter__(self) -> "_FakeSession":
+            return self
+
+        def __exit__(self, exc_type, exc, tb) -> None:
+            return None
+
+        def get(self, model, app_id):
+            assert model is workflow_module.App
+            assert app_id == "app-1"
+            return SimpleNamespace(
+                workflow_id=None,
+                updated_by=None,
+                updated_at=None,
+            )
+
+        def commit(self) -> None:
+            return None
+
+    class _FakeSessionFactory:
+        def __call__(self, _engine) -> _FakeSession:
+            return _FakeSession()
+
+    monkeypatch.setattr(workflow_module, "current_account_with_tenant", lambda: (user, "tenant-1"))
+    monkeypatch.setattr(workflow_module, "Session", _FakeSessionFactory())
+    monkeypatch.setattr(workflow_module, "db", SimpleNamespace(engine=object()))
+    monkeypatch.setattr(
+        workflow_module,
+        "WorkflowService",
+        lambda: SimpleNamespace(publish_evaluation_workflow=lambda **_kwargs: workflow),
+    )
+
+    api = workflow_module.EvaluationPublishedWorkflowApi()
+    handler = _unwrap(api.post)
+
+    with app.test_request_context(
+        "/apps/app/workflows/publish/evaluation",
+        method="POST",
+        json={"marked_name": "v1", "marked_comment": "publish"},
+    ):
+        response = handler(api, app_model=app_model)
+
+    assert response["result"] == "success"
+    assert response["created_at"] is not None
+
+
 def test_get_published_workflows_marshals_items_before_session_closes(app, monkeypatch: pytest.MonkeyPatch) -> None:
     api = workflow_module.PublishedAllWorkflowApi()
     handler = _unwrap(api.get)
diff --git a/api/tests/unit_tests/controllers/console/datasets/test_datasets.py b/api/tests/unit_tests/controllers/console/datasets/test_datasets.py
index 9465936f28..1e42b34634 100644
--- a/api/tests/unit_tests/controllers/console/datasets/test_datasets.py
+++ b/api/tests/unit_tests/controllers/console/datasets/test_datasets.py
@@ -13,6 +13,7 @@ from controllers.console.datasets.datasets import (
     DatasetApiDeleteApi,
     DatasetApiKeyApi,
     DatasetAutoDisableLogApi,
+    DatasetEvaluationMetricsApi,
     DatasetEnableApiApi,
     DatasetErrorDocs,
     DatasetIndexingEstimateApi,
@@ -27,11 +28,13 @@ from controllers.console.datasets.datasets import (
 )
 from controllers.console.datasets.error import DatasetInUseError, DatasetNameDuplicateError, IndexingEstimateError
 from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
+from core.evaluation.entities.evaluation_entity import EvaluationCategory
 from core.provider_manager import ProviderManager
 from core.rag.index_processor.constant.index_type import IndexStructureType
 from extensions.storage.storage_type import StorageType
 from models.enums import CreatorUserRole
 from models.model import ApiToken, UploadFile
+from services.evaluation_service import EvaluationService
 from services.dataset_service import DatasetPermissionService, DatasetService
 
 
@@ -1959,3 +1962,37 @@ class TestDatasetAutoDisableLogApi:
         ):
             with pytest.raises(NotFound):
                 method(api, "dataset-1")
+
+
+class TestDatasetEvaluationMetricsApi:
+    def test_get_uses_retrieval_metrics_category(self, app):
+        api = DatasetEvaluationMetricsApi()
+        method = unwrap(api.get)
+
+        dataset = MagicMock()
+        current_user = MagicMock()
+
+        with (
+            app.test_request_context("/"),
+            patch(
+                "controllers.console.datasets.datasets.current_account_with_tenant",
+                return_value=(current_user, "tenant-1"),
+            ),
+            patch(
+                "controllers.console.datasets.datasets.DatasetService.get_dataset",
+                return_value=dataset,
+            ),
+            patch(
+                "controllers.console.datasets.datasets.DatasetService.check_dataset_permission",
+                return_value=None,
+            ),
+            patch.object(
+                EvaluationService,
+                "get_supported_metrics",
+                return_value=["context_precision", "context_recall"],
+            ) as get_supported_metrics_mock,
+        ):
+            response = method(api, "dataset-1")
+
+        get_supported_metrics_mock.assert_called_once_with(EvaluationCategory.RETRIEVAL)
+        assert response["metrics"] == ["context_precision", "context_recall"]