feat: evaluation (#35714)

Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Yansong Zhang <916125788@qq.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: hj24 <mambahj24@gmail.com>
Co-authored-by: hj24 <huangjian@dify.ai>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>
Co-authored-by: CodingOnStar <hanxujiang@dify.com>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
This commit is contained in:
FFXN 2026-04-30 10:37:32 +08:00 committed by GitHub
parent 88ac401af3
commit 12b00c546d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 90 additions and 2 deletions

View File

@@ -11,7 +11,7 @@ from graphon.file import helpers as file_helpers
from graphon.graph_engine.manager import GraphEngineManager
from graphon.model_runtime.utils.encoders import jsonable_encoder
from pydantic import BaseModel, Field, ValidationError, field_validator
-from sqlalchemy.orm import sessionmaker
+from sqlalchemy.orm import Session, sessionmaker
from werkzeug.exceptions import BadRequest, Forbidden, InternalServerError, NotFound
import services

View File

@@ -1377,7 +1377,7 @@ class DatasetEvaluationMetricsApi(Resource):
raise Forbidden(str(e))
return {
-"metrics": EvaluationService.get_supported_metrics(EvaluationCategory.KNOWLEDGE_BASE)
+"metrics": EvaluationService.get_supported_metrics(EvaluationCategory.RETRIEVAL)
}

View File

@@ -259,6 +259,57 @@ def test_restore_published_workflow_to_draft_returns_400_for_invalid_structure(
assert exc.value.description == "invalid workflow graph"
def test_publish_evaluation_workflow_success(app, monkeypatch: pytest.MonkeyPatch) -> None:
    """Posting to the evaluation-publish endpoint returns success and a timestamp.

    All collaborators (session, DB engine, WorkflowService, current account)
    are replaced with stubs so only the handler wiring is exercised.
    """
    published_workflow = SimpleNamespace(created_at=datetime(2024, 1, 1), id="wf-1")
    account = SimpleNamespace(id="account-1")
    app_model = SimpleNamespace(id="app-1")

    class _StubSession:
        """Minimal context-manager stand-in for a SQLAlchemy session."""

        def __enter__(self) -> "_StubSession":
            return self

        def __exit__(self, exc_type, exc, tb) -> None:
            return None

        def get(self, model, app_id):
            # The handler is expected to look up the App row for the posted app id.
            assert model is workflow_module.App
            assert app_id == "app-1"
            return SimpleNamespace(workflow_id=None, updated_by=None, updated_at=None)

        def commit(self) -> None:
            return None

    monkeypatch.setattr(workflow_module, "current_account_with_tenant", lambda: (account, "tenant-1"))
    # Session(engine) must yield a usable session; the engine value is ignored.
    monkeypatch.setattr(workflow_module, "Session", lambda _engine: _StubSession())
    monkeypatch.setattr(workflow_module, "db", SimpleNamespace(engine=object()))
    monkeypatch.setattr(
        workflow_module,
        "WorkflowService",
        lambda: SimpleNamespace(publish_evaluation_workflow=lambda **_kwargs: published_workflow),
    )

    api = workflow_module.EvaluationPublishedWorkflowApi()
    handler = _unwrap(api.post)
    with app.test_request_context(
        "/apps/app/workflows/publish/evaluation",
        method="POST",
        json={"marked_name": "v1", "marked_comment": "publish"},
    ):
        response = handler(api, app_model=app_model)

    assert response["result"] == "success"
    assert response["created_at"] is not None
def test_get_published_workflows_marshals_items_before_session_closes(app, monkeypatch: pytest.MonkeyPatch) -> None:
api = workflow_module.PublishedAllWorkflowApi()
handler = _unwrap(api.get)

View File

@@ -13,6 +13,7 @@ from controllers.console.datasets.datasets import (
DatasetApiDeleteApi,
DatasetApiKeyApi,
DatasetAutoDisableLogApi,
DatasetEvaluationMetricsApi,
DatasetEnableApiApi,
DatasetErrorDocs,
DatasetIndexingEstimateApi,
@@ -27,11 +28,13 @@ from controllers.console.datasets.datasets import (
)
from controllers.console.datasets.error import DatasetInUseError, DatasetNameDuplicateError, IndexingEstimateError
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
from core.evaluation.entities.evaluation_entity import EvaluationCategory
from core.provider_manager import ProviderManager
from core.rag.index_processor.constant.index_type import IndexStructureType
from extensions.storage.storage_type import StorageType
from models.enums import CreatorUserRole
from models.model import ApiToken, UploadFile
from services.evaluation_service import EvaluationService
from services.dataset_service import DatasetPermissionService, DatasetService
@@ -1959,3 +1962,37 @@ class TestDatasetAutoDisableLogApi:
):
with pytest.raises(NotFound):
method(api, "dataset-1")
class TestDatasetEvaluationMetricsApi:
    """Tests for the dataset evaluation-metrics endpoint."""

    def test_get_uses_retrieval_metrics_category(self, app):
        """GET must fetch metrics for the RETRIEVAL category and return them verbatim."""
        api = DatasetEvaluationMetricsApi()
        method = unwrap(api.get)
        dataset_stub = MagicMock()
        account_stub = MagicMock()

        with app.test_request_context("/"):
            with patch(
                "controllers.console.datasets.datasets.current_account_with_tenant",
                return_value=(account_stub, "tenant-1"),
            ):
                with patch(
                    "controllers.console.datasets.datasets.DatasetService.get_dataset",
                    return_value=dataset_stub,
                ):
                    with patch(
                        "controllers.console.datasets.datasets.DatasetService.check_dataset_permission",
                        return_value=None,
                    ):
                        with patch.object(
                            EvaluationService,
                            "get_supported_metrics",
                            return_value=["context_precision", "context_recall"],
                        ) as metrics_mock:
                            response = method(api, "dataset-1")

        # The endpoint must ask for RETRIEVAL metrics (not any other category)
        # and pass the service's answer straight through.
        metrics_mock.assert_called_once_with(EvaluationCategory.RETRIEVAL)
        assert response["metrics"] == ["context_precision", "context_recall"]