mirror of
https://github.com/langgenius/dify.git
synced 2026-05-11 14:58:23 +08:00
feat: evaluation (#35714)
Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Yansong Zhang <916125788@qq.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: hj24 <mambahj24@gmail.com> Co-authored-by: hj24 <huangjian@dify.ai> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com> Co-authored-by: CodingOnStar <hanxujiang@dify.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
This commit is contained in:
parent
88ac401af3
commit
12b00c546d
@ -11,7 +11,7 @@ from graphon.file import helpers as file_helpers
|
||||
from graphon.graph_engine.manager import GraphEngineManager
|
||||
from graphon.model_runtime.utils.encoders import jsonable_encoder
|
||||
from pydantic import BaseModel, Field, ValidationError, field_validator
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
from werkzeug.exceptions import BadRequest, Forbidden, InternalServerError, NotFound
|
||||
|
||||
import services
|
||||
|
||||
@ -1377,7 +1377,7 @@ class DatasetEvaluationMetricsApi(Resource):
|
||||
raise Forbidden(str(e))
|
||||
|
||||
return {
|
||||
"metrics": EvaluationService.get_supported_metrics(EvaluationCategory.KNOWLEDGE_BASE)
|
||||
"metrics": EvaluationService.get_supported_metrics(EvaluationCategory.RETRIEVAL)
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -259,6 +259,57 @@ def test_restore_published_workflow_to_draft_returns_400_for_invalid_structure(
|
||||
assert exc.value.description == "invalid workflow graph"
|
||||
|
||||
|
||||
def test_publish_evaluation_workflow_success(app, monkeypatch: pytest.MonkeyPatch) -> None:
    """Check the success payload of ``EvaluationPublishedWorkflowApi.post``.

    Every collaborator the test touches on ``workflow_module`` (the account
    lookup, ``Session``, ``db`` and ``WorkflowService``) is swapped for an
    in-memory stand-in, so only the handler's own response is asserted.
    """
    published_workflow = SimpleNamespace(created_at=datetime(2024, 1, 1), id="wf-1")
    account = SimpleNamespace(id="account-1")
    app_model = SimpleNamespace(id="app-1")

    class _FakeSession:
        """Context-manager stand-in exposing only ``get`` and ``commit``."""

        def __enter__(self) -> "_FakeSession":
            return self

        def __exit__(self, exc_type, exc, tb) -> None:
            return None

        def get(self, model, app_id):
            # The lookup is expected to target the App model with the id of
            # the app_model handed to the handler below.
            assert model is workflow_module.App
            assert app_id == "app-1"
            app_row = SimpleNamespace(workflow_id=None, updated_by=None, updated_at=None)
            return app_row

        def commit(self) -> None:
            return None

    def _fake_session_factory(_engine) -> _FakeSession:
        # Replaces ``Session``; the engine argument is accepted and ignored.
        return _FakeSession()

    def _fake_current_account_with_tenant():
        return (account, "tenant-1")

    def _fake_publish_evaluation_workflow(**_kwargs):
        # Whatever keyword arguments are passed, hand back the canned workflow.
        return published_workflow

    def _fake_workflow_service():
        return SimpleNamespace(publish_evaluation_workflow=_fake_publish_evaluation_workflow)

    monkeypatch.setattr(workflow_module, "current_account_with_tenant", _fake_current_account_with_tenant)
    monkeypatch.setattr(workflow_module, "Session", _fake_session_factory)
    monkeypatch.setattr(workflow_module, "db", SimpleNamespace(engine=object()))
    monkeypatch.setattr(workflow_module, "WorkflowService", _fake_workflow_service)

    api = workflow_module.EvaluationPublishedWorkflowApi()
    post_handler = _unwrap(api.post)

    request_body = {"marked_name": "v1", "marked_comment": "publish"}
    with app.test_request_context(
        "/apps/app/workflows/publish/evaluation",
        method="POST",
        json=request_body,
    ):
        response = post_handler(api, app_model=app_model)

    assert response["result"] == "success"
    assert response["created_at"] is not None
|
||||
|
||||
|
||||
def test_get_published_workflows_marshals_items_before_session_closes(app, monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
api = workflow_module.PublishedAllWorkflowApi()
|
||||
handler = _unwrap(api.get)
|
||||
|
||||
@ -13,6 +13,7 @@ from controllers.console.datasets.datasets import (
|
||||
DatasetApiDeleteApi,
|
||||
DatasetApiKeyApi,
|
||||
DatasetAutoDisableLogApi,
|
||||
DatasetEvaluationMetricsApi,
|
||||
DatasetEnableApiApi,
|
||||
DatasetErrorDocs,
|
||||
DatasetIndexingEstimateApi,
|
||||
@ -27,11 +28,13 @@ from controllers.console.datasets.datasets import (
|
||||
)
|
||||
from controllers.console.datasets.error import DatasetInUseError, DatasetNameDuplicateError, IndexingEstimateError
|
||||
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
||||
from core.evaluation.entities.evaluation_entity import EvaluationCategory
|
||||
from core.provider_manager import ProviderManager
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||
from extensions.storage.storage_type import StorageType
|
||||
from models.enums import CreatorUserRole
|
||||
from models.model import ApiToken, UploadFile
|
||||
from services.evaluation_service import EvaluationService
|
||||
from services.dataset_service import DatasetPermissionService, DatasetService
|
||||
|
||||
|
||||
@ -1959,3 +1962,37 @@ class TestDatasetAutoDisableLogApi:
|
||||
):
|
||||
with pytest.raises(NotFound):
|
||||
method(api, "dataset-1")
|
||||
|
||||
|
||||
class TestDatasetEvaluationMetricsApi:
    """Tests for the ``DatasetEvaluationMetricsApi`` console resource."""

    def test_get_uses_retrieval_metrics_category(self, app):
        """GET must request metrics for ``EvaluationCategory.RETRIEVAL``.

        The account lookup, dataset lookup and permission check are patched
        out, and ``EvaluationService.get_supported_metrics`` is mocked so that
        both the category argument and the returned payload can be asserted.
        """
        datasets_module = "controllers.console.datasets.datasets"

        resource = DatasetEvaluationMetricsApi()
        get_handler = unwrap(resource.get)

        fake_dataset = MagicMock()
        fake_user = MagicMock()

        with (
            app.test_request_context("/"),
            patch(
                f"{datasets_module}.current_account_with_tenant",
                return_value=(fake_user, "tenant-1"),
            ),
            patch(
                f"{datasets_module}.DatasetService.get_dataset",
                return_value=fake_dataset,
            ),
            patch(
                f"{datasets_module}.DatasetService.check_dataset_permission",
                return_value=None,
            ),
            patch.object(
                EvaluationService,
                "get_supported_metrics",
                return_value=["context_precision", "context_recall"],
            ) as metrics_mock,
        ):
            result = get_handler(resource, "dataset-1")

        metrics_mock.assert_called_once_with(EvaluationCategory.RETRIEVAL)
        assert result["metrics"] == ["context_precision", "context_recall"]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user