mirror of
https://github.com/langgenius/dify.git
synced 2026-05-06 18:27:19 +08:00
fix: evaluation (#35728)
Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Yansong Zhang <916125788@qq.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: hj24 <mambahj24@gmail.com> Co-authored-by: hj24 <huangjian@dify.ai> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com> Co-authored-by: CodingOnStar <hanxujiang@dify.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
This commit is contained in:
parent
3373b63716
commit
22653b7464
@ -234,7 +234,7 @@ def get_evaluation_target(view_func: Callable[P, R]):
|
||||
return decorated_view
|
||||
|
||||
|
||||
def _load_evaluation_run_request_and_dataset(tenant_id: str) -> tuple[EvaluationRunRequest, bytes]:
|
||||
def _load_evaluation_run_request_and_dataset(tenant_id: str) -> tuple[EvaluationRunRequest, bytes, str]:
|
||||
"""Validate the run payload and load the uploaded dataset bytes."""
|
||||
body = request.get_json(force=True)
|
||||
if not body:
|
||||
@ -257,7 +257,7 @@ def _load_evaluation_run_request_and_dataset(tenant_id: str) -> tuple[Evaluation
|
||||
if not dataset_content:
|
||||
raise BadRequest("Dataset file is empty.")
|
||||
|
||||
return run_request, dataset_content
|
||||
return run_request, dataset_content, upload_file.name
|
||||
|
||||
|
||||
@console_ns.route("/<string:evaluate_target_type>/<uuid:evaluate_target_id>/dataset-template/download")
|
||||
@ -434,7 +434,7 @@ class EvaluationRunApi(Resource):
|
||||
- judgment_config: judgment conditions config (optional)
|
||||
"""
|
||||
current_account, current_tenant_id = current_account_with_tenant()
|
||||
run_request, dataset_content = _load_evaluation_run_request_and_dataset(current_tenant_id)
|
||||
run_request, dataset_content, dataset_filename = _load_evaluation_run_request_and_dataset(current_tenant_id)
|
||||
|
||||
try:
|
||||
with Session(db.engine, expire_on_commit=False) as session:
|
||||
@ -446,6 +446,7 @@ class EvaluationRunApi(Resource):
|
||||
target_id=str(target.id),
|
||||
account_id=str(current_account.id),
|
||||
dataset_file_content=dataset_content,
|
||||
dataset_filename=dataset_filename,
|
||||
run_request=run_request,
|
||||
)
|
||||
else:
|
||||
@ -456,6 +457,7 @@ class EvaluationRunApi(Resource):
|
||||
target_id=str(target.id),
|
||||
account_id=str(current_account.id),
|
||||
dataset_file_content=dataset_content,
|
||||
dataset_filename=dataset_filename,
|
||||
run_request=run_request,
|
||||
)
|
||||
return _serialize_evaluation_run(evaluation_run), 200
|
||||
@ -483,7 +485,7 @@ class EvaluationRunRealApi(Resource):
|
||||
def post(self, target: Union[App, CustomizedSnippet, Dataset], target_type: str):
|
||||
"""Start the real evaluation execution flow on the temporary dev path."""
|
||||
current_account, current_tenant_id = current_account_with_tenant()
|
||||
run_request, dataset_content = _load_evaluation_run_request_and_dataset(current_tenant_id)
|
||||
run_request, dataset_content, dataset_filename = _load_evaluation_run_request_and_dataset(current_tenant_id)
|
||||
|
||||
try:
|
||||
with Session(db.engine, expire_on_commit=False) as session:
|
||||
@ -494,6 +496,7 @@ class EvaluationRunRealApi(Resource):
|
||||
target_id=str(target.id),
|
||||
account_id=str(current_account.id),
|
||||
dataset_file_content=dataset_content,
|
||||
dataset_filename=dataset_filename,
|
||||
run_request=run_request,
|
||||
)
|
||||
return _serialize_evaluation_run(evaluation_run), 200
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import csv
|
||||
from collections.abc import Mapping
|
||||
from typing import Any, Union
|
||||
|
||||
@ -352,6 +353,7 @@ class EvaluationService:
|
||||
target_id: str,
|
||||
account_id: str,
|
||||
dataset_file_content: bytes,
|
||||
dataset_filename: str,
|
||||
run_request: EvaluationRunRequest,
|
||||
) -> EvaluationRun:
|
||||
"""Validate dataset, create run record, dispatch Celery task.
|
||||
@ -386,7 +388,7 @@ class EvaluationService:
|
||||
raise EvaluationMaxConcurrentRunsError(f"Maximum concurrent runs ({max_concurrent}) reached.")
|
||||
|
||||
# Parse dataset
|
||||
items = cls._parse_dataset(dataset_file_content)
|
||||
items = cls._parse_dataset(dataset_file_content, dataset_filename)
|
||||
max_rows = dify_config.EVALUATION_MAX_DATASET_ROWS
|
||||
if len(items) > max_rows:
|
||||
raise EvaluationDatasetInvalidError(f"Dataset has {len(items)} rows, max is {max_rows}.")
|
||||
@ -437,6 +439,7 @@ class EvaluationService:
|
||||
target_id: str,
|
||||
account_id: str,
|
||||
dataset_file_content: bytes,
|
||||
dataset_filename: str,
|
||||
run_request: EvaluationRunRequest,
|
||||
) -> EvaluationRun:
|
||||
"""Persist a completed synthetic run for frontend integration testing.
|
||||
@ -461,7 +464,7 @@ class EvaluationService:
|
||||
data=run_request,
|
||||
)
|
||||
|
||||
items = cls._parse_dataset(dataset_file_content)
|
||||
items = cls._parse_dataset(dataset_file_content, dataset_filename)
|
||||
max_rows = dify_config.EVALUATION_MAX_DATASET_ROWS
|
||||
if len(items) > max_rows:
|
||||
raise EvaluationDatasetInvalidError(f"Dataset has {len(items)} rows, max is {max_rows}.")
|
||||
@ -932,7 +935,15 @@ class EvaluationService:
|
||||
# ---- Dataset Parsing ----
|
||||
|
||||
@classmethod
|
||||
def _parse_dataset(cls, xlsx_content: bytes) -> list[EvaluationDatasetInput]:
|
||||
def _parse_dataset(cls, file_content: bytes, filename: str) -> list[EvaluationDatasetInput]:
    """Dispatch dataset parsing based on the uploaded file's name.

    A name ending in ``.csv`` (compared case-insensitively) is handed to the
    CSV parser; every other name is handed to the XLSX parser.
    """
    is_csv = filename.lower().endswith(".csv")
    parser = cls._parse_csv_dataset if is_csv else cls._parse_xlsx_dataset
    return parser(file_content)
|
||||
|
||||
@classmethod
|
||||
def _parse_xlsx_dataset(cls, xlsx_content: bytes) -> list[EvaluationDatasetInput]:
|
||||
"""Parse evaluation dataset from XLSX bytes."""
|
||||
wb = load_workbook(io.BytesIO(xlsx_content), read_only=True)
|
||||
ws = wb.active
|
||||
@ -979,6 +990,51 @@ class EvaluationService:
|
||||
wb.close()
|
||||
return items
|
||||
|
||||
@classmethod
|
||||
def _parse_csv_dataset(cls, csv_content: bytes) -> list[EvaluationDatasetInput]:
    """Parse evaluation dataset from UTF-8 CSV bytes.

    CSV follows the same schema as XLSX:
    the first column must be `index`, remaining columns become inputs,
    and `expected_output` is extracted into a dedicated field.

    Rows whose cells are all blank are skipped. When the index cell is not
    an integer, the row's 1-based position among the data rows is used
    instead. Cells missing from a short row are filled with ``""``.

    Raises:
        EvaluationDatasetInvalidError: if the bytes are not valid UTF-8,
            the CSV is malformed, there is no header plus at least one
            further row, or the first header is not ``index``.
    """
    try:
        # "utf-8-sig" transparently drops a leading BOM if one is present.
        decoded = csv_content.decode("utf-8-sig")
    except UnicodeDecodeError as e:
        raise EvaluationDatasetInvalidError("CSV file must be UTF-8 encoded.") from e

    try:
        # newline="" leaves line-ending handling to the csv module, as its
        # documentation recommends, so bare-CR files parse correctly.
        rows = list(csv.reader(io.StringIO(decoded, newline="")))
    except csv.Error as e:
        # Report malformed CSV as a dataset validation error rather than
        # letting the parser's own exception escape.
        raise EvaluationDatasetInvalidError(f"CSV file is malformed: {e}") from e

    if len(rows) < 2:
        raise EvaluationDatasetInvalidError("Dataset must have at least a header row and one data row.")

    headers = [h.strip() for h in rows[0]]
    if not headers or headers[0].lower() != "index":
        raise EvaluationDatasetInvalidError("First column header must be 'index'.")

    input_headers = headers[1:]
    items: list[EvaluationDatasetInput] = []
    for row_idx, row in enumerate(rows[1:], start=1):
        # Skip blank rows (this also covers rows with no cells at all).
        if not any(cell.strip() for cell in row):
            continue

        # A non-blank row always has at least one cell, so row[0] is safe.
        try:
            index = int(row[0])
        except ValueError:
            index = row_idx

        cells = row[1:]
        inputs: dict[str, Any] = {
            header: (cells[col_idx] if col_idx < len(cells) else "")
            for col_idx, header in enumerate(input_headers)
        }

        expected_output = inputs.pop("expected_output", None)
        items.append(EvaluationDatasetInput(index=index, inputs=inputs, expected_output=expected_output))

    return items
|
||||
|
||||
@classmethod
|
||||
def _build_stub_results(
|
||||
cls,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user