diff --git a/api/controllers/console/evaluation/evaluation.py b/api/controllers/console/evaluation/evaluation.py index 54a34e88b9..e8262c6266 100644 --- a/api/controllers/console/evaluation/evaluation.py +++ b/api/controllers/console/evaluation/evaluation.py @@ -234,7 +234,7 @@ def get_evaluation_target(view_func: Callable[P, R]): return decorated_view -def _load_evaluation_run_request_and_dataset(tenant_id: str) -> tuple[EvaluationRunRequest, bytes]: +def _load_evaluation_run_request_and_dataset(tenant_id: str) -> tuple[EvaluationRunRequest, bytes, str]: """Validate the run payload and load the uploaded dataset bytes.""" body = request.get_json(force=True) if not body: @@ -257,7 +257,7 @@ def _load_evaluation_run_request_and_dataset(tenant_id: str) -> tuple[Evaluation if not dataset_content: raise BadRequest("Dataset file is empty.") - return run_request, dataset_content + return run_request, dataset_content, upload_file.name @console_ns.route("///dataset-template/download") @@ -434,7 +434,7 @@ class EvaluationRunApi(Resource): - judgment_config: judgment conditions config (optional) """ current_account, current_tenant_id = current_account_with_tenant() - run_request, dataset_content = _load_evaluation_run_request_and_dataset(current_tenant_id) + run_request, dataset_content, dataset_filename = _load_evaluation_run_request_and_dataset(current_tenant_id) try: with Session(db.engine, expire_on_commit=False) as session: @@ -446,6 +446,7 @@ class EvaluationRunApi(Resource): target_id=str(target.id), account_id=str(current_account.id), dataset_file_content=dataset_content, + dataset_filename=dataset_filename, run_request=run_request, ) else: @@ -456,6 +457,7 @@ class EvaluationRunApi(Resource): target_id=str(target.id), account_id=str(current_account.id), dataset_file_content=dataset_content, + dataset_filename=dataset_filename, run_request=run_request, ) return _serialize_evaluation_run(evaluation_run), 200 @@ -483,7 +485,7 @@ class EvaluationRunRealApi(Resource): def 
post(self, target: Union[App, CustomizedSnippet, Dataset], target_type: str): """Start the real evaluation execution flow on the temporary dev path.""" current_account, current_tenant_id = current_account_with_tenant() - run_request, dataset_content = _load_evaluation_run_request_and_dataset(current_tenant_id) + run_request, dataset_content, dataset_filename = _load_evaluation_run_request_and_dataset(current_tenant_id) try: with Session(db.engine, expire_on_commit=False) as session: @@ -494,6 +496,7 @@ class EvaluationRunRealApi(Resource): target_id=str(target.id), account_id=str(current_account.id), dataset_file_content=dataset_content, + dataset_filename=dataset_filename, run_request=run_request, ) return _serialize_evaluation_run(evaluation_run), 200 diff --git a/api/services/evaluation_service.py b/api/services/evaluation_service.py index c6fad3b90c..52b6582eca 100644 --- a/api/services/evaluation_service.py +++ b/api/services/evaluation_service.py @@ -1,6 +1,7 @@ +import csv import io import json import logging from collections.abc import Mapping from typing import Any, Union @@ -352,6 +353,7 @@ class EvaluationService: target_id: str, account_id: str, dataset_file_content: bytes, + dataset_filename: str, run_request: EvaluationRunRequest, ) -> EvaluationRun: """Validate dataset, create run record, dispatch Celery task. 
@@ -386,7 +388,7 @@ class EvaluationService: raise EvaluationMaxConcurrentRunsError(f"Maximum concurrent runs ({max_concurrent}) reached.") # Parse dataset - items = cls._parse_dataset(dataset_file_content) + items = cls._parse_dataset(dataset_file_content, dataset_filename) max_rows = dify_config.EVALUATION_MAX_DATASET_ROWS if len(items) > max_rows: raise EvaluationDatasetInvalidError(f"Dataset has {len(items)} rows, max is {max_rows}.") @@ -437,6 +439,7 @@ class EvaluationService: target_id: str, account_id: str, dataset_file_content: bytes, + dataset_filename: str, run_request: EvaluationRunRequest, ) -> EvaluationRun: """Persist a completed synthetic run for frontend integration testing. @@ -461,7 +464,7 @@ class EvaluationService: data=run_request, ) - items = cls._parse_dataset(dataset_file_content) + items = cls._parse_dataset(dataset_file_content, dataset_filename) max_rows = dify_config.EVALUATION_MAX_DATASET_ROWS if len(items) > max_rows: raise EvaluationDatasetInvalidError(f"Dataset has {len(items)} rows, max is {max_rows}.") @@ -932,7 +935,15 @@ class EvaluationService: # ---- Dataset Parsing ---- @classmethod - def _parse_dataset(cls, xlsx_content: bytes) -> list[EvaluationDatasetInput]: + def _parse_dataset(cls, file_content: bytes, filename: str) -> list[EvaluationDatasetInput]: + """Parse evaluation dataset from CSV or XLSX content.""" + filename_lower = filename.lower() + if filename_lower.endswith(".csv"): + return cls._parse_csv_dataset(file_content) + return cls._parse_xlsx_dataset(file_content) + + @classmethod + def _parse_xlsx_dataset(cls, xlsx_content: bytes) -> list[EvaluationDatasetInput]: """Parse evaluation dataset from XLSX bytes.""" wb = load_workbook(io.BytesIO(xlsx_content), read_only=True) ws = wb.active @@ -979,6 +990,51 @@ class EvaluationService: wb.close() return items + @classmethod + def _parse_csv_dataset(cls, csv_content: bytes) -> list[EvaluationDatasetInput]: + """Parse evaluation dataset from UTF-8 CSV bytes. 
+ + CSV follows the same schema as XLSX: + the first column must be `index`, remaining columns become inputs, + and `expected_output` is extracted into a dedicated field. + """ + try: + decoded = csv_content.decode("utf-8-sig") + except UnicodeDecodeError as e: + raise EvaluationDatasetInvalidError("CSV file must be UTF-8 encoded.") from e + + reader = csv.reader(io.StringIO(decoded)) + rows = list(reader) + if len(rows) < 2: + raise EvaluationDatasetInvalidError("Dataset must have at least a header row and one data row.") + + headers = [str(h).strip() if h is not None else "" for h in rows[0]] + if not headers or headers[0].lower() != "index": + raise EvaluationDatasetInvalidError("First column header must be 'index'.") + + input_headers = headers[1:] + items: list[EvaluationDatasetInput] = [] + for row_idx, row in enumerate(rows[1:], start=1): + values = list(row) + if all(str(v).strip() == "" for v in values): + continue + + index_val = values[0] if values else row_idx + try: + index = int(str(index_val)) + except (TypeError, ValueError): + index = row_idx + + inputs: dict[str, Any] = {} + for col_idx, header in enumerate(input_headers): + val = values[col_idx + 1] if col_idx + 1 < len(values) else None + inputs[header] = str(val) if val is not None else "" + + expected_output = inputs.pop("expected_output", None) + items.append(EvaluationDatasetInput(index=index, inputs=inputs, expected_output=expected_output)) + + return items + @classmethod def _build_stub_results( cls,