From 12c3b2e0cdacf55f1caa4da7a26b109429f9cf90 Mon Sep 17 00:00:00 2001
From: JzoNg <jzongcode@gmail.com>
Date: Fri, 10 Apr 2026 11:26:12 +0800
Subject: [PATCH] feat(web): start run

---
 .../batch-test-panel/input-fields-tab.tsx     | 94 ++++++++++++++++--
 web/app/components/evaluation/store-utils.ts  | 97 +++++++++++++++++++
 web/app/components/evaluation/store.ts        | 16 +++
 web/app/components/evaluation/types.ts        |  1 +
 web/i18n/en-US/evaluation.json                |  5 +
 web/i18n/zh-Hans/evaluation.json              |  5 +
 web/service/use-evaluation.ts                 | 13 +++
 7 files changed, 222 insertions(+), 9 deletions(-)
diff --git a/web/app/components/evaluation/components/batch-test-panel/input-fields-tab.tsx b/web/app/components/evaluation/components/batch-test-panel/input-fields-tab.tsx
index 7e1b617b0e..1d0d4e210c 100644
--- a/web/app/components/evaluation/components/batch-test-panel/input-fields-tab.tsx
+++ b/web/app/components/evaluation/components/batch-test-panel/input-fields-tab.tsx
@@ -1,10 +1,15 @@
+import type { ChangeEvent } from 'react'
 import type { EvaluationResourceProps } from '../../types'
+import { useMutation } from '@tanstack/react-query'
 import { useRef } from 'react'
 import { useTranslation } from 'react-i18next'
 import Button from '@/app/components/base/button'
 import { toast } from '@/app/components/base/ui/toast'
+import { upload } from '@/service/base'
+import { useStartEvaluationRunMutation } from '@/service/use-evaluation'
 import { getEvaluationMockConfig } from '../../mock'
 import { useEvaluationResource, useEvaluationStore } from '../../store'
+import { buildEvaluationRunRequest } from '../../store-utils'
 
 type InputFieldsTabProps = EvaluationResourceProps & {
   isPanelReady: boolean
@@ -23,10 +28,38 @@ const InputFieldsTab = ({
     .filter(field => field.id.includes('.input.') || field.group.toLowerCase().includes('input'))
     .slice(0, 4)
   const displayedRequirementFields = requirementFields.length > 0 ? requirementFields : config.fieldOptions.slice(0, 4)
-  const uploadedFileName = useEvaluationResource(resourceType, resourceId).uploadedFileName
+  const resource = useEvaluationResource(resourceType, resourceId)
+  const uploadedFileId = resource.uploadedFileId
+  const uploadedFileName = resource.uploadedFileName
+  const setBatchTab = useEvaluationStore(state => state.setBatchTab)
+  const setUploadedFile = useEvaluationStore(state => state.setUploadedFile)
   const setUploadedFileName = useEvaluationStore(state => state.setUploadedFileName)
-  const runBatchTest = useEvaluationStore(state => state.runBatchTest)
+  const startRunMutation = useStartEvaluationRunMutation()
+  // Wraps the picked file in a FormData, hands it to `upload`, and mirrors the outcome
+  // into the evaluation store.
+  // NOTE(review): the callbacks do not check which selection they belong to — if two uploads
+  // overlap, an earlier one settling last could overwrite the newer file's state; confirm.
+  const uploadMutation = useMutation({
+    mutationFn: (file: File) => {
+      const payload = new FormData()
+      payload.append('file', file)
+      return upload({ xhr: new XMLHttpRequest(), data: payload })
+    },
+    onSuccess: ({ id, name }) => {
+      // Name preference: upload response, then the name already in the store, then the id.
+      const displayName = typeof name === 'string' ? name : uploadedFileName ?? id
+      setUploadedFile(resourceType, resourceId, { id, name: displayName })
+    },
+    // On failure the stored file is cleared and an error toast is shown.
+    onError: () => {
+      setUploadedFile(resourceType, resourceId, null)
+      toast.error(t('batch.uploadError'))
+    },
+  })
   const fileInputRef = useRef<HTMLInputElement>(null)
+  const isFileUploading = uploadMutation.isPending
+  const isRunning = startRunMutation.isPending
+  const isRunDisabled = !isRunnable || !uploadedFileId || isFileUploading || isRunning
 
   const handleDownloadTemplate = () => {
     const content = ['case_id,input,expected', '1,Example input,Example output'].join('\n')
@@ -42,7 +75,51 @@ const InputFieldsTab = ({
       return
     }
 
-    runBatchTest(resourceType, resourceId)
+    if (isFileUploading) {
+      toast.warning(t('batch.uploading'))
+      return
+    }
+
+    if (!uploadedFileId) {
+      toast.warning(t('batch.fileRequired'))
+      return
+    }
+
+    const body = buildEvaluationRunRequest(resource, uploadedFileId)
+
+    if (!body) {
+      toast.warning(t('batch.validation'))
+      return
+    }
+
+    startRunMutation.mutate({
+      params: {
+        targetType: resourceType,
+        targetId: resourceId,
+      },
+      body,
+    }, {
+      onSuccess: () => {
+        toast.success(t('batch.runStarted'))
+        setBatchTab(resourceType, resourceId, 'history')
+      },
+      onError: () => {
+        toast.error(t('batch.runFailed'))
+      },
+    })
+  }
+
+  // Change handler for the hidden file input: no file clears the stored upload, a file is named and uploaded.
+  const handleFileChange = ({ target }: ChangeEvent<HTMLInputElement>) => {
+    const picked = target.files?.[0]
+    target.value = '' // reset only after the selection has been read
+
+    if (picked) {
+      setUploadedFileName(resourceType, resourceId, picked.name)
+      uploadMutation.mutate(picked)
+      return
+    }
+    setUploadedFile(resourceType, resourceId, null)
   }
 
   return (
@@ -73,10 +150,7 @@ const InputFieldsTab = ({
           hidden
           type="file"
           accept=".csv,.xlsx"
-          onChange={(event) => {
-            const file = event.target.files?.[0]
-            setUploadedFileName(resourceType, resourceId, file?.name ?? null)
-          }}
+          onChange={handleFileChange}
         />
         {isPanelReady && (
           <button
@@ -86,7 +160,9 @@ const InputFieldsTab = ({
           >
             <span aria-hidden="true" className="i-ri-file-upload-line h-5 w-5 text-text-tertiary" />
             <div className="mt-2 system-sm-semibold text-text-primary">{t('batch.uploadTitle')}</div>
-            <div className="mt-1 system-xs-regular text-text-tertiary">{uploadedFileName ?? t('batch.uploadHint')}</div>
+            <div className="mt-1 system-xs-regular text-text-tertiary">
+              {isFileUploading ? t('batch.uploading') : uploadedFileName ?? t('batch.uploadHint')}
+            </div>
           </button>
         )}
       </div>
@@ -95,7 +171,7 @@ const InputFieldsTab = ({
           {t('batch.validation')}
         </div>
       )}
-      <Button className="w-full justify-center" variant="primary" disabled={!isRunnable} onClick={handleRun}>
+      <Button className="w-full justify-center" variant="primary" disabled={isRunDisabled} loading={isRunning} onClick={handleRun}>
         {t('batch.run')}
       </Button>
     </div>
diff --git a/web/app/components/evaluation/store-utils.ts b/web/app/components/evaluation/store-utils.ts
index 0f547e7488..abe5a59c51 100644
--- a/web/app/components/evaluation/store-utils.ts
+++ b/web/app/components/evaluation/store-utils.ts
@@ -16,11 +16,13 @@ import type {
   EvaluationJudgmentCondition,
   EvaluationJudgmentConditionValue,
   EvaluationJudgmentConfig,
+  EvaluationRunRequest,
   NodeInfo,
 } from '@/types/evaluation'
 import { getEvaluationMockConfig } from './mock'
 import {
   buildConditionMetricOptions,
+  decodeModelSelection,
   encodeModelSelection,
   getComparisonOperators,
   getDefaultComparisonOperator,
@@ -389,6 +391,7 @@ export const buildInitialState = (_resourceType: EvaluationResourceType): Evalua
     metrics: [],
     judgmentConfig: createEmptyJudgmentConfig(),
     activeBatchTab: 'input-fields',
+    uploadedFileId: null,
     uploadedFileName: null,
     batchRecords: [],
   }
@@ -412,6 +415,100 @@ export const buildStateFromEvaluationConfig = (
   }
 }
 
+// Converts the UI's null-check operator labels to the names sent as `comparison_operator`;
+// every other operator is passed through unchanged.
+const getApiComparisonOperator = (operator: ComparisonOperator) => {
+  switch (operator) {
+    case 'is null': return 'null'
+    case 'is not null': return 'not null'
+    default: return operator
+  }
+}
+
+// Resolves the id a custom-workflow metric is addressed by: its workflow app id when set,
+// else its workflow id. Gives null for other metric kinds or when neither id is present.
+const getCustomMetricScopeId = (metric: EvaluationMetric) => {
+  const config = metric.kind === 'custom-workflow' ? metric.customConfig : null
+  return config?.workflowAppId ?? config?.workflowId ?? null
+}
+
+// Builds the `customized_metrics` payload from the first custom-workflow metric in `metrics`.
+// Yields null when no such metric exists, it has no custom config, or no scope id resolves.
+const buildCustomizedMetricsPayload = (metrics: EvaluationMetric[]): EvaluationRunRequest['customized_metrics'] => {
+  const workflowMetric = metrics.find(item => item.kind === 'custom-workflow')
+  const config = workflowMetric?.customConfig
+  const evaluationWorkflowId = workflowMetric && getCustomMetricScopeId(workflowMetric)
+
+  if (!config || !evaluationWorkflowId)
+    return null
+
+  // Half-filled mappings are dropped; each kept pair is keyed by its input variable id.
+  const inputFieldEntries = config.mappings.flatMap(({ inputVariableId, outputVariableId }) =>
+    inputVariableId && outputVariableId ? [[inputVariableId, outputVariableId] as const] : [])
+
+  return {
+    evaluation_workflow_id: evaluationWorkflowId,
+    input_fields: Object.fromEntries(inputFieldEntries),
+    output_fields: config.outputs.map(({ id, valueType }) => ({
+      variable: id,
+      value_type: valueType ?? undefined,
+    })),
+  }
+}
+
+// Builds the `judgment_config` payload from the resource's judgment conditions.
+// Conditions without a variable selector are skipped; null is returned when none remain.
+const buildJudgmentConfigPayload = (resource: EvaluationResourceState): EvaluationRunRequest['judgment_config'] => {
+  const { metrics, judgmentConfig } = resource
+  const conditions = judgmentConfig.conditions.flatMap((item) => {
+    if (!item.variableSelector)
+      return []
+
+    const [scope, metricName] = item.variableSelector
+    // A selector whose scope matches a custom workflow's id is re-addressed by that metric's scope id.
+    const owner = metrics.find(candidate =>
+      candidate.kind === 'custom-workflow' && candidate.customConfig?.workflowId === scope)
+    const scopeId = (owner && getCustomMetricScopeId(owner)) ?? scope
+    const operator = item.comparisonOperator
+
+    return [{
+      variable_selector: [scopeId, metricName],
+      comparison_operator: getApiComparisonOperator(operator),
+      // `value` is sent only when requiresComparisonValue accepts the operator.
+      ...(requiresComparisonValue(operator) ? { value: item.value ?? undefined } : {}),
+    }]
+  })
+
+  return conditions.length
+    ? { logical_operator: judgmentConfig.logicalOperator, conditions }
+    : null
+}
+
+/**
+ * Assembles the request body for starting an evaluation run on the uploaded file `fileId`.
+ * @returns The request, or null when `resource.judgeModelId` does not decode to a model selection.
+ */
+export const buildEvaluationRunRequest = (
+  resource: EvaluationResourceState,
+  fileId: string,
+): EvaluationRunRequest | null => {
+  const judgeModel = decodeModelSelection(resource.judgeModelId)
+  if (!judgeModel)
+    return null
+
+  // Only builtin metrics go into `default_metrics`; a missing node list is sent as an empty array.
+  const builtinMetrics = resource.metrics.filter(item => item.kind === 'builtin')
+
+  return {
+    file_id: fileId,
+    evaluation_model: judgeModel.model,
+    evaluation_model_provider: judgeModel.provider,
+    default_metrics: builtinMetrics.map(({ optionId, valueType, nodeInfoList }) =>
+      ({ metric: optionId, value_type: valueType, node_info_list: nodeInfoList ?? [] })),
+    customized_metrics: buildCustomizedMetricsPayload(resource.metrics),
+    judgment_config: buildJudgmentConfigPayload(resource),
+  }
+}
+
 const getResourceState = (
   resources: EvaluationStoreResources,
   resourceType: EvaluationResourceType,
diff --git a/web/app/components/evaluation/store.ts b/web/app/components/evaluation/store.ts
index cfc006b595..7db18e000b 100644
--- a/web/app/components/evaluation/store.ts
+++ b/web/app/components/evaluation/store.ts
@@ -76,6 +76,11 @@ type EvaluationStore = {
     value: string | string[] | boolean | null,
   ) => void
   setBatchTab: (resourceType: EvaluationResourceType, resourceId: string, tab: EvaluationResourceState['activeBatchTab']) => void
+  setUploadedFile: (
+    resourceType: EvaluationResourceType,
+    resourceId: string,
+    uploadedFile: { id: string, name: string } | null,
+  ) => void
   setUploadedFileName: (resourceType: EvaluationResourceType, resourceId: string, uploadedFileName: string | null) => void
   runBatchTest: (resourceType: EvaluationResourceType, resourceId: string) => void
 }
@@ -103,6 +108,7 @@ export const useEvaluationStore = create<EvaluationStore>((set, get) => ({
         [buildResourceKey(resourceType, resourceId)]: {
           ...buildStateFromEvaluationConfig(resourceType, config),
           activeBatchTab: state.resources[buildResourceKey(resourceType, resourceId)]?.activeBatchTab ?? 'input-fields',
+          uploadedFileId: state.resources[buildResourceKey(resourceType, resourceId)]?.uploadedFileId ?? null,
           uploadedFileName: state.resources[buildResourceKey(resourceType, resourceId)]?.uploadedFileName ?? null,
           batchRecords: state.resources[buildResourceKey(resourceType, resourceId)]?.batchRecords ?? [],
         },
@@ -369,10 +375,20 @@ export const useEvaluationStore = create<EvaluationStore>((set, get) => ({
       })),
     }))
   },
+  // Records the uploaded file's id and name on the resource; a null file clears both fields.
+  setUploadedFile: (resourceType, resourceId, uploadedFile) => {
+    const uploadedFileId = uploadedFile?.id ?? null
+    const uploadedFileName = uploadedFile?.name ?? null
+    set(({ resources }) => ({
+      resources: updateResourceState(resources, resourceType, resourceId, current =>
+        ({ ...current, uploadedFileId, uploadedFileName })),
+    }))
+  },
   setUploadedFileName: (resourceType, resourceId, uploadedFileName) => {
     set(state => ({
       resources: updateResourceState(state.resources, resourceType, resourceId, resource => ({
         ...resource,
+        uploadedFileId: null,
         uploadedFileName,
       })),
     }))
diff --git a/web/app/components/evaluation/types.ts b/web/app/components/evaluation/types.ts
index baa73f6c3f..9a3750e5c5 100644
--- a/web/app/components/evaluation/types.ts
+++ b/web/app/components/evaluation/types.ts
@@ -136,6 +136,7 @@ export type EvaluationResourceState = {
   metrics: EvaluationMetric[]
   judgmentConfig: JudgmentConfig
   activeBatchTab: BatchTestTab
+  uploadedFileId: string | null
   uploadedFileName: string | null
   batchRecords: BatchTestRecord[]
 }
diff --git a/web/i18n/en-US/evaluation.json b/web/i18n/en-US/evaluation.json
index 3558d5e9e2..ec1af7f7f1 100644
--- a/web/i18n/en-US/evaluation.json
+++ b/web/i18n/en-US/evaluation.json
@@ -2,19 +2,24 @@
   "batch.description": "Execute batch evaluations and track performance history.",
   "batch.downloadTemplate": "Download Excel Template",
   "batch.emptyHistory": "No test history yet.",
+  "batch.fileRequired": "Upload an evaluation dataset file before running the test.",
   "batch.noticeDescription": "Configuration incomplete. Select the Judge Model and Metrics on the left to generate your batch test template.",
   "batch.noticeTitle": "Quick start",
   "batch.requirementsDescription": "The input variables required to run this batch test. Ensure your uploaded dataset matches these fields.",
   "batch.requirementsTitle": "Data requirements",
   "batch.run": "Run Test",
+  "batch.runFailed": "Failed to start batch test.",
+  "batch.runStarted": "Batch test started.",
   "batch.status.failed": "Failed",
   "batch.status.running": "Running",
   "batch.status.success": "Success",
   "batch.tabs.history": "Test History",
   "batch.tabs.input-fields": "Input Fields",
   "batch.title": "Batch Test",
+  "batch.uploadError": "Failed to upload file.",
   "batch.uploadHint": "Select a .csv or .xlsx file",
   "batch.uploadTitle": "Upload test file",
+  "batch.uploading": "Uploading file...",
   "batch.validation": "Complete the judge model, metrics, and custom mappings before running a batch test.",
   "conditions.addCondition": "Add Condition",
   "conditions.addGroup": "Add Condition Group",
diff --git a/web/i18n/zh-Hans/evaluation.json b/web/i18n/zh-Hans/evaluation.json
index db167bf50d..f3157f0dec 100644
--- a/web/i18n/zh-Hans/evaluation.json
+++ b/web/i18n/zh-Hans/evaluation.json
@@ -2,19 +2,24 @@
   "batch.description": "执行批量评测并追踪性能历史。",
   "batch.downloadTemplate": "下载 Excel 模板",
   "batch.emptyHistory": "还没有测试历史。",
+  "batch.fileRequired": "请先上传评估数据集文件，再运行测试。",
   "batch.noticeDescription": "配置尚未完成。请先在左侧选择判定模型和指标，以生成批量测试模板。",
   "batch.noticeTitle": "快速开始",
   "batch.requirementsDescription": "运行此批量测试所需的输入变量。请确保上传的数据集包含这些字段。",
   "batch.requirementsTitle": "数据要求",
   "batch.run": "运行测试",
+  "batch.runFailed": "启动批量测试失败。",
+  "batch.runStarted": "批量测试已启动。",
   "batch.status.failed": "失败",
   "batch.status.running": "运行中",
   "batch.status.success": "成功",
   "batch.tabs.history": "测试历史",
   "batch.tabs.input-fields": "输入字段",
   "batch.title": "批量测试",
+  "batch.uploadError": "文件上传失败。",
   "batch.uploadHint": "选择 .csv 或 .xlsx 文件",
   "batch.uploadTitle": "上传测试文件",
+  "batch.uploading": "文件上传中...",
   "batch.validation": "运行批量测试前，请先完成判定模型、指标和自定义映射配置。",
   "conditions.addCondition": "添加条件",
   "conditions.addGroup": "添加条件组",
diff --git a/web/service/use-evaluation.ts b/web/service/use-evaluation.ts
index 5b68588f63..2406345fc2 100644
--- a/web/service/use-evaluation.ts
+++ b/web/service/use-evaluation.ts
@@ -5,6 +5,7 @@ import {
   useInfiniteQuery,
   useMutation,
   useQuery,
+  useQueryClient,
 } from '@tanstack/react-query'
 import { consoleClient, consoleQuery } from '@/service/client'
 
@@ -62,6 +63,18 @@ export const useEvaluationNodeInfoMutation = () => {
   return useMutation(consoleQuery.evaluation.nodeInfo.mutationOptions())
 }
 
+/** Mutation hook for `evaluation.startRun` that invalidates the evaluation logs queries on success. */
+export const useStartEvaluationRunMutation = () => {
+  const queryClient = useQueryClient()
+
+  // The invalidation promise is not returned from the success callback.
+  const refreshLogs = () => {
+    queryClient.invalidateQueries({ queryKey: consoleQuery.evaluation.logs.key() })
+  }
+
+  return useMutation(consoleQuery.evaluation.startRun.mutationOptions({ onSuccess: refreshLogs }))
+}
+
 export const useAvailableEvaluationWorkflows = (
   params: AvailableEvaluationWorkflowsParams = {},
   options?: { enabled?: boolean },