From 4041fd7e5cee0cbac0f97d65743a5f2bd30e30f4 Mon Sep 17 00:00:00 2001 From: JzoNg Date: Wed, 29 Apr 2026 20:10:37 +0800 Subject: [PATCH 1/9] fix(web): auto select model in evaluation --- .../components/judge-model-selector.tsx | 20 +------------------ .../components/layout/pipeline-evaluation.tsx | 1 - 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/web/app/components/evaluation/components/judge-model-selector.tsx b/web/app/components/evaluation/components/judge-model-selector.tsx index 8f9ee4aff6..ac954ed086 100644 --- a/web/app/components/evaluation/components/judge-model-selector.tsx +++ b/web/app/components/evaluation/components/judge-model-selector.tsx @@ -1,39 +1,21 @@ 'use client' import type { EvaluationResourceProps } from '../types' -import { useEffect } from 'react' import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' import { useModelList } from '@/app/components/header/account-setting/model-provider-page/hooks' import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector' import { useEvaluationResource, useEvaluationStore } from '../store' import { decodeModelSelection, encodeModelSelection } from '../utils' -type JudgeModelSelectorProps = EvaluationResourceProps & { - autoSelectFirst?: boolean -} - const JudgeModelSelector = ({ resourceType, resourceId, - autoSelectFirst = true, -}: JudgeModelSelectorProps) => { +}: EvaluationResourceProps) => { const { data: modelList } = useModelList(ModelTypeEnum.textGeneration) const resource = useEvaluationResource(resourceType, resourceId) const setJudgeModel = useEvaluationStore(state => state.setJudgeModel) const selectedModel = decodeModelSelection(resource.judgeModelId) - useEffect(() => { - if (!autoSelectFirst || resource.judgeModelId || !modelList.length) - return - - const firstProvider = modelList[0] - const firstModel = firstProvider.models[0] - if (!firstProvider || !firstModel) - return - - setJudgeModel(resourceType, resourceId, encodeModelSelection(firstProvider.provider, firstModel.model)) - }, [autoSelectFirst, modelList, resource.judgeModelId, resourceId, resourceType, setJudgeModel]) - return ( From d92722e7ab06cb6779138b372e6a4cc264e4a5f6 Mon Sep 17 00:00:00 2001 From: JzoNg Date: Wed, 29 Apr 2026 20:16:42 +0800 Subject: [PATCH 2/9] fix(web): default metrics for dataset --- .../components/evaluation/__tests__/index.spec.tsx | 7 ++++--- .../pipeline/pipeline-metrics-section.tsx | 8 ++++---- web/contract/console/evaluation.ts | 7 ------- web/contract/router.ts | 2 -- web/service/use-evaluation.ts | 14 +++++++++++--- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/web/app/components/evaluation/__tests__/index.spec.tsx b/web/app/components/evaluation/__tests__/index.spec.tsx index 3d1ae351d3..a3b0d2fcac 100644 --- a/web/app/components/evaluation/__tests__/index.spec.tsx +++ b/web/app/components/evaluation/__tests__/index.spec.tsx @@ -6,7 +6,7 @@ import ConditionsSection from '../components/conditions-section' import { useEvaluationStore } from '../store' const mockUpload = vi.hoisted(() => vi.fn()) -const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn()) +const mockUseDatasetEvaluationMetrics = vi.hoisted(() => vi.fn()) const mockUseDefaultEvaluationMetrics = vi.hoisted(() => vi.fn()) const mockUseEvaluationConfig = vi.hoisted(() => vi.fn()) const mockUseSaveEvaluationConfigMutation = vi.hoisted(() => vi.fn()) @@ -51,7 +51,7 @@ vi.mock('@/service/base', () => ({ vi.mock('@/service/use-evaluation', () => ({ useEvaluationConfig: (...args: unknown[]) => mockUseEvaluationConfig(...args), - useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args), + useDatasetEvaluationMetrics: (...args: unknown[]) => mockUseDatasetEvaluationMetrics(...args), useDefaultEvaluationMetrics: (...args: unknown[]) => mockUseDefaultEvaluationMetrics(...args), useSaveEvaluationConfigMutation: (...args: unknown[]) => mockUseSaveEvaluationConfigMutation(...args), useStartEvaluationRunMutation: (...args: unknown[]) => mockUseStartEvaluationRunMutation(...args), @@ -119,7 +119,7 @@ describe('Evaluation', () => { data: null, }) - mockUseAvailableEvaluationMetrics.mockReturnValue({ + mockUseDatasetEvaluationMetrics.mockReturnValue({ data: { metrics: ['answer-correctness', 'faithfulness', 'context-precision', 'context-recall', 'context-relevance'], }, @@ -582,6 +582,7 @@ describe('Evaluation', () => { it('should render the pipeline-specific layout without auto-selecting a judge model', () => { renderWithQueryClient() + expect(mockUseDatasetEvaluationMetrics).toHaveBeenCalledWith('dataset-1') expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('empty') expect(screen.getByText('evaluation.history.columns.time')).toBeInTheDocument() expect(screen.getByText('Context Precision')).toBeInTheDocument() diff --git a/web/app/components/evaluation/components/pipeline/pipeline-metrics-section.tsx b/web/app/components/evaluation/components/pipeline/pipeline-metrics-section.tsx index 553083f867..568b66b5a6 100644 --- a/web/app/components/evaluation/components/pipeline/pipeline-metrics-section.tsx +++ b/web/app/components/evaluation/components/pipeline/pipeline-metrics-section.tsx @@ -7,7 +7,7 @@ import { useEffect, useMemo } from 'react' import { useTranslation } from 'react-i18next' import { BlockEnum } from '@/app/components/workflow/types' import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' -import { useAvailableEvaluationMetrics } from '@/service/use-evaluation' +import { useDatasetEvaluationMetrics } from '@/service/use-evaluation' import { usePublishedPipelineInfo } from '@/service/use-pipeline' import { useEvaluationResource, useEvaluationStore } from '../../store' import { buildMetricOption } from '../metric-selector/utils' @@ -49,7 +49,7 @@ const PipelineMetricsSection = ({ const addBuiltinMetric = useEvaluationStore(state => state.addBuiltinMetric) const removeMetric = useEvaluationStore(state => state.removeMetric) const updateMetricThreshold = useEvaluationStore(state => state.updateMetricThreshold) - const { data: availableMetricsData } = useAvailableEvaluationMetrics() + const { data: datasetMetricsData } = useDatasetEvaluationMetrics(resourceId) const { data: publishedPipeline } = usePublishedPipelineInfo(pipelineId || '') const resource = useEvaluationResource(resourceType, resourceId) const knowledgeIndexNodeInfoList = useMemo( @@ -63,12 +63,12 @@ const PipelineMetricsSection = ({ ), [resource.metrics]) const availableBuiltinMetrics = useMemo(() => { const metricIds = new Set([ - ...(availableMetricsData?.metrics ?? []), + ...(datasetMetricsData?.metrics ?? []), ...builtinMetricMap.keys(), ]) return Array.from(metricIds).map(metricId => buildMetricOption(metricId)) - }, [availableMetricsData?.metrics, builtinMetricMap]) + }, [datasetMetricsData?.metrics, builtinMetricMap]) useEffect(() => { if (!knowledgeIndexNodeInfoList.length) diff --git a/web/contract/console/evaluation.ts b/web/contract/console/evaluation.ts index 47580da438..ac961732f8 100644 --- a/web/contract/console/evaluation.ts +++ b/web/contract/console/evaluation.ts @@ -284,13 +284,6 @@ export const evaluationNodeInfoContract = base }>()) .output(type()) -export const availableEvaluationMetricsContract = base - .route({ - path: '/evaluation/available-metrics', - method: 'GET', - }) - .output(type()) - export const availableEvaluationWorkflowsContract = base .route({ path: '/workspaces/current/available-evaluation-workflows', diff --git a/web/contract/router.ts b/web/contract/router.ts index dc00a5cbbb..b02071ab28 100644 --- a/web/contract/router.ts +++ b/web/contract/router.ts @@ -3,7 +3,6 @@ import { accountAvatarContract } from './console/account' import { appDeleteContract, appWorkflowTypeConvertContract, workflowOnlineUsersContract } from './console/apps' import { bindPartnerStackContract, invoicesContract } from './console/billing' import { - availableEvaluationMetricsContract, availableEvaluationWorkflowsContract, cancelDatasetEvaluationRunContract, cancelEvaluationRunContract, @@ -148,7 +147,6 @@ export const consoleRouterContract = { metrics: evaluationMetricsContract, defaultMetrics: evaluationDefaultMetricsContract, nodeInfo: evaluationNodeInfoContract, - availableMetrics: availableEvaluationMetricsContract, availableWorkflows: availableEvaluationWorkflowsContract, associatedTargets: evaluationWorkflowAssociatedTargetsContract, file: evaluationFileContract, diff --git a/web/service/use-evaluation.ts b/web/service/use-evaluation.ts index cb3f46bf5b..84750417c3 100644 --- a/web/service/use-evaluation.ts +++ b/web/service/use-evaluation.ts @@ -54,9 +54,17 @@ export const useEvaluationConfig = ( return useQuery(getEvaluationConfigQueryOptions(resourceType, resourceId)) } -export const useAvailableEvaluationMetrics = (enabled = true) => { - return useQuery(consoleQuery.evaluation.availableMetrics.queryOptions({ - enabled, +export const useDatasetEvaluationMetrics = (datasetId: string, enabled = true) => { + return useQuery(consoleQuery.datasetEvaluation.metrics.queryOptions({ + input: datasetId + ? { + params: { + datasetId, + }, + } + : skipToken, + enabled: !!datasetId && enabled, + refetchOnWindowFocus: false, })) } From 25fc518c5d62e23c788c3ef43da10fddbb116433 Mon Sep 17 00:00:00 2001 From: JzoNg Date: Wed, 29 Apr 2026 20:28:41 +0800 Subject: [PATCH 3/9] fix(web): style of config --- .../components/pipeline/pipeline-metric-item.tsx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/web/app/components/evaluation/components/pipeline/pipeline-metric-item.tsx b/web/app/components/evaluation/components/pipeline/pipeline-metric-item.tsx index 4496f71edb..e357709c72 100644 --- a/web/app/components/evaluation/components/pipeline/pipeline-metric-item.tsx +++ b/web/app/components/evaluation/components/pipeline/pipeline-metric-item.tsx @@ -30,7 +30,7 @@ const PipelineMetricItem = ({ const metricDescription = getTranslatedMetricDescription(t, metric.id, metric.description) return ( -
+
)}
From ad58895b25ac9337cdfdf4c88cb7f0d4adff6c61 Mon Sep 17 00:00:00 2001 From: JzoNg Date: Wed, 29 Apr 2026 20:30:43 +0800 Subject: [PATCH 4/9] fix(web): template of dataset evaluation template --- .../evaluation/components/pipeline/pipeline-batch-actions.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/app/components/evaluation/components/pipeline/pipeline-batch-actions.tsx b/web/app/components/evaluation/components/pipeline/pipeline-batch-actions.tsx index 123c654f8e..0051491d80 100644 --- a/web/app/components/evaluation/components/pipeline/pipeline-batch-actions.tsx +++ b/web/app/components/evaluation/components/pipeline/pipeline-batch-actions.tsx @@ -11,7 +11,7 @@ import { useInputFieldsActions } from '../batch-test-panel/input-fields/use-inpu const PIPELINE_INPUT_FIELDS: InputField[] = [ { name: 'query', type: 'string' }, - { name: 'Expect Results', type: 'string' }, + { name: 'expect_results', type: 'string' }, ] const PipelineBatchActions = ({ From 24b482893dd414631779668ccd75224e3673e2e9 Mon Sep 17 00:00:00 2001 From: JzoNg Date: Wed, 29 Apr 2026 20:36:07 +0800 Subject: [PATCH 5/9] fix(web): pipeline batch test template --- .../evaluation/__tests__/index.spec.tsx | 31 +++++++++++++++++-- .../input-fields/use-input-fields-actions.ts | 4 ++- .../pipeline/pipeline-batch-actions.tsx | 2 ++ 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/web/app/components/evaluation/__tests__/index.spec.tsx b/web/app/components/evaluation/__tests__/index.spec.tsx index a3b0d2fcac..95e295903d 100644 --- a/web/app/components/evaluation/__tests__/index.spec.tsx +++ b/web/app/components/evaluation/__tests__/index.spec.tsx @@ -622,6 +622,33 @@ describe('Evaluation', () => { expect(screen.getByRole('button', { name: 'evaluation.pipeline.uploadAndRun' })).toBeEnabled() }) + it('should download the fixed pipeline template columns', () => { + const createElement = document.createElement.bind(document) + let downloadLink: HTMLAnchorElement | undefined + const createElementSpy = vi.spyOn(document, 'createElement').mockImplementation((tagName, options) => { + const element = createElement(tagName, options) + + if (tagName === 'a') { + downloadLink = element as HTMLAnchorElement + vi.spyOn(downloadLink, 'click').mockImplementation(() => {}) + } + + return element + }) + + renderWithQueryClient() + + fireEvent.click(screen.getByRole('button', { name: 'select-model' })) + fireEvent.click(screen.getByRole('button', { name: /Context Precision/i })) + fireEvent.click(screen.getByRole('button', { name: 'evaluation.batch.downloadTemplate' })) + + expect(downloadLink?.download).toBe('pipeline-evaluation-template.csv') + expect(decodeURIComponent(downloadLink?.href ?? '')).toContain('query,expect_results\n') + expect(decodeURIComponent(downloadLink?.href ?? '')).not.toContain('expected_output') + + createElementSpy.mockRestore() + }) + it('should upload and start a pipeline evaluation run', async () => { const startRun = vi.fn() mockUseStartEvaluationRunMutation.mockReturnValue({ @@ -640,14 +667,14 @@ describe('Evaluation', () => { fireEvent.click(screen.getByRole('button', { name: 'evaluation.pipeline.uploadAndRun' })) expect(screen.getAllByText('query').length).toBeGreaterThan(0) - expect(screen.getAllByText('Expect Results').length).toBeGreaterThan(0) + expect(screen.getAllByText('expect_results').length).toBeGreaterThan(0) const fileInput = document.querySelector('input[type="file"][accept=".csv"]') expect(fileInput).toBeInTheDocument() fireEvent.change(fileInput!, { target: { - files: [new File(['query,Expect Results'], 'pipeline-evaluation.csv', { type: 'text/csv' })], + files: [new File(['query,expect_results'], 'pipeline-evaluation.csv', { type: 'text/csv' })], }, }) diff --git a/web/app/components/evaluation/components/batch-test-panel/input-fields/use-input-fields-actions.ts b/web/app/components/evaluation/components/batch-test-panel/input-fields/use-input-fields-actions.ts index f390b9a9ad..c4b4b0182b 100644 --- a/web/app/components/evaluation/components/batch-test-panel/input-fields/use-input-fields-actions.ts +++ b/web/app/components/evaluation/components/batch-test-panel/input-fields/use-input-fields-actions.ts @@ -21,6 +21,7 @@ type UseInputFieldsActionsParams = EvaluationResourceProps & { isInputFieldsLoading: boolean isPanelReady: boolean isRunnable: boolean + templateContent?: string templateFileName: string } @@ -31,6 +32,7 @@ export const useInputFieldsActions = ({ isInputFieldsLoading, isPanelReady, isRunnable, + templateContent, templateFileName, }: UseInputFieldsActionsParams) => { const { t } = useTranslation('evaluation') @@ -79,7 +81,7 @@ export const useInputFieldsActions = ({ return } - const content = buildTemplateCsvContent(inputFields) + const content = templateContent ?? buildTemplateCsvContent(inputFields) const link = document.createElement('a') link.href = `data:text/csv;charset=utf-8,${encodeURIComponent(content)}` link.download = templateFileName diff --git a/web/app/components/evaluation/components/pipeline/pipeline-batch-actions.tsx b/web/app/components/evaluation/components/pipeline/pipeline-batch-actions.tsx index 0051491d80..474227c660 100644 --- a/web/app/components/evaluation/components/pipeline/pipeline-batch-actions.tsx +++ b/web/app/components/evaluation/components/pipeline/pipeline-batch-actions.tsx @@ -13,6 +13,7 @@ const PIPELINE_INPUT_FIELDS: InputField[] = [ { name: 'query', type: 'string' }, { name: 'expect_results', type: 'string' }, ] +const PIPELINE_TEMPLATE_CONTENT = 'query,expect_results\n' const PipelineBatchActions = ({ resourceType, @@ -29,6 +30,7 @@ const PipelineBatchActions = ({ isInputFieldsLoading: false, isPanelReady: isConfigReady, isRunnable, + templateContent: PIPELINE_TEMPLATE_CONTENT, templateFileName: EVALUATION_TEMPLATE_FILE_NAMES[resourceType], }) From c59a80a41f13dee64be67f3221c19aa58c4d1591 Mon Sep 17 00:00:00 2001 From: JzoNg Date: Wed, 29 Apr 2026 20:38:54 +0800 Subject: [PATCH 6/9] fix(web): test detail of dataset --- .../evaluation/components/pipeline/pipeline-results-panel.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/app/components/evaluation/components/pipeline/pipeline-results-panel.tsx b/web/app/components/evaluation/components/pipeline/pipeline-results-panel.tsx index c66f3ecc2f..48050ad27c 100644 --- a/web/app/components/evaluation/components/pipeline/pipeline-results-panel.tsx +++ b/web/app/components/evaluation/components/pipeline/pipeline-results-panel.tsx @@ -57,7 +57,7 @@ const PipelineResultsPanel = ({ if (isEmpty) { return ( -
+