From b92b8becd1d4d68658c82895e13c5560ed81e753 Mon Sep 17 00:00:00 2001 From: JzoNg Date: Mon, 30 Mar 2026 15:39:52 +0800 Subject: [PATCH] feat(web): metric selector --- .../evaluation/__tests__/index.spec.tsx | 131 +++++++++-- .../evaluation/__tests__/store.spec.ts | 24 +++ .../evaluation/components/metric-section.tsx | 19 +- .../evaluation/components/metric-selector.tsx | 203 ------------------ .../components/metric-selector/index.tsx | 149 +++++++++++++ .../metric-selector/selector-empty-state.tsx | 26 +++ .../metric-selector/selector-footer.tsx | 30 +++ .../selector-metric-section.tsx | 135 ++++++++++++ .../components/metric-selector/types.ts | 18 ++ .../use-metric-selector-data.ts | 167 ++++++++++++++ .../components/metric-selector/utils.ts | 77 +++++++ web/app/components/evaluation/store-utils.ts | 9 +- web/app/components/evaluation/store.ts | 20 +- web/app/components/evaluation/types.ts | 3 + web/i18n/en-US/evaluation.json | 11 +- 15 files changed, 786 insertions(+), 236 deletions(-) delete mode 100644 web/app/components/evaluation/components/metric-selector.tsx create mode 100644 web/app/components/evaluation/components/metric-selector/index.tsx create mode 100644 web/app/components/evaluation/components/metric-selector/selector-empty-state.tsx create mode 100644 web/app/components/evaluation/components/metric-selector/selector-footer.tsx create mode 100644 web/app/components/evaluation/components/metric-selector/selector-metric-section.tsx create mode 100644 web/app/components/evaluation/components/metric-selector/types.ts create mode 100644 web/app/components/evaluation/components/metric-selector/use-metric-selector-data.ts create mode 100644 web/app/components/evaluation/components/metric-selector/utils.ts diff --git a/web/app/components/evaluation/__tests__/index.spec.tsx b/web/app/components/evaluation/__tests__/index.spec.tsx index 55edd6ceb2..8666c8b96d 100644 --- a/web/app/components/evaluation/__tests__/index.spec.tsx +++ b/web/app/components/evaluation/__tests__/index.spec.tsx @@ -3,6 +3,9 @@ import Evaluation from '..' import { getEvaluationMockConfig } from '../mock' import { useEvaluationStore } from '../store' +const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn()) +const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn()) + vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({ useModelList: () => ({ data: [{ @@ -20,12 +23,39 @@ vi.mock('@/app/components/header/account-setting/model-provider-page/model-selec ), })) +vi.mock('@/service/use-evaluation', () => ({ + useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args), + useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args), +})) + describe('Evaluation', () => { beforeEach(() => { useEvaluationStore.setState({ resources: {} }) + vi.clearAllMocks() + + mockUseAvailableEvaluationMetrics.mockReturnValue({ + data: { + metrics: ['answer-correctness', 'faithfulness'], + }, + isLoading: false, + }) + + mockUseEvaluationNodeInfoMutation.mockReturnValue({ + isPending: false, + mutate: (_input: unknown, options?: { onSuccess?: (data: Record>) => void }) => { + options?.onSuccess?.({ + 'answer-correctness': [ + { node_id: 'node-answer', title: 'Answer Node', type: 'llm' }, + ], + 'faithfulness': [ + { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' }, + ], + }) + }, + }) }) - it('should search, add metrics, and create a batch history record', async () => { + it('should search, select metric nodes, and create a batch history record', async () => { vi.useFakeTimers() render() @@ -33,32 +63,27 @@ describe('Evaluation', () => { expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini') fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' })) - expect(screen.getByTestId('evaluation-metric-loading')).toBeInTheDocument() - await act(async () => { - vi.advanceTimersByTime(200) - }) - - fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchPlaceholder'), { + fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchNodeOrMetrics'), { target: { value: 'does-not-exist' }, }) - await act(async () => { - vi.advanceTimersByTime(200) - }) - expect(screen.getByText('evaluation.metrics.noResults')).toBeInTheDocument() - fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchPlaceholder'), { + fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchNodeOrMetrics'), { target: { value: 'faith' }, }) - await act(async () => { - vi.advanceTimersByTime(200) + fireEvent.click(screen.getByTestId('evaluation-metric-node-faithfulness-node-faithfulness')) + expect(screen.getAllByText('Faithfulness').length).toBeGreaterThan(0) + expect(screen.getAllByText('Retriever Node').length).toBeGreaterThan(0) + + fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchNodeOrMetrics'), { + target: { value: '' }, }) - fireEvent.click(screen.getByRole('button', { name: /Faithfulness/i })) - expect(screen.getAllByText('Faithfulness').length).toBeGreaterThan(0) + fireEvent.click(screen.getByTestId('evaluation-metric-node-answer-correctness-node-answer')) + expect(screen.getAllByText('Answer Correctness').length).toBeGreaterThan(0) fireEvent.click(screen.getByRole('button', { name: 'evaluation.batch.run' })) expect(screen.getByText('evaluation.batch.status.running')).toBeInTheDocument() @@ -109,4 +134,78 @@ describe('Evaluation', () => { expect(screen.queryByText('evaluation.conditions.selectTime')).not.toBeInTheDocument() }) + + it('should render the metric no-node empty state', () => { + mockUseAvailableEvaluationMetrics.mockReturnValue({ + data: { + metrics: ['context-precision'], + }, + isLoading: false, + }) + + mockUseEvaluationNodeInfoMutation.mockReturnValue({ + isPending: false, + mutate: (_input: unknown, options?: { onSuccess?: (data: Record>) => void }) => { + options?.onSuccess?.({ + 'context-precision': [], + }) + }, + }) + + render() + + fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' })) + + expect(screen.getByText('evaluation.metrics.noNodesInWorkflow')).toBeInTheDocument() + }) + + it('should render the global empty state when no metrics are available', () => { + mockUseAvailableEvaluationMetrics.mockReturnValue({ + data: { + metrics: [], + }, + isLoading: false, + }) + + render() + + fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' })) + + expect(screen.getByText('evaluation.metrics.noResults')).toBeInTheDocument() + }) + + it('should show more nodes when a metric has more than three nodes', () => { + mockUseAvailableEvaluationMetrics.mockReturnValue({ + data: { + metrics: ['answer-correctness'], + }, + isLoading: false, + }) + + mockUseEvaluationNodeInfoMutation.mockReturnValue({ + isPending: false, + mutate: (_input: unknown, options?: { onSuccess?: (data: Record>) => void }) => { + options?.onSuccess?.({ + 'answer-correctness': [ + { node_id: 'node-1', title: 'LLM 1', type: 'llm' }, + { node_id: 'node-2', title: 'LLM 2', type: 'llm' }, + { node_id: 'node-3', title: 'LLM 3', type: 'llm' }, + { node_id: 'node-4', title: 'LLM 4', type: 'llm' }, + ], + }) + }, + }) + + render() + + fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' })) + + expect(screen.getByText('LLM 3')).toBeInTheDocument() + expect(screen.queryByText('LLM 4')).not.toBeInTheDocument() + + fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.showMore' })) + + expect(screen.getByText('LLM 4')).toBeInTheDocument() + expect(screen.getByRole('button', { name: 'evaluation.metrics.showLess' })).toBeInTheDocument() + }) }) diff --git a/web/app/components/evaluation/__tests__/store.spec.ts b/web/app/components/evaluation/__tests__/store.spec.ts index d37952be61..83d9fd9de4 100644 --- a/web/app/components/evaluation/__tests__/store.spec.ts +++ b/web/app/components/evaluation/__tests__/store.spec.ts @@ -51,6 +51,30 @@ describe('evaluation store', () => { expect(useEvaluationStore.getState().resources['workflow:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false) }) + it('should upsert builtin metric node selections', () => { + const resourceType = 'workflow' + const resourceId = 'app-4' + const store = useEvaluationStore.getState() + const config = getEvaluationMockConfig(resourceType) + const metricId = config.builtinMetrics[0].id + + store.ensureResource(resourceType, resourceId) + store.addBuiltinMetric(resourceType, resourceId, metricId, [ + { node_id: 'node-1', title: 'Answer Node', type: 'answer' }, + ]) + + store.addBuiltinMetric(resourceType, resourceId, metricId, [ + { node_id: 'node-2', title: 'Retriever Node', type: 'retriever' }, + ]) + + const metric = useEvaluationStore.getState().resources['workflow:app-4'].metrics.find(item => item.optionId === metricId) + + expect(metric?.nodeInfoList).toEqual([ + { node_id: 'node-2', title: 'Retriever Node', type: 'retriever' }, + ]) + expect(useEvaluationStore.getState().resources['workflow:app-4'].metrics.filter(item => item.optionId === metricId)).toHaveLength(1) + }) + it('should update condition groups and adapt operators to field types', () => { const resourceType = 'pipeline' const resourceId = 'dataset-1' diff --git a/web/app/components/evaluation/components/metric-section.tsx b/web/app/components/evaluation/components/metric-section.tsx index f72b214c27..704b432ca3 100644 --- a/web/app/components/evaluation/components/metric-section.tsx +++ b/web/app/components/evaluation/components/metric-section.tsx @@ -36,7 +36,7 @@ const MetricSection = ({ )} {resource.metrics.map(metric => ( -
+
{metric.label}
@@ -46,6 +46,22 @@ const MetricSection = ({ {badge} ))}
+ {metric.kind === 'builtin' && ( +
+
{t('metrics.nodesLabel')}
+
+ {metric.nodeInfoList?.length + ? metric.nodeInfoList.map(nodeInfo => ( + + {nodeInfo.title} + + )) + : ( + {t('metrics.nodesAll')} + )} +
+
+ )}
diff --git a/web/app/components/evaluation/components/metric-selector.tsx b/web/app/components/evaluation/components/metric-selector.tsx deleted file mode 100644 index 0ea9231b61..0000000000 --- a/web/app/components/evaluation/components/metric-selector.tsx +++ /dev/null @@ -1,203 +0,0 @@ -'use client' - -import type { ChangeEvent } from 'react' -import type { EvaluationResourceProps } from '../types' -import { useEffect, useMemo, useRef, useState } from 'react' -import { useTranslation } from 'react-i18next' -import Badge from '@/app/components/base/badge' -import Button from '@/app/components/base/button' -import Input from '@/app/components/base/input' -import { - Popover, - PopoverContent, - PopoverTrigger, -} from '@/app/components/base/ui/popover' -import { cn } from '@/utils/classnames' -import { getEvaluationMockConfig } from '../mock' -import { useEvaluationResource, useEvaluationStore } from '../store' - -type MetricSelectorProps = EvaluationResourceProps & { - triggerVariant?: 'primary' | 'warning' | 'secondary' | 'secondary-accent' | 'ghost' | 'ghost-accent' | 'tertiary' - triggerClassName?: string - triggerStyle?: 'button' | 'text' -} - -const MetricSelector = ({ - resourceType, - resourceId, - triggerVariant = 'secondary', - triggerClassName, - triggerStyle = 'button', -}: MetricSelectorProps) => { - const { t } = useTranslation('evaluation') - const config = getEvaluationMockConfig(resourceType) - const metricGroupLabels = { - quality: t('metrics.groups.quality'), - operations: t('metrics.groups.operations'), - } - const metrics = useEvaluationResource(resourceType, resourceId).metrics - const addBuiltinMetric = useEvaluationStore(state => state.addBuiltinMetric) - const addCustomMetric = useEvaluationStore(state => state.addCustomMetric) - const [open, setOpen] = useState(false) - const [query, setQuery] = useState('') - const [showAll, setShowAll] = useState(false) - const [isLoading, setIsLoading] = useState(false) - const loadingTimerRef = useRef(null) - - const triggerLoading = () => { - if (loadingTimerRef.current) - window.clearTimeout(loadingTimerRef.current) - - setIsLoading(true) - loadingTimerRef.current = window.setTimeout(() => { - setIsLoading(false) - }, 180) - } - - const handleOpenChange = (nextOpen: boolean) => { - setOpen(nextOpen) - - if (nextOpen) { - triggerLoading() - return - } - - if (loadingTimerRef.current) - window.clearTimeout(loadingTimerRef.current) - setIsLoading(false) - } - - const handleQueryChange = (event: ChangeEvent) => { - setQuery(event.target.value) - if (open) - triggerLoading() - } - - useEffect(() => { - return () => { - if (loadingTimerRef.current) - window.clearTimeout(loadingTimerRef.current) - } - }, []) - - const filteredGroups = useMemo(() => { - const filteredMetrics = config.builtinMetrics.filter((metric) => { - const keyword = query.trim().toLowerCase() - if (!keyword) - return true - - return metric.label.toLowerCase().includes(keyword) || metric.description.toLowerCase().includes(keyword) - }) - - const grouped = filteredMetrics.reduce>((acc, metric) => { - acc[metric.group] = [...(acc[metric.group] ?? []), metric] - return acc - }, {}) - - return Object.entries(grouped) - }, [config.builtinMetrics, query]) - - return ( - - -