From 2607eb8d32ecf94c4321beb5514351114e6d79de Mon Sep 17 00:00:00 2001 From: JzoNg Date: Wed, 29 Apr 2026 13:56:33 +0800 Subject: [PATCH] feat(web): default metrics --- .../evaluation/__tests__/index.spec.tsx | 101 ++++++++++-------- .../layout/non-pipeline-evaluation.tsx | 4 +- .../metric-section/__tests__/index.spec.tsx | 73 +++++++------ .../components/metric-section/index.tsx | 58 ++-------- .../components/metric-selector/index.tsx | 3 - .../components/metric-selector/types.ts | 4 +- .../use-metric-selector-data.ts | 85 +++++---------- .../components/metric-selector/utils.ts | 41 +++++-- web/app/components/evaluation/types.ts | 6 ++ web/contract/console/evaluation.ts | 15 +++ web/contract/router.ts | 2 + web/service/use-evaluation.ts | 19 +++- web/types/evaluation.ts | 5 + 13 files changed, 213 insertions(+), 203 deletions(-) diff --git a/web/app/components/evaluation/__tests__/index.spec.tsx b/web/app/components/evaluation/__tests__/index.spec.tsx index 4a8d409d9a..c060b65e6f 100644 --- a/web/app/components/evaluation/__tests__/index.spec.tsx +++ b/web/app/components/evaluation/__tests__/index.spec.tsx @@ -7,8 +7,8 @@ import { useEvaluationStore } from '../store' const mockUpload = vi.hoisted(() => vi.fn()) const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn()) +const mockUseDefaultEvaluationMetrics = vi.hoisted(() => vi.fn()) const mockUseEvaluationConfig = vi.hoisted(() => vi.fn()) -const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn()) const mockUseSaveEvaluationConfigMutation = vi.hoisted(() => vi.fn()) const mockUseStartEvaluationRunMutation = vi.hoisted(() => vi.fn()) const mockUsePublishedPipelineInfo = vi.hoisted(() => vi.fn()) @@ -51,7 +51,7 @@ vi.mock('@/service/base', () => ({ vi.mock('@/service/use-evaluation', () => ({ useEvaluationConfig: (...args: unknown[]) => mockUseEvaluationConfig(...args), useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args), - useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args), + useDefaultEvaluationMetrics: (...args: unknown[]) => mockUseDefaultEvaluationMetrics(...args), useSaveEvaluationConfigMutation: (...args: unknown[]) => mockUseSaveEvaluationConfigMutation(...args), useStartEvaluationRunMutation: (...args: unknown[]) => mockUseStartEvaluationRunMutation(...args), })) @@ -141,18 +141,41 @@ describe('Evaluation', () => { isLoading: false, }) - mockUseEvaluationNodeInfoMutation.mockReturnValue({ - isPending: false, - mutate: (_input: unknown, options?: { onSuccess?: (data: Record>) => void }) => { - options?.onSuccess?.({ - 'answer-correctness': [ - { node_id: 'node-answer', title: 'Answer Node', type: 'llm' }, - ], - 'faithfulness': [ - { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' }, - ], - }) + mockUseDefaultEvaluationMetrics.mockReturnValue({ + data: { + default_metrics: [ + { + metric: 'answer-correctness', + value_type: 'number', + node_info_list: [ + { node_id: 'node-answer', title: 'Answer Node', type: 'llm' }, + ], + }, + { + metric: 'faithfulness', + value_type: 'number', + node_info_list: [ + { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' }, + ], + }, + { + metric: 'context-precision', + value_type: 'number', + node_info_list: [], + }, + { + metric: 'context-recall', + value_type: 'number', + node_info_list: [], + }, + { + metric: 'context-relevance', + value_type: 'number', + node_info_list: [], + }, + ], }, + isLoading: false, }) mockUseSaveEvaluationConfigMutation.mockReturnValue({ isPending: false, @@ -361,22 +384,19 @@ describe('Evaluation', () => { }) it('should render the metric no-node empty state', () => { - mockUseAvailableEvaluationMetrics.mockReturnValue({ + mockUseDefaultEvaluationMetrics.mockReturnValue({ data: { - metrics: ['context-precision'], + default_metrics: [ + { + metric: 'context-precision', + value_type: 'number', + node_info_list: [], + }, + ], }, isLoading: false, }) - mockUseEvaluationNodeInfoMutation.mockReturnValue({ - isPending: false, - mutate: (_input: unknown, options?: { onSuccess?: (data: Record>) => void }) => { - options?.onSuccess?.({ - 'context-precision': [], - }) - }, - }) - renderWithQueryClient() fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' })) @@ -385,9 +405,9 @@ describe('Evaluation', () => { }) it('should render the global empty state when no metrics are available', () => { - mockUseAvailableEvaluationMetrics.mockReturnValue({ + mockUseDefaultEvaluationMetrics.mockReturnValue({ data: { - metrics: [], + default_metrics: [], }, isLoading: false, }) @@ -400,27 +420,24 @@ describe('Evaluation', () => { }) it('should show more nodes when a metric has more than three nodes', () => { - mockUseAvailableEvaluationMetrics.mockReturnValue({ + mockUseDefaultEvaluationMetrics.mockReturnValue({ data: { - metrics: ['answer-correctness'], + default_metrics: [ + { + metric: 'answer-correctness', + value_type: 'number', + node_info_list: [ + { node_id: 'node-1', title: 'LLM 1', type: 'llm' }, + { node_id: 'node-2', title: 'LLM 2', type: 'llm' }, + { node_id: 'node-3', title: 'LLM 3', type: 'llm' }, + { node_id: 'node-4', title: 'LLM 4', type: 'llm' }, + ], + }, + ], }, isLoading: false, }) - mockUseEvaluationNodeInfoMutation.mockReturnValue({ - isPending: false, - mutate: (_input: unknown, options?: { onSuccess?: (data: Record>) => void }) => { - options?.onSuccess?.({ - 'answer-correctness': [ - { node_id: 'node-1', title: 'LLM 1', type: 'llm' }, - { node_id: 'node-2', title: 'LLM 2', type: 'llm' }, - { node_id: 'node-3', title: 'LLM 3', type: 'llm' }, - { node_id: 'node-4', title: 'LLM 4', type: 'llm' }, - ], - }) - }, - }) - renderWithQueryClient() fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' })) diff --git a/web/app/components/evaluation/components/layout/non-pipeline-evaluation.tsx b/web/app/components/evaluation/components/layout/non-pipeline-evaluation.tsx index 5c6acf289d..69444beaa9 100644 --- a/web/app/components/evaluation/components/layout/non-pipeline-evaluation.tsx +++ b/web/app/components/evaluation/components/layout/non-pipeline-evaluation.tsx @@ -1,6 +1,6 @@ 'use client' -import type { EvaluationResourceProps } from '../../types' +import type { NonPipelineEvaluationResourceProps } from '../../types' import { useTranslation } from 'react-i18next' import { useDocLink } from '@/context/i18n' import BatchTestPanel from '../batch-test-panel' @@ -13,7 +13,7 @@ import SectionHeader, { InlineSectionHeader } from '../section-header' const NonPipelineEvaluation = ({ resourceType, resourceId, -}: EvaluationResourceProps) => { +}: NonPipelineEvaluationResourceProps) => { const { t } = useTranslation('evaluation') const { t: tCommon } = useTranslation('common') const docLink = useDocLink() diff --git a/web/app/components/evaluation/components/metric-section/__tests__/index.spec.tsx b/web/app/components/evaluation/components/metric-section/__tests__/index.spec.tsx index a234635c50..6b240966bf 100644 --- a/web/app/components/evaluation/components/metric-section/__tests__/index.spec.tsx +++ b/web/app/components/evaluation/components/metric-section/__tests__/index.spec.tsx @@ -4,13 +4,11 @@ import MetricSection from '..' import { useEvaluationStore } from '../../../store' const mockUseAvailableEvaluationWorkflows = vi.hoisted(() => vi.fn()) -const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn()) -const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn()) +const mockUseDefaultEvaluationMetrics = vi.hoisted(() => vi.fn()) vi.mock('@/service/use-evaluation', () => ({ useAvailableEvaluationWorkflows: (...args: unknown[]) => mockUseAvailableEvaluationWorkflows(...args), - useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args), - useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args), + useDefaultEvaluationMetrics: (...args: unknown[]) => mockUseDefaultEvaluationMetrics(...args), })) const resourceType = 'apps' as const @@ -37,9 +35,17 @@ describe('MetricSection', () => { vi.clearAllMocks() useEvaluationStore.setState({ resources: {} }) - mockUseAvailableEvaluationMetrics.mockReturnValue({ + mockUseDefaultEvaluationMetrics.mockReturnValue({ data: { - metrics: ['answer-correctness'], + default_metrics: [ + { + metric: 'answer-correctness', + value_type: 'number', + node_info_list: [ + { node_id: 'node-answer', title: 'Answer Node', type: 'llm' }, + ], + }, + ], }, isLoading: false, }) @@ -54,17 +60,6 @@ describe('MetricSection', () => { isFetchingNextPage: false, isLoading: false, }) - - mockUseEvaluationNodeInfoMutation.mockReturnValue({ - isPending: false, - mutate: (_input: unknown, options?: { onSuccess?: (data: Record>) => void }) => { - options?.onSuccess?.({ - 'answer-correctness': [ - { node_id: 'node-answer', title: 'Answer Node', type: 'llm' }, - ], - }) - }, - }) }) // Verify the empty state block extracted from MetricSection. @@ -138,16 +133,20 @@ describe('MetricSection', () => { it('should show only unselected nodes in the add-node dropdown and append the selected node', () => { // Arrange - mockUseEvaluationNodeInfoMutation.mockReturnValue({ - isPending: false, - mutate: (_input: unknown, options?: { onSuccess?: (data: Record>) => void }) => { - options?.onSuccess?.({ - 'answer-correctness': [ - { node_id: 'node-1', title: 'LLM 1', type: 'llm' }, - { node_id: 'node-2', title: 'LLM 2', type: 'llm' }, - ], - }) + mockUseDefaultEvaluationMetrics.mockReturnValue({ + data: { + default_metrics: [ + { + metric: 'answer-correctness', + value_type: 'number', + node_info_list: [ + { node_id: 'node-1', title: 'LLM 1', type: 'llm' }, + { node_id: 'node-2', title: 'LLM 2', type: 'llm' }, + ], + }, + ], }, + isLoading: false, }) act(() => { @@ -171,16 +170,20 @@ describe('MetricSection', () => { it('should hide the add-node button when the builtin metric already targets all nodes', () => { // Arrange - mockUseEvaluationNodeInfoMutation.mockReturnValue({ - isPending: false, - mutate: (_input: unknown, options?: { onSuccess?: (data: Record>) => void }) => { - options?.onSuccess?.({ - 'answer-correctness': [ - { node_id: 'node-1', title: 'LLM 1', type: 'llm' }, - { node_id: 'node-2', title: 'LLM 2', type: 'llm' }, - ], - }) + mockUseDefaultEvaluationMetrics.mockReturnValue({ + data: { + default_metrics: [ + { + metric: 'answer-correctness', + value_type: 'number', + node_info_list: [ + { node_id: 'node-1', title: 'LLM 1', type: 'llm' }, + { node_id: 'node-2', title: 'LLM 2', type: 'llm' }, + ], + }, + ], }, + isLoading: false, }) act(() => { diff --git a/web/app/components/evaluation/components/metric-section/index.tsx b/web/app/components/evaluation/components/metric-section/index.tsx index 7887b2733b..be7f0cc8cc 100644 --- a/web/app/components/evaluation/components/metric-section/index.tsx +++ b/web/app/components/evaluation/components/metric-section/index.tsx @@ -1,13 +1,11 @@ 'use client' -import type { EvaluationResourceProps } from '../../types' -import type { NodeInfo } from '@/types/evaluation' -import { useEffect, useMemo, useState } from 'react' +import type { NonPipelineEvaluationResourceProps } from '../../types' import { useTranslation } from 'react-i18next' -import { useAvailableEvaluationMetrics, useEvaluationNodeInfoMutation } from '@/service/use-evaluation' +import { useDefaultEvaluationMetrics } from '@/service/use-evaluation' import { useEvaluationResource } from '../../store' import MetricSelector from '../metric-selector' -import { toEvaluationTargetType } from '../metric-selector/utils' +import { getDefaultMetricNodeInfoMap } from '../metric-selector/utils' import { InlineSectionHeader } from '../section-header' import MetricCard from './metric-card' import MetricSectionEmptyState from './metric-section-empty-state' @@ -15,55 +13,13 @@ import MetricSectionEmptyState from './metric-section-empty-state' const MetricSection = ({ resourceType, resourceId, -}: EvaluationResourceProps) => { +}: NonPipelineEvaluationResourceProps) => { const { t } = useTranslation('evaluation') const resource = useEvaluationResource(resourceType, resourceId) - const [nodeInfoMap, setNodeInfoMap] = useState>({}) const hasMetrics = resource.metrics.length > 0 const hasBuiltinMetrics = resource.metrics.some(metric => metric.kind === 'builtin') - const shouldLoadNodeInfo = resourceType !== 'datasets' && !!resourceId && hasBuiltinMetrics - const { data: availableMetricsData } = useAvailableEvaluationMetrics(shouldLoadNodeInfo) - const { mutate: loadNodeInfo } = useEvaluationNodeInfoMutation() - const availableMetricIds = useMemo(() => availableMetricsData?.metrics ?? [], [availableMetricsData?.metrics]) - const availableMetricIdsKey = availableMetricIds.join(',') - const resolvedNodeInfoMap = shouldLoadNodeInfo ? nodeInfoMap : {} - - useEffect(() => { - if (!shouldLoadNodeInfo || availableMetricIds.length === 0) - return - - let isActive = true - - loadNodeInfo( - { - params: { - targetType: toEvaluationTargetType(resourceType), - targetId: resourceId, - }, - body: { - metrics: availableMetricIds, - }, - }, - { - onSuccess: (data) => { - if (!isActive) - return - - setNodeInfoMap(data) - }, - onError: () => { - if (!isActive) - return - - setNodeInfoMap({}) - }, - }, - ) - - return () => { - isActive = false - } - }, [availableMetricIds, availableMetricIdsKey, loadNodeInfo, resourceId, resourceType, shouldLoadNodeInfo]) + const { data: defaultMetricsData } = useDefaultEvaluationMetrics(resourceType, resourceId, hasBuiltinMetrics) + const nodeInfoMap = getDefaultMetricNodeInfoMap(defaultMetricsData?.default_metrics ?? []) return (
@@ -79,7 +35,7 @@ const MetricSection = ({ resourceType={resourceType} resourceId={resourceId} metric={metric} - availableNodeInfoList={metric.kind === 'builtin' ? (resolvedNodeInfoMap[metric.optionId] ?? []) : undefined} + availableNodeInfoList={metric.kind === 'builtin' ? (nodeInfoMap[metric.optionId] ?? []) : undefined} /> ))} state.addCustomMetric) const [open, setOpen] = useState(false) const [query, setQuery] = useState('') - const [nodeInfoMap, setNodeInfoMap] = useState>>({}) const [collapsedMetricMap, setCollapsedMetricMap] = useState>({}) const [expandedMetricNodesMap, setExpandedMetricNodesMap] = useState>({}) const hasCustomMetric = resource.metrics.some(metric => metric.kind === 'custom-workflow') @@ -44,8 +43,6 @@ const MetricSelector = ({ query, resourceType, resourceId, - nodeInfoMap, - setNodeInfoMap, }) const handleOpenChange = (nextOpen: boolean) => { diff --git a/web/app/components/evaluation/components/metric-selector/types.ts b/web/app/components/evaluation/components/metric-selector/types.ts index 102329701e..11f7518257 100644 --- a/web/app/components/evaluation/components/metric-selector/types.ts +++ b/web/app/components/evaluation/components/metric-selector/types.ts @@ -1,7 +1,7 @@ -import type { EvaluationMetric, EvaluationResourceProps, MetricOption } from '../../types' +import type { EvaluationMetric, MetricOption, NonPipelineEvaluationResourceProps } from '../../types' import type { NodeInfo } from '@/types/evaluation' -export type MetricSelectorProps = EvaluationResourceProps & { +export type MetricSelectorProps = NonPipelineEvaluationResourceProps & { triggerClassName?: string triggerStyle?: 'button' | 'text' } diff --git a/web/app/components/evaluation/components/metric-selector/use-metric-selector-data.ts b/web/app/components/evaluation/components/metric-selector/use-metric-selector-data.ts index 24e1b494bc..278364ab49 100644 --- a/web/app/components/evaluation/components/metric-selector/use-metric-selector-data.ts +++ b/web/app/components/evaluation/components/metric-selector/use-metric-selector-data.ts @@ -1,24 +1,24 @@ +import type { MetricOption, NonPipelineEvaluationResourceType } from '../../types' import type { BuiltinMetricMap, MetricSelectorSection } from './types' import type { NodeInfo } from '@/types/evaluation' -import { useEffect, useMemo } from 'react' +import { useMemo } from 'react' import { useTranslation } from 'react-i18next' -import { useAvailableEvaluationMetrics, useEvaluationNodeInfoMutation } from '@/service/use-evaluation' +import { useDefaultEvaluationMetrics } from '@/service/use-evaluation' import { getTranslatedMetricDescription } from '../../default-metric-descriptions' import { getEvaluationMockConfig } from '../../mock' import { useEvaluationResource, useEvaluationStore } from '../../store' import { buildMetricOption, dedupeNodeInfoList, - toEvaluationTargetType, + getDefaultMetricNodeInfoMap, + normalizeMetricValueType, } from './utils' type UseMetricSelectorDataOptions = { open: boolean query: string - resourceType: 'apps' | 'datasets' | 'snippets' + resourceType: NonPipelineEvaluationResourceType resourceId: string - nodeInfoMap: Record - setNodeInfoMap: (value: Record) => void } type UseMetricSelectorDataResult = { @@ -33,16 +33,13 @@ export const useMetricSelectorData = ({ query, resourceType, resourceId, - nodeInfoMap, - setNodeInfoMap, }: UseMetricSelectorDataOptions): UseMetricSelectorDataResult => { const { t } = useTranslation('evaluation') const config = getEvaluationMockConfig(resourceType) const metrics = useEvaluationResource(resourceType, resourceId).metrics const addBuiltinMetric = useEvaluationStore(state => state.addBuiltinMetric) const removeMetric = useEvaluationStore(state => state.removeMetric) - const { data: availableMetricsData, isLoading: isAvailableMetricsLoading } = useAvailableEvaluationMetrics(open) - const { mutate: loadNodeInfo, isPending: isNodeInfoLoading } = useEvaluationNodeInfoMutation() + const { data: defaultMetricsData, isLoading: isDefaultMetricsLoading } = useDefaultEvaluationMetrics(resourceType, resourceId, open) const builtinMetrics = useMemo(() => { return metrics.filter(metric => metric.kind === 'builtin') @@ -52,54 +49,29 @@ export const useMetricSelectorData = ({ return new Map(builtinMetrics.map(metric => [metric.optionId, metric] as const)) }, [builtinMetrics]) - const availableMetricIds = useMemo(() => availableMetricsData?.metrics ?? [], [availableMetricsData?.metrics]) - const availableMetricIdsKey = availableMetricIds.join(',') + const defaultMetrics = useMemo(() => defaultMetricsData?.default_metrics ?? [], [defaultMetricsData?.default_metrics]) + const nodeInfoMap = useMemo(() => getDefaultMetricNodeInfoMap(defaultMetrics), [defaultMetrics]) const resolvedMetrics = useMemo(() => { const metricsMap = new Map(config.builtinMetrics.map(metric => [metric.id, metric] as const)) - return availableMetricIds.map(metricId => metricsMap.get(metricId) ?? buildMetricOption(metricId)) - }, [availableMetricIds, config.builtinMetrics]) + return defaultMetrics + .map((defaultMetric) => { + if (!defaultMetric.metric) + return null - useEffect(() => { - if (!open) - return + const configMetric = metricsMap.get(defaultMetric.metric) + if (configMetric) { + return { + ...configMetric, + valueType: normalizeMetricValueType(defaultMetric.value_type), + } + } - if (resourceType === 'datasets' || !resourceId || availableMetricIds.length === 0) - return - - let isActive = true - - loadNodeInfo( - { - params: { - targetType: toEvaluationTargetType(resourceType), - targetId: resourceId, - }, - body: { - metrics: availableMetricIds, - }, - }, - { - onSuccess: (data) => { - if (!isActive) - return - - setNodeInfoMap(data) - }, - onError: () => { - if (!isActive) - return - - setNodeInfoMap({}) - }, - }, - ) - - return () => { - isActive = false - } - }, [availableMetricIds, availableMetricIdsKey, loadNodeInfo, open, resourceId, resourceType, setNodeInfoMap]) + return buildMetricOption(defaultMetric.metric, defaultMetric.value_type) + }) + .filter((metric): metric is MetricOption => !!metric) + }, [config.builtinMetrics, defaultMetrics]) const filteredSections = useMemo(() => { const keyword = query.trim().toLowerCase() @@ -110,8 +82,7 @@ export const useMetricSelectorData = ({ || metric.label.toLowerCase().includes(keyword) || metricDescription.toLowerCase().includes(keyword) const metricNodes = nodeInfoMap[metric.id] ?? [] - const supportsNodeSelection = resourceType !== 'datasets' - const hasNoNodeInfo = supportsNodeSelection && metricNodes.length === 0 + const hasNoNodeInfo = metricNodes.length === 0 if (hasNoNodeInfo) { if (!metricMatches) @@ -146,8 +117,8 @@ export const useMetricSelectorData = ({ hasNoNodeInfo: false, visibleNodes, } - }).filter(section => !!section) - }, [nodeInfoMap, query, resolvedMetrics, resourceType, t]) + }).filter((section): section is MetricSelectorSection => !!section) + }, [nodeInfoMap, query, resolvedMetrics, t]) const toggleNodeSelection = (metricId: string, nodeInfo: NodeInfo) => { const addedMetric = builtinMetricMap.get(metricId) @@ -170,7 +141,7 @@ export const useMetricSelectorData = ({ return { builtinMetricMap, filteredSections, - isRemoteLoading: isAvailableMetricsLoading || isNodeInfoLoading, + isRemoteLoading: isDefaultMetricsLoading, toggleNodeSelection, } } diff --git a/web/app/components/evaluation/components/metric-selector/utils.ts b/web/app/components/evaluation/components/metric-selector/utils.ts index 2a6ca53158..3bf6ebd06e 100644 --- a/web/app/components/evaluation/components/metric-selector/utils.ts +++ b/web/app/components/evaluation/components/metric-selector/utils.ts @@ -1,10 +1,15 @@ -import type { MetricOption } from '../../types' +import type { ConditionMetricValueType, MetricOption } from '../../types' import type { MetricVisualTone } from './types' -import type { EvaluationTargetType, NodeInfo } from '@/types/evaluation' +import type { EvaluationDefaultMetric, NodeInfo } from '@/types/evaluation' import { getDefaultMetricDescription } from '../../default-metric-descriptions' -export const toEvaluationTargetType = (resourceType: 'apps' | 'snippets'): EvaluationTargetType => { - return resourceType === 'snippets' ? 'snippets' : 'apps' +const defaultConditionMetricValueType: ConditionMetricValueType = 'number' + +export const normalizeMetricValueType = (valueType: string | undefined): ConditionMetricValueType => { + if (valueType === 'string' || valueType === 'number' || valueType === 'boolean') + return valueType + + return defaultConditionMetricValueType } const humanizeMetricId = (metricId: string) => { @@ -15,13 +20,33 @@ const humanizeMetricId = (metricId: string) => { .join(' ') } -export const buildMetricOption = (metricId: string): MetricOption => ({ +export const buildMetricOption = (metricId: string, valueType?: string): MetricOption => ({ id: metricId, label: humanizeMetricId(metricId), description: getDefaultMetricDescription(metricId), - valueType: 'number', + valueType: normalizeMetricValueType(valueType), }) +export const dedupeNodeInfoList = (nodeInfoList: NodeInfo[]) => { + return Array.from(new Map(nodeInfoList.map(nodeInfo => [nodeInfo.node_id, nodeInfo])).values()) +} + +export const getDefaultMetricNodeInfoMap = (defaultMetrics: EvaluationDefaultMetric[]) => { + const nodeInfoMap: Record = {} + + defaultMetrics.forEach((defaultMetric) => { + if (!defaultMetric.metric) + return + + nodeInfoMap[defaultMetric.metric] = dedupeNodeInfoList([ + ...(nodeInfoMap[defaultMetric.metric] ?? []), + ...(defaultMetric.node_info_list ?? []), + ]) + }) + + return nodeInfoMap +} + export const getMetricVisual = (metricId: string): { icon: string, tone: MetricVisualTone } => { if (['context-precision', 'context-recall'].includes(metricId)) { return { @@ -71,7 +96,3 @@ export const getToneClasses = (tone: MetricVisualTone) => { solid: 'bg-util-colors-indigo-indigo-500 text-white', } } - -export const dedupeNodeInfoList = (nodeInfoList: NodeInfo[]) => { - return Array.from(new Map(nodeInfoList.map(nodeInfo => [nodeInfo.node_id, nodeInfo])).values()) -} diff --git a/web/app/components/evaluation/types.ts b/web/app/components/evaluation/types.ts index c0a1574cf8..57d0a66832 100644 --- a/web/app/components/evaluation/types.ts +++ b/web/app/components/evaluation/types.ts @@ -1,12 +1,18 @@ import type { NodeInfo } from '@/types/evaluation' export type EvaluationResourceType = 'apps' | 'datasets' | 'snippets' +export type NonPipelineEvaluationResourceType = Exclude export type EvaluationResourceProps = { resourceType: EvaluationResourceType resourceId: string } +export type NonPipelineEvaluationResourceProps = { + resourceType: NonPipelineEvaluationResourceType + resourceId: string +} + export type MetricKind = 'builtin' | 'custom-workflow' export type BatchTestTab = 'input-fields' | 'history' diff --git a/web/contract/console/evaluation.ts b/web/contract/console/evaluation.ts index 5daa973916..47580da438 100644 --- a/web/contract/console/evaluation.ts +++ b/web/contract/console/evaluation.ts @@ -2,6 +2,8 @@ import type { AvailableEvaluationWorkflowsResponse, EvaluationConfig, EvaluationConfigData, + EvaluationDefaultMetricsResponse, + EvaluationDefaultMetricsTargetType, EvaluationFileInfo, EvaluationLogsResponse, EvaluationMetricsListResponse, @@ -255,6 +257,19 @@ export const evaluationMetricsContract = base }>()) .output(type()) +export const evaluationDefaultMetricsContract = base + .route({ + path: '/{targetType}/{targetId}/evaluation/default-metrics', + method: 'GET', + }) + .input(type<{ + params: { + targetType: EvaluationDefaultMetricsTargetType + targetId: string + } + }>()) + .output(type()) + export const evaluationNodeInfoContract = base .route({ path: '/{targetType}/{targetId}/evaluation/node-info', diff --git a/web/contract/router.ts b/web/contract/router.ts index e5610dc81c..dc00a5cbbb 100644 --- a/web/contract/router.ts +++ b/web/contract/router.ts @@ -14,6 +14,7 @@ import { datasetEvaluationRunDetailContract, datasetEvaluationTemplateDownloadContract, evaluationConfigContract, + evaluationDefaultMetricsContract, evaluationFileContract, evaluationLogsContract, evaluationMetricsContract, @@ -145,6 +146,7 @@ export const consoleRouterContract = { runDetail: evaluationRunDetailContract, cancelRun: cancelEvaluationRunContract, metrics: evaluationMetricsContract, + defaultMetrics: evaluationDefaultMetricsContract, nodeInfo: evaluationNodeInfoContract, availableMetrics: availableEvaluationMetricsContract, availableWorkflows: availableEvaluationWorkflowsContract, diff --git a/web/service/use-evaluation.ts b/web/service/use-evaluation.ts index f167a3ea40..cb3f46bf5b 100644 --- a/web/service/use-evaluation.ts +++ b/web/service/use-evaluation.ts @@ -1,4 +1,4 @@ -import type { EvaluationResourceType } from '@/app/components/evaluation/types' +import type { EvaluationResourceType, NonPipelineEvaluationResourceType } from '@/app/components/evaluation/types' import type { AvailableEvaluationWorkflowsResponse, EvaluationConfig } from '@/types/evaluation' import { keepPreviousData, @@ -60,6 +60,23 @@ export const useAvailableEvaluationMetrics = (enabled = true) => { })) } +export const useDefaultEvaluationMetrics = ( + resourceType: NonPipelineEvaluationResourceType, + resourceId: string, + enabled = true, +) => { + return useQuery(consoleQuery.evaluation.defaultMetrics.queryOptions({ + input: { + params: { + targetType: resourceType, + targetId: resourceId, + }, + }, + enabled: !!resourceId && enabled, + refetchOnWindowFocus: false, + })) +} + export const useEvaluationWorkflowAssociatedTargets = ( workflowId: string | undefined, options?: { enabled?: boolean }, diff --git a/web/types/evaluation.ts b/web/types/evaluation.ts index 543ade6b17..1a889795c5 100644 --- a/web/types/evaluation.ts +++ b/web/types/evaluation.ts @@ -1,4 +1,5 @@ export type EvaluationTargetType = 'apps' | 'snippets' | 'datasets' +export type EvaluationDefaultMetricsTargetType = 'apps' | 'snippets' export type EvaluationJudgmentConditionValue = string | string[] | boolean @@ -33,6 +34,10 @@ export type EvaluationDefaultMetric = { node_info_list?: NodeInfo[] } +export type EvaluationDefaultMetricsResponse = { + default_metrics: EvaluationDefaultMetric[] +} + export type EvaluationCustomizedMetric = { evaluation_workflow_id?: string input_fields?: Record