mirror of
https://github.com/langgenius/dify.git
synced 2026-05-06 18:27:19 +08:00
feat(web): default metrics
This commit is contained in:
parent
d8173b1cda
commit
2607eb8d32
@ -7,8 +7,8 @@ import { useEvaluationStore } from '../store'
|
||||
|
||||
const mockUpload = vi.hoisted(() => vi.fn())
|
||||
const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn())
|
||||
const mockUseDefaultEvaluationMetrics = vi.hoisted(() => vi.fn())
|
||||
const mockUseEvaluationConfig = vi.hoisted(() => vi.fn())
|
||||
const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn())
|
||||
const mockUseSaveEvaluationConfigMutation = vi.hoisted(() => vi.fn())
|
||||
const mockUseStartEvaluationRunMutation = vi.hoisted(() => vi.fn())
|
||||
const mockUsePublishedPipelineInfo = vi.hoisted(() => vi.fn())
|
||||
@ -51,7 +51,7 @@ vi.mock('@/service/base', () => ({
|
||||
vi.mock('@/service/use-evaluation', () => ({
|
||||
useEvaluationConfig: (...args: unknown[]) => mockUseEvaluationConfig(...args),
|
||||
useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args),
|
||||
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
|
||||
useDefaultEvaluationMetrics: (...args: unknown[]) => mockUseDefaultEvaluationMetrics(...args),
|
||||
useSaveEvaluationConfigMutation: (...args: unknown[]) => mockUseSaveEvaluationConfigMutation(...args),
|
||||
useStartEvaluationRunMutation: (...args: unknown[]) => mockUseStartEvaluationRunMutation(...args),
|
||||
}))
|
||||
@ -141,18 +141,41 @@ describe('Evaluation', () => {
|
||||
isLoading: false,
|
||||
})
|
||||
|
||||
mockUseEvaluationNodeInfoMutation.mockReturnValue({
|
||||
isPending: false,
|
||||
mutate: (_input: unknown, options?: { onSuccess?: (data: Record<string, Array<{ node_id: string, title: string, type: string }>>) => void }) => {
|
||||
options?.onSuccess?.({
|
||||
'answer-correctness': [
|
||||
{ node_id: 'node-answer', title: 'Answer Node', type: 'llm' },
|
||||
],
|
||||
'faithfulness': [
|
||||
{ node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
|
||||
],
|
||||
})
|
||||
mockUseDefaultEvaluationMetrics.mockReturnValue({
|
||||
data: {
|
||||
default_metrics: [
|
||||
{
|
||||
metric: 'answer-correctness',
|
||||
value_type: 'number',
|
||||
node_info_list: [
|
||||
{ node_id: 'node-answer', title: 'Answer Node', type: 'llm' },
|
||||
],
|
||||
},
|
||||
{
|
||||
metric: 'faithfulness',
|
||||
value_type: 'number',
|
||||
node_info_list: [
|
||||
{ node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
|
||||
],
|
||||
},
|
||||
{
|
||||
metric: 'context-precision',
|
||||
value_type: 'number',
|
||||
node_info_list: [],
|
||||
},
|
||||
{
|
||||
metric: 'context-recall',
|
||||
value_type: 'number',
|
||||
node_info_list: [],
|
||||
},
|
||||
{
|
||||
metric: 'context-relevance',
|
||||
value_type: 'number',
|
||||
node_info_list: [],
|
||||
},
|
||||
],
|
||||
},
|
||||
isLoading: false,
|
||||
})
|
||||
mockUseSaveEvaluationConfigMutation.mockReturnValue({
|
||||
isPending: false,
|
||||
@ -361,22 +384,19 @@ describe('Evaluation', () => {
|
||||
})
|
||||
|
||||
it('should render the metric no-node empty state', () => {
|
||||
mockUseAvailableEvaluationMetrics.mockReturnValue({
|
||||
mockUseDefaultEvaluationMetrics.mockReturnValue({
|
||||
data: {
|
||||
metrics: ['context-precision'],
|
||||
default_metrics: [
|
||||
{
|
||||
metric: 'context-precision',
|
||||
value_type: 'number',
|
||||
node_info_list: [],
|
||||
},
|
||||
],
|
||||
},
|
||||
isLoading: false,
|
||||
})
|
||||
|
||||
mockUseEvaluationNodeInfoMutation.mockReturnValue({
|
||||
isPending: false,
|
||||
mutate: (_input: unknown, options?: { onSuccess?: (data: Record<string, Array<{ node_id: string, title: string, type: string }>>) => void }) => {
|
||||
options?.onSuccess?.({
|
||||
'context-precision': [],
|
||||
})
|
||||
},
|
||||
})
|
||||
|
||||
renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-3" />)
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
|
||||
@ -385,9 +405,9 @@ describe('Evaluation', () => {
|
||||
})
|
||||
|
||||
it('should render the global empty state when no metrics are available', () => {
|
||||
mockUseAvailableEvaluationMetrics.mockReturnValue({
|
||||
mockUseDefaultEvaluationMetrics.mockReturnValue({
|
||||
data: {
|
||||
metrics: [],
|
||||
default_metrics: [],
|
||||
},
|
||||
isLoading: false,
|
||||
})
|
||||
@ -400,27 +420,24 @@ describe('Evaluation', () => {
|
||||
})
|
||||
|
||||
it('should show more nodes when a metric has more than three nodes', () => {
|
||||
mockUseAvailableEvaluationMetrics.mockReturnValue({
|
||||
mockUseDefaultEvaluationMetrics.mockReturnValue({
|
||||
data: {
|
||||
metrics: ['answer-correctness'],
|
||||
default_metrics: [
|
||||
{
|
||||
metric: 'answer-correctness',
|
||||
value_type: 'number',
|
||||
node_info_list: [
|
||||
{ node_id: 'node-1', title: 'LLM 1', type: 'llm' },
|
||||
{ node_id: 'node-2', title: 'LLM 2', type: 'llm' },
|
||||
{ node_id: 'node-3', title: 'LLM 3', type: 'llm' },
|
||||
{ node_id: 'node-4', title: 'LLM 4', type: 'llm' },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
isLoading: false,
|
||||
})
|
||||
|
||||
mockUseEvaluationNodeInfoMutation.mockReturnValue({
|
||||
isPending: false,
|
||||
mutate: (_input: unknown, options?: { onSuccess?: (data: Record<string, Array<{ node_id: string, title: string, type: string }>>) => void }) => {
|
||||
options?.onSuccess?.({
|
||||
'answer-correctness': [
|
||||
{ node_id: 'node-1', title: 'LLM 1', type: 'llm' },
|
||||
{ node_id: 'node-2', title: 'LLM 2', type: 'llm' },
|
||||
{ node_id: 'node-3', title: 'LLM 3', type: 'llm' },
|
||||
{ node_id: 'node-4', title: 'LLM 4', type: 'llm' },
|
||||
],
|
||||
})
|
||||
},
|
||||
})
|
||||
|
||||
renderWithQueryClient(<Evaluation resourceType="apps" resourceId="app-5" />)
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
'use client'
|
||||
|
||||
import type { EvaluationResourceProps } from '../../types'
|
||||
import type { NonPipelineEvaluationResourceProps } from '../../types'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useDocLink } from '@/context/i18n'
|
||||
import BatchTestPanel from '../batch-test-panel'
|
||||
@ -13,7 +13,7 @@ import SectionHeader, { InlineSectionHeader } from '../section-header'
|
||||
const NonPipelineEvaluation = ({
|
||||
resourceType,
|
||||
resourceId,
|
||||
}: EvaluationResourceProps) => {
|
||||
}: NonPipelineEvaluationResourceProps) => {
|
||||
const { t } = useTranslation('evaluation')
|
||||
const { t: tCommon } = useTranslation('common')
|
||||
const docLink = useDocLink()
|
||||
|
||||
@ -4,13 +4,11 @@ import MetricSection from '..'
|
||||
import { useEvaluationStore } from '../../../store'
|
||||
|
||||
const mockUseAvailableEvaluationWorkflows = vi.hoisted(() => vi.fn())
|
||||
const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn())
|
||||
const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn())
|
||||
const mockUseDefaultEvaluationMetrics = vi.hoisted(() => vi.fn())
|
||||
|
||||
vi.mock('@/service/use-evaluation', () => ({
|
||||
useAvailableEvaluationWorkflows: (...args: unknown[]) => mockUseAvailableEvaluationWorkflows(...args),
|
||||
useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args),
|
||||
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
|
||||
useDefaultEvaluationMetrics: (...args: unknown[]) => mockUseDefaultEvaluationMetrics(...args),
|
||||
}))
|
||||
|
||||
const resourceType = 'apps' as const
|
||||
@ -37,9 +35,17 @@ describe('MetricSection', () => {
|
||||
vi.clearAllMocks()
|
||||
useEvaluationStore.setState({ resources: {} })
|
||||
|
||||
mockUseAvailableEvaluationMetrics.mockReturnValue({
|
||||
mockUseDefaultEvaluationMetrics.mockReturnValue({
|
||||
data: {
|
||||
metrics: ['answer-correctness'],
|
||||
default_metrics: [
|
||||
{
|
||||
metric: 'answer-correctness',
|
||||
value_type: 'number',
|
||||
node_info_list: [
|
||||
{ node_id: 'node-answer', title: 'Answer Node', type: 'llm' },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
isLoading: false,
|
||||
})
|
||||
@ -54,17 +60,6 @@ describe('MetricSection', () => {
|
||||
isFetchingNextPage: false,
|
||||
isLoading: false,
|
||||
})
|
||||
|
||||
mockUseEvaluationNodeInfoMutation.mockReturnValue({
|
||||
isPending: false,
|
||||
mutate: (_input: unknown, options?: { onSuccess?: (data: Record<string, Array<{ node_id: string, title: string, type: string }>>) => void }) => {
|
||||
options?.onSuccess?.({
|
||||
'answer-correctness': [
|
||||
{ node_id: 'node-answer', title: 'Answer Node', type: 'llm' },
|
||||
],
|
||||
})
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
// Verify the empty state block extracted from MetricSection.
|
||||
@ -138,16 +133,20 @@ describe('MetricSection', () => {
|
||||
|
||||
it('should show only unselected nodes in the add-node dropdown and append the selected node', () => {
|
||||
// Arrange
|
||||
mockUseEvaluationNodeInfoMutation.mockReturnValue({
|
||||
isPending: false,
|
||||
mutate: (_input: unknown, options?: { onSuccess?: (data: Record<string, Array<{ node_id: string, title: string, type: string }>>) => void }) => {
|
||||
options?.onSuccess?.({
|
||||
'answer-correctness': [
|
||||
{ node_id: 'node-1', title: 'LLM 1', type: 'llm' },
|
||||
{ node_id: 'node-2', title: 'LLM 2', type: 'llm' },
|
||||
],
|
||||
})
|
||||
mockUseDefaultEvaluationMetrics.mockReturnValue({
|
||||
data: {
|
||||
default_metrics: [
|
||||
{
|
||||
metric: 'answer-correctness',
|
||||
value_type: 'number',
|
||||
node_info_list: [
|
||||
{ node_id: 'node-1', title: 'LLM 1', type: 'llm' },
|
||||
{ node_id: 'node-2', title: 'LLM 2', type: 'llm' },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
isLoading: false,
|
||||
})
|
||||
|
||||
act(() => {
|
||||
@ -171,16 +170,20 @@ describe('MetricSection', () => {
|
||||
|
||||
it('should hide the add-node button when the builtin metric already targets all nodes', () => {
|
||||
// Arrange
|
||||
mockUseEvaluationNodeInfoMutation.mockReturnValue({
|
||||
isPending: false,
|
||||
mutate: (_input: unknown, options?: { onSuccess?: (data: Record<string, Array<{ node_id: string, title: string, type: string }>>) => void }) => {
|
||||
options?.onSuccess?.({
|
||||
'answer-correctness': [
|
||||
{ node_id: 'node-1', title: 'LLM 1', type: 'llm' },
|
||||
{ node_id: 'node-2', title: 'LLM 2', type: 'llm' },
|
||||
],
|
||||
})
|
||||
mockUseDefaultEvaluationMetrics.mockReturnValue({
|
||||
data: {
|
||||
default_metrics: [
|
||||
{
|
||||
metric: 'answer-correctness',
|
||||
value_type: 'number',
|
||||
node_info_list: [
|
||||
{ node_id: 'node-1', title: 'LLM 1', type: 'llm' },
|
||||
{ node_id: 'node-2', title: 'LLM 2', type: 'llm' },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
isLoading: false,
|
||||
})
|
||||
|
||||
act(() => {
|
||||
|
||||
@ -1,13 +1,11 @@
|
||||
'use client'
|
||||
|
||||
import type { EvaluationResourceProps } from '../../types'
|
||||
import type { NodeInfo } from '@/types/evaluation'
|
||||
import { useEffect, useMemo, useState } from 'react'
|
||||
import type { NonPipelineEvaluationResourceProps } from '../../types'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useAvailableEvaluationMetrics, useEvaluationNodeInfoMutation } from '@/service/use-evaluation'
|
||||
import { useDefaultEvaluationMetrics } from '@/service/use-evaluation'
|
||||
import { useEvaluationResource } from '../../store'
|
||||
import MetricSelector from '../metric-selector'
|
||||
import { toEvaluationTargetType } from '../metric-selector/utils'
|
||||
import { getDefaultMetricNodeInfoMap } from '../metric-selector/utils'
|
||||
import { InlineSectionHeader } from '../section-header'
|
||||
import MetricCard from './metric-card'
|
||||
import MetricSectionEmptyState from './metric-section-empty-state'
|
||||
@ -15,55 +13,13 @@ import MetricSectionEmptyState from './metric-section-empty-state'
|
||||
const MetricSection = ({
|
||||
resourceType,
|
||||
resourceId,
|
||||
}: EvaluationResourceProps) => {
|
||||
}: NonPipelineEvaluationResourceProps) => {
|
||||
const { t } = useTranslation('evaluation')
|
||||
const resource = useEvaluationResource(resourceType, resourceId)
|
||||
const [nodeInfoMap, setNodeInfoMap] = useState<Record<string, NodeInfo[]>>({})
|
||||
const hasMetrics = resource.metrics.length > 0
|
||||
const hasBuiltinMetrics = resource.metrics.some(metric => metric.kind === 'builtin')
|
||||
const shouldLoadNodeInfo = resourceType !== 'datasets' && !!resourceId && hasBuiltinMetrics
|
||||
const { data: availableMetricsData } = useAvailableEvaluationMetrics(shouldLoadNodeInfo)
|
||||
const { mutate: loadNodeInfo } = useEvaluationNodeInfoMutation()
|
||||
const availableMetricIds = useMemo(() => availableMetricsData?.metrics ?? [], [availableMetricsData?.metrics])
|
||||
const availableMetricIdsKey = availableMetricIds.join(',')
|
||||
const resolvedNodeInfoMap = shouldLoadNodeInfo ? nodeInfoMap : {}
|
||||
|
||||
useEffect(() => {
|
||||
if (!shouldLoadNodeInfo || availableMetricIds.length === 0)
|
||||
return
|
||||
|
||||
let isActive = true
|
||||
|
||||
loadNodeInfo(
|
||||
{
|
||||
params: {
|
||||
targetType: toEvaluationTargetType(resourceType),
|
||||
targetId: resourceId,
|
||||
},
|
||||
body: {
|
||||
metrics: availableMetricIds,
|
||||
},
|
||||
},
|
||||
{
|
||||
onSuccess: (data) => {
|
||||
if (!isActive)
|
||||
return
|
||||
|
||||
setNodeInfoMap(data)
|
||||
},
|
||||
onError: () => {
|
||||
if (!isActive)
|
||||
return
|
||||
|
||||
setNodeInfoMap({})
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
return () => {
|
||||
isActive = false
|
||||
}
|
||||
}, [availableMetricIds, availableMetricIdsKey, loadNodeInfo, resourceId, resourceType, shouldLoadNodeInfo])
|
||||
const { data: defaultMetricsData } = useDefaultEvaluationMetrics(resourceType, resourceId, hasBuiltinMetrics)
|
||||
const nodeInfoMap = getDefaultMetricNodeInfoMap(defaultMetricsData?.default_metrics ?? [])
|
||||
|
||||
return (
|
||||
<section className="max-w-[700px] py-4">
|
||||
@ -79,7 +35,7 @@ const MetricSection = ({
|
||||
resourceType={resourceType}
|
||||
resourceId={resourceId}
|
||||
metric={metric}
|
||||
availableNodeInfoList={metric.kind === 'builtin' ? (resolvedNodeInfoMap[metric.optionId] ?? []) : undefined}
|
||||
availableNodeInfoList={metric.kind === 'builtin' ? (nodeInfoMap[metric.optionId] ?? []) : undefined}
|
||||
/>
|
||||
))}
|
||||
<MetricSelector
|
||||
|
||||
@ -29,7 +29,6 @@ const MetricSelector = ({
|
||||
const addCustomMetric = useEvaluationStore(state => state.addCustomMetric)
|
||||
const [open, setOpen] = useState(false)
|
||||
const [query, setQuery] = useState('')
|
||||
const [nodeInfoMap, setNodeInfoMap] = useState<Record<string, Array<{ node_id: string, title: string, type: string }>>>({})
|
||||
const [collapsedMetricMap, setCollapsedMetricMap] = useState<Record<string, boolean>>({})
|
||||
const [expandedMetricNodesMap, setExpandedMetricNodesMap] = useState<Record<string, boolean>>({})
|
||||
const hasCustomMetric = resource.metrics.some(metric => metric.kind === 'custom-workflow')
|
||||
@ -44,8 +43,6 @@ const MetricSelector = ({
|
||||
query,
|
||||
resourceType,
|
||||
resourceId,
|
||||
nodeInfoMap,
|
||||
setNodeInfoMap,
|
||||
})
|
||||
|
||||
const handleOpenChange = (nextOpen: boolean) => {
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import type { EvaluationMetric, EvaluationResourceProps, MetricOption } from '../../types'
|
||||
import type { EvaluationMetric, MetricOption, NonPipelineEvaluationResourceProps } from '../../types'
|
||||
import type { NodeInfo } from '@/types/evaluation'
|
||||
|
||||
export type MetricSelectorProps = EvaluationResourceProps & {
|
||||
export type MetricSelectorProps = NonPipelineEvaluationResourceProps & {
|
||||
triggerClassName?: string
|
||||
triggerStyle?: 'button' | 'text'
|
||||
}
|
||||
|
||||
@ -1,24 +1,24 @@
|
||||
import type { MetricOption, NonPipelineEvaluationResourceType } from '../../types'
|
||||
import type { BuiltinMetricMap, MetricSelectorSection } from './types'
|
||||
import type { NodeInfo } from '@/types/evaluation'
|
||||
import { useEffect, useMemo } from 'react'
|
||||
import { useMemo } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useAvailableEvaluationMetrics, useEvaluationNodeInfoMutation } from '@/service/use-evaluation'
|
||||
import { useDefaultEvaluationMetrics } from '@/service/use-evaluation'
|
||||
import { getTranslatedMetricDescription } from '../../default-metric-descriptions'
|
||||
import { getEvaluationMockConfig } from '../../mock'
|
||||
import { useEvaluationResource, useEvaluationStore } from '../../store'
|
||||
import {
|
||||
buildMetricOption,
|
||||
dedupeNodeInfoList,
|
||||
toEvaluationTargetType,
|
||||
getDefaultMetricNodeInfoMap,
|
||||
normalizeMetricValueType,
|
||||
} from './utils'
|
||||
|
||||
type UseMetricSelectorDataOptions = {
|
||||
open: boolean
|
||||
query: string
|
||||
resourceType: 'apps' | 'datasets' | 'snippets'
|
||||
resourceType: NonPipelineEvaluationResourceType
|
||||
resourceId: string
|
||||
nodeInfoMap: Record<string, NodeInfo[]>
|
||||
setNodeInfoMap: (value: Record<string, NodeInfo[]>) => void
|
||||
}
|
||||
|
||||
type UseMetricSelectorDataResult = {
|
||||
@ -33,16 +33,13 @@ export const useMetricSelectorData = ({
|
||||
query,
|
||||
resourceType,
|
||||
resourceId,
|
||||
nodeInfoMap,
|
||||
setNodeInfoMap,
|
||||
}: UseMetricSelectorDataOptions): UseMetricSelectorDataResult => {
|
||||
const { t } = useTranslation('evaluation')
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
const metrics = useEvaluationResource(resourceType, resourceId).metrics
|
||||
const addBuiltinMetric = useEvaluationStore(state => state.addBuiltinMetric)
|
||||
const removeMetric = useEvaluationStore(state => state.removeMetric)
|
||||
const { data: availableMetricsData, isLoading: isAvailableMetricsLoading } = useAvailableEvaluationMetrics(open)
|
||||
const { mutate: loadNodeInfo, isPending: isNodeInfoLoading } = useEvaluationNodeInfoMutation()
|
||||
const { data: defaultMetricsData, isLoading: isDefaultMetricsLoading } = useDefaultEvaluationMetrics(resourceType, resourceId, open)
|
||||
|
||||
const builtinMetrics = useMemo(() => {
|
||||
return metrics.filter(metric => metric.kind === 'builtin')
|
||||
@ -52,54 +49,29 @@ export const useMetricSelectorData = ({
|
||||
return new Map(builtinMetrics.map(metric => [metric.optionId, metric] as const))
|
||||
}, [builtinMetrics])
|
||||
|
||||
const availableMetricIds = useMemo(() => availableMetricsData?.metrics ?? [], [availableMetricsData?.metrics])
|
||||
const availableMetricIdsKey = availableMetricIds.join(',')
|
||||
const defaultMetrics = useMemo(() => defaultMetricsData?.default_metrics ?? [], [defaultMetricsData?.default_metrics])
|
||||
const nodeInfoMap = useMemo(() => getDefaultMetricNodeInfoMap(defaultMetrics), [defaultMetrics])
|
||||
|
||||
const resolvedMetrics = useMemo(() => {
|
||||
const metricsMap = new Map(config.builtinMetrics.map(metric => [metric.id, metric] as const))
|
||||
|
||||
return availableMetricIds.map(metricId => metricsMap.get(metricId) ?? buildMetricOption(metricId))
|
||||
}, [availableMetricIds, config.builtinMetrics])
|
||||
return defaultMetrics
|
||||
.map((defaultMetric) => {
|
||||
if (!defaultMetric.metric)
|
||||
return null
|
||||
|
||||
useEffect(() => {
|
||||
if (!open)
|
||||
return
|
||||
const configMetric = metricsMap.get(defaultMetric.metric)
|
||||
if (configMetric) {
|
||||
return {
|
||||
...configMetric,
|
||||
valueType: normalizeMetricValueType(defaultMetric.value_type),
|
||||
}
|
||||
}
|
||||
|
||||
if (resourceType === 'datasets' || !resourceId || availableMetricIds.length === 0)
|
||||
return
|
||||
|
||||
let isActive = true
|
||||
|
||||
loadNodeInfo(
|
||||
{
|
||||
params: {
|
||||
targetType: toEvaluationTargetType(resourceType),
|
||||
targetId: resourceId,
|
||||
},
|
||||
body: {
|
||||
metrics: availableMetricIds,
|
||||
},
|
||||
},
|
||||
{
|
||||
onSuccess: (data) => {
|
||||
if (!isActive)
|
||||
return
|
||||
|
||||
setNodeInfoMap(data)
|
||||
},
|
||||
onError: () => {
|
||||
if (!isActive)
|
||||
return
|
||||
|
||||
setNodeInfoMap({})
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
return () => {
|
||||
isActive = false
|
||||
}
|
||||
}, [availableMetricIds, availableMetricIdsKey, loadNodeInfo, open, resourceId, resourceType, setNodeInfoMap])
|
||||
return buildMetricOption(defaultMetric.metric, defaultMetric.value_type)
|
||||
})
|
||||
.filter((metric): metric is MetricOption => !!metric)
|
||||
}, [config.builtinMetrics, defaultMetrics])
|
||||
|
||||
const filteredSections = useMemo(() => {
|
||||
const keyword = query.trim().toLowerCase()
|
||||
@ -110,8 +82,7 @@ export const useMetricSelectorData = ({
|
||||
|| metric.label.toLowerCase().includes(keyword)
|
||||
|| metricDescription.toLowerCase().includes(keyword)
|
||||
const metricNodes = nodeInfoMap[metric.id] ?? []
|
||||
const supportsNodeSelection = resourceType !== 'datasets'
|
||||
const hasNoNodeInfo = supportsNodeSelection && metricNodes.length === 0
|
||||
const hasNoNodeInfo = metricNodes.length === 0
|
||||
|
||||
if (hasNoNodeInfo) {
|
||||
if (!metricMatches)
|
||||
@ -146,8 +117,8 @@ export const useMetricSelectorData = ({
|
||||
hasNoNodeInfo: false,
|
||||
visibleNodes,
|
||||
}
|
||||
}).filter(section => !!section)
|
||||
}, [nodeInfoMap, query, resolvedMetrics, resourceType, t])
|
||||
}).filter((section): section is MetricSelectorSection => !!section)
|
||||
}, [nodeInfoMap, query, resolvedMetrics, t])
|
||||
|
||||
const toggleNodeSelection = (metricId: string, nodeInfo: NodeInfo) => {
|
||||
const addedMetric = builtinMetricMap.get(metricId)
|
||||
@ -170,7 +141,7 @@ export const useMetricSelectorData = ({
|
||||
return {
|
||||
builtinMetricMap,
|
||||
filteredSections,
|
||||
isRemoteLoading: isAvailableMetricsLoading || isNodeInfoLoading,
|
||||
isRemoteLoading: isDefaultMetricsLoading,
|
||||
toggleNodeSelection,
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,10 +1,15 @@
|
||||
import type { MetricOption } from '../../types'
|
||||
import type { ConditionMetricValueType, MetricOption } from '../../types'
|
||||
import type { MetricVisualTone } from './types'
|
||||
import type { EvaluationTargetType, NodeInfo } from '@/types/evaluation'
|
||||
import type { EvaluationDefaultMetric, NodeInfo } from '@/types/evaluation'
|
||||
import { getDefaultMetricDescription } from '../../default-metric-descriptions'
|
||||
|
||||
export const toEvaluationTargetType = (resourceType: 'apps' | 'snippets'): EvaluationTargetType => {
|
||||
return resourceType === 'snippets' ? 'snippets' : 'apps'
|
||||
const defaultConditionMetricValueType: ConditionMetricValueType = 'number'
|
||||
|
||||
export const normalizeMetricValueType = (valueType: string | undefined): ConditionMetricValueType => {
|
||||
if (valueType === 'string' || valueType === 'number' || valueType === 'boolean')
|
||||
return valueType
|
||||
|
||||
return defaultConditionMetricValueType
|
||||
}
|
||||
|
||||
const humanizeMetricId = (metricId: string) => {
|
||||
@ -15,13 +20,33 @@ const humanizeMetricId = (metricId: string) => {
|
||||
.join(' ')
|
||||
}
|
||||
|
||||
export const buildMetricOption = (metricId: string): MetricOption => ({
|
||||
export const buildMetricOption = (metricId: string, valueType?: string): MetricOption => ({
|
||||
id: metricId,
|
||||
label: humanizeMetricId(metricId),
|
||||
description: getDefaultMetricDescription(metricId),
|
||||
valueType: 'number',
|
||||
valueType: normalizeMetricValueType(valueType),
|
||||
})
|
||||
|
||||
export const dedupeNodeInfoList = (nodeInfoList: NodeInfo[]) => {
|
||||
return Array.from(new Map(nodeInfoList.map(nodeInfo => [nodeInfo.node_id, nodeInfo])).values())
|
||||
}
|
||||
|
||||
export const getDefaultMetricNodeInfoMap = (defaultMetrics: EvaluationDefaultMetric[]) => {
|
||||
const nodeInfoMap: Record<string, NodeInfo[]> = {}
|
||||
|
||||
defaultMetrics.forEach((defaultMetric) => {
|
||||
if (!defaultMetric.metric)
|
||||
return
|
||||
|
||||
nodeInfoMap[defaultMetric.metric] = dedupeNodeInfoList([
|
||||
...(nodeInfoMap[defaultMetric.metric] ?? []),
|
||||
...(defaultMetric.node_info_list ?? []),
|
||||
])
|
||||
})
|
||||
|
||||
return nodeInfoMap
|
||||
}
|
||||
|
||||
export const getMetricVisual = (metricId: string): { icon: string, tone: MetricVisualTone } => {
|
||||
if (['context-precision', 'context-recall'].includes(metricId)) {
|
||||
return {
|
||||
@ -71,7 +96,3 @@ export const getToneClasses = (tone: MetricVisualTone) => {
|
||||
solid: 'bg-util-colors-indigo-indigo-500 text-white',
|
||||
}
|
||||
}
|
||||
|
||||
export const dedupeNodeInfoList = (nodeInfoList: NodeInfo[]) => {
|
||||
return Array.from(new Map(nodeInfoList.map(nodeInfo => [nodeInfo.node_id, nodeInfo])).values())
|
||||
}
|
||||
|
||||
@ -1,12 +1,18 @@
|
||||
import type { NodeInfo } from '@/types/evaluation'
|
||||
|
||||
export type EvaluationResourceType = 'apps' | 'datasets' | 'snippets'
|
||||
export type NonPipelineEvaluationResourceType = Exclude<EvaluationResourceType, 'datasets'>
|
||||
|
||||
export type EvaluationResourceProps = {
|
||||
resourceType: EvaluationResourceType
|
||||
resourceId: string
|
||||
}
|
||||
|
||||
export type NonPipelineEvaluationResourceProps = {
|
||||
resourceType: NonPipelineEvaluationResourceType
|
||||
resourceId: string
|
||||
}
|
||||
|
||||
export type MetricKind = 'builtin' | 'custom-workflow'
|
||||
|
||||
export type BatchTestTab = 'input-fields' | 'history'
|
||||
|
||||
@ -2,6 +2,8 @@ import type {
|
||||
AvailableEvaluationWorkflowsResponse,
|
||||
EvaluationConfig,
|
||||
EvaluationConfigData,
|
||||
EvaluationDefaultMetricsResponse,
|
||||
EvaluationDefaultMetricsTargetType,
|
||||
EvaluationFileInfo,
|
||||
EvaluationLogsResponse,
|
||||
EvaluationMetricsListResponse,
|
||||
@ -255,6 +257,19 @@ export const evaluationMetricsContract = base
|
||||
}>())
|
||||
.output(type<EvaluationMetricsMapResponse>())
|
||||
|
||||
export const evaluationDefaultMetricsContract = base
|
||||
.route({
|
||||
path: '/{targetType}/{targetId}/evaluation/default-metrics',
|
||||
method: 'GET',
|
||||
})
|
||||
.input(type<{
|
||||
params: {
|
||||
targetType: EvaluationDefaultMetricsTargetType
|
||||
targetId: string
|
||||
}
|
||||
}>())
|
||||
.output(type<EvaluationDefaultMetricsResponse>())
|
||||
|
||||
export const evaluationNodeInfoContract = base
|
||||
.route({
|
||||
path: '/{targetType}/{targetId}/evaluation/node-info',
|
||||
|
||||
@ -14,6 +14,7 @@ import {
|
||||
datasetEvaluationRunDetailContract,
|
||||
datasetEvaluationTemplateDownloadContract,
|
||||
evaluationConfigContract,
|
||||
evaluationDefaultMetricsContract,
|
||||
evaluationFileContract,
|
||||
evaluationLogsContract,
|
||||
evaluationMetricsContract,
|
||||
@ -145,6 +146,7 @@ export const consoleRouterContract = {
|
||||
runDetail: evaluationRunDetailContract,
|
||||
cancelRun: cancelEvaluationRunContract,
|
||||
metrics: evaluationMetricsContract,
|
||||
defaultMetrics: evaluationDefaultMetricsContract,
|
||||
nodeInfo: evaluationNodeInfoContract,
|
||||
availableMetrics: availableEvaluationMetricsContract,
|
||||
availableWorkflows: availableEvaluationWorkflowsContract,
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import type { EvaluationResourceType } from '@/app/components/evaluation/types'
|
||||
import type { EvaluationResourceType, NonPipelineEvaluationResourceType } from '@/app/components/evaluation/types'
|
||||
import type { AvailableEvaluationWorkflowsResponse, EvaluationConfig } from '@/types/evaluation'
|
||||
import {
|
||||
keepPreviousData,
|
||||
@ -60,6 +60,23 @@ export const useAvailableEvaluationMetrics = (enabled = true) => {
|
||||
}))
|
||||
}
|
||||
|
||||
export const useDefaultEvaluationMetrics = (
|
||||
resourceType: NonPipelineEvaluationResourceType,
|
||||
resourceId: string,
|
||||
enabled = true,
|
||||
) => {
|
||||
return useQuery(consoleQuery.evaluation.defaultMetrics.queryOptions({
|
||||
input: {
|
||||
params: {
|
||||
targetType: resourceType,
|
||||
targetId: resourceId,
|
||||
},
|
||||
},
|
||||
enabled: !!resourceId && enabled,
|
||||
refetchOnWindowFocus: false,
|
||||
}))
|
||||
}
|
||||
|
||||
export const useEvaluationWorkflowAssociatedTargets = (
|
||||
workflowId: string | undefined,
|
||||
options?: { enabled?: boolean },
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
export type EvaluationTargetType = 'apps' | 'snippets' | 'datasets'
|
||||
export type EvaluationDefaultMetricsTargetType = 'apps' | 'snippets'
|
||||
|
||||
export type EvaluationJudgmentConditionValue = string | string[] | boolean
|
||||
|
||||
@ -33,6 +34,10 @@ export type EvaluationDefaultMetric = {
|
||||
node_info_list?: NodeInfo[]
|
||||
}
|
||||
|
||||
export type EvaluationDefaultMetricsResponse = {
|
||||
default_metrics: EvaluationDefaultMetric[]
|
||||
}
|
||||
|
||||
export type EvaluationCustomizedMetric = {
|
||||
evaluation_workflow_id?: string
|
||||
input_fields?: Record<string, string | null | undefined>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user