feat(web): fetch dataset evaluation config

JzoNg 2026-04-09 14:21:01 +08:00
parent 73d95245f8
commit cfb5b9dfea
18 changed files with 467 additions and 97 deletions

View File

@@ -5,7 +5,7 @@ const Page = async (props: {
}) => {
const { appId } = await props.params
return <Evaluation resourceType="workflow" resourceId={appId} />
return <Evaluation resourceType="apps" resourceId={appId} />
}
export default Page

View File

@@ -5,7 +5,7 @@ const Page = async (props: {
}) => {
const { datasetId } = await props.params
return <Evaluation resourceType="pipeline" resourceId={datasetId} />
return <Evaluation resourceType="datasets" resourceId={datasetId} />
}
export default Page

View File

@@ -4,6 +4,7 @@ import { getEvaluationMockConfig } from '../mock'
import { useEvaluationStore } from '../store'
const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn())
const mockUseEvaluationConfig = vi.hoisted(() => vi.fn())
const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn())
vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({
@@ -38,6 +39,7 @@ vi.mock('@/app/components/header/account-setting/model-provider-page/model-selec
}))
vi.mock('@/service/use-evaluation', () => ({
useEvaluationConfig: (...args: unknown[]) => mockUseEvaluationConfig(...args),
useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args),
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
}))
@@ -46,6 +48,9 @@ describe('Evaluation', () => {
beforeEach(() => {
useEvaluationStore.setState({ resources: {} })
vi.clearAllMocks()
mockUseEvaluationConfig.mockReturnValue({
data: null,
})
mockUseAvailableEvaluationMetrics.mockReturnValue({
data: {
@@ -72,7 +77,7 @@ describe('Evaluation', () => {
it('should search, select metric nodes, and create a batch history record', async () => {
vi.useFakeTimers()
render(<Evaluation resourceType="workflow" resourceId="app-1" />)
render(<Evaluation resourceType="apps" resourceId="app-1" />)
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini')
@@ -113,7 +118,7 @@
})
it('should render time placeholders and hide the value row for empty operators', () => {
const resourceType = 'workflow'
const resourceType = 'apps'
const resourceId = 'app-2'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -126,7 +131,7 @@
store.ensureResource(resourceType, resourceId)
store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini')
const group = useEvaluationStore.getState().resources['workflow:app-2'].conditions[0]
const group = useEvaluationStore.getState().resources['apps:app-2'].conditions[0]
groupId = group.id
itemId = group.items[0].id
@@ -166,7 +171,7 @@
},
})
render(<Evaluation resourceType="workflow" resourceId="app-3" />)
render(<Evaluation resourceType="apps" resourceId="app-3" />)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -181,7 +186,7 @@
isLoading: false,
})
render(<Evaluation resourceType="workflow" resourceId="app-4" />)
render(<Evaluation resourceType="apps" resourceId="app-4" />)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -210,7 +215,7 @@
},
})
render(<Evaluation resourceType="workflow" resourceId="app-5" />)
render(<Evaluation resourceType="apps" resourceId="app-5" />)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -224,7 +229,7 @@
})
it('should render the pipeline-specific layout without auto-selecting a judge model', () => {
render(<Evaluation resourceType="pipeline" resourceId="dataset-1" />)
render(<Evaluation resourceType="datasets" resourceId="dataset-1" />)
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('empty')
expect(screen.getByText('evaluation.history.title')).toBeInTheDocument()
@@ -236,7 +241,7 @@
})
it('should enable pipeline batch actions after selecting a judge model and metric', () => {
render(<Evaluation resourceType="pipeline" resourceId="dataset-2" />)
render(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
fireEvent.click(screen.getByRole('button', { name: 'select-model' }))
fireEvent.click(screen.getByRole('button', { name: /Context Precision/i }))

View File

@@ -1,3 +1,4 @@
import type { EvaluationConfig } from '@/types/evaluation'
import { getEvaluationMockConfig } from '../mock'
import {
getAllowedOperators,
@@ -12,7 +13,7 @@ describe('evaluation store', () => {
})
it('should configure a custom metric mapping to a valid state', () => {
const resourceType = 'workflow'
const resourceType = 'apps'
const resourceId = 'app-1'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -20,7 +21,7 @@
store.ensureResource(resourceType, resourceId)
store.addCustomMetric(resourceType, resourceId)
const initialMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.kind === 'custom-workflow')
const initialMetric = useEvaluationStore.getState().resources['apps:app-1'].metrics.find(metric => metric.kind === 'custom-workflow')
expect(initialMetric).toBeDefined()
expect(isCustomMetricConfigured(initialMetric!)).toBe(false)
@@ -34,14 +35,14 @@
targetVariableId: config.workflowOptions[0].targetVariables[0].id,
})
const configuredMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.id === initialMetric!.id)
const configuredMetric = useEvaluationStore.getState().resources['apps:app-1'].metrics.find(metric => metric.id === initialMetric!.id)
expect(isCustomMetricConfigured(configuredMetric!)).toBe(true)
expect(configuredMetric!.customConfig!.workflowAppId).toBe('custom-workflow-app-id')
expect(configuredMetric!.customConfig!.workflowName).toBe(config.workflowOptions[0].label)
})
it('should add and remove builtin metrics', () => {
const resourceType = 'workflow'
const resourceType = 'apps'
const resourceId = 'app-2'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -49,16 +50,16 @@
store.ensureResource(resourceType, resourceId)
store.addBuiltinMetric(resourceType, resourceId, config.builtinMetrics[1].id)
const addedMetric = useEvaluationStore.getState().resources['workflow:app-2'].metrics.find(metric => metric.optionId === config.builtinMetrics[1].id)
const addedMetric = useEvaluationStore.getState().resources['apps:app-2'].metrics.find(metric => metric.optionId === config.builtinMetrics[1].id)
expect(addedMetric).toBeDefined()
store.removeMetric(resourceType, resourceId, addedMetric!.id)
expect(useEvaluationStore.getState().resources['workflow:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false)
expect(useEvaluationStore.getState().resources['apps:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false)
})
it('should upsert builtin metric node selections', () => {
const resourceType = 'workflow'
const resourceType = 'apps'
const resourceId = 'app-4'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -73,38 +74,38 @@
{ node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
])
const metric = useEvaluationStore.getState().resources['workflow:app-4'].metrics.find(item => item.optionId === metricId)
const metric = useEvaluationStore.getState().resources['apps:app-4'].metrics.find(item => item.optionId === metricId)
expect(metric?.nodeInfoList).toEqual([
{ node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
])
expect(useEvaluationStore.getState().resources['workflow:app-4'].metrics.filter(item => item.optionId === metricId)).toHaveLength(1)
expect(useEvaluationStore.getState().resources['apps:app-4'].metrics.filter(item => item.optionId === metricId)).toHaveLength(1)
})
it('should update condition groups and adapt operators to field types', () => {
const resourceType = 'pipeline'
const resourceType = 'datasets'
const resourceId = 'dataset-1'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
store.ensureResource(resourceType, resourceId)
const initialGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
const initialGroup = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0]
store.setConditionGroupOperator(resourceType, resourceId, initialGroup.id, 'or')
store.addConditionGroup(resourceType, resourceId)
const booleanField = config.fieldOptions.find(field => field.type === 'boolean')!
const currentItem = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0].items[0]
const currentItem = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0].items[0]
store.updateConditionField(resourceType, resourceId, initialGroup.id, currentItem.id, booleanField.id)
const updatedGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
const updatedGroup = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0]
expect(updatedGroup.logicalOperator).toBe('or')
expect(updatedGroup.items[0].operator).toBe('is')
expect(getAllowedOperators(resourceType, booleanField.id)).toEqual(['is', 'is_not'])
})
it('should support time fields and clear values for empty operators', () => {
const resourceType = 'workflow'
const resourceType = 'apps'
const resourceId = 'app-3'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -112,15 +113,89 @@
store.ensureResource(resourceType, resourceId)
const timeField = config.fieldOptions.find(field => field.type === 'time')!
const item = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
const item = useEvaluationStore.getState().resources['apps:app-3'].conditions[0].items[0]
store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, timeField.id)
store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, 'is_empty')
store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['apps:app-3'].conditions[0].id, item.id, timeField.id)
store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['apps:app-3'].conditions[0].id, item.id, 'is_empty')
const updatedItem = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
const updatedItem = useEvaluationStore.getState().resources['apps:app-3'].conditions[0].items[0]
expect(getAllowedOperators(resourceType, timeField.id)).toEqual(['is', 'before', 'after', 'is_empty', 'is_not_empty'])
expect(requiresConditionValue('is_empty')).toBe(false)
expect(updatedItem.value).toBeNull()
})
it('should hydrate resource state from evaluation config', () => {
const resourceType = 'apps'
const resourceId = 'app-5'
const store = useEvaluationStore.getState()
const config: EvaluationConfig = {
evaluation_model: 'gpt-4o-mini',
evaluation_model_provider: 'openai',
metrics_config: {
default_metrics: [{
metric: 'faithfulness',
node_info_list: [
{ node_id: 'node-1', title: 'Retriever', type: 'retriever' },
],
}],
customized_metrics: {
evaluation_workflow_id: 'workflow-precision-review',
input_fields: {
'app.input.query': 'query',
},
},
},
judgement_conditions: [{
logical_operator: 'or',
items: [{
field_id: 'system.has_context',
operator: 'is',
value: true,
}],
}],
}
store.ensureResource(resourceType, resourceId)
store.setBatchTab(resourceType, resourceId, 'history')
store.setUploadedFileName(resourceType, resourceId, 'batch.csv')
useEvaluationStore.setState(state => ({
resources: {
...state.resources,
'apps:app-5': {
...state.resources['apps:app-5'],
batchRecords: [{
id: 'batch-1',
fileName: 'batch.csv',
status: 'success',
startedAt: '10:00:00',
summary: 'App evaluation batch',
}],
},
},
}))
store.hydrateResource(resourceType, resourceId, config)
const hydratedState = useEvaluationStore.getState().resources['apps:app-5']
expect(hydratedState.judgeModelId).toBe('openai::gpt-4o-mini')
expect(hydratedState.metrics).toHaveLength(2)
expect(hydratedState.metrics[0].optionId).toBe('faithfulness')
expect(hydratedState.metrics[0].nodeInfoList).toEqual([
{ node_id: 'node-1', title: 'Retriever', type: 'retriever' },
])
expect(hydratedState.metrics[1].kind).toBe('custom-workflow')
expect(hydratedState.metrics[1].customConfig?.workflowId).toBe('workflow-precision-review')
expect(hydratedState.metrics[1].customConfig?.mappings[0].sourceFieldId).toBe('app.input.query')
expect(hydratedState.metrics[1].customConfig?.mappings[0].targetVariableId).toBe('query')
expect(hydratedState.conditions[0].logicalOperator).toBe('or')
expect(hydratedState.conditions[0].items[0]).toMatchObject({
fieldId: 'system.has_context',
operator: 'is',
value: true,
})
expect(hydratedState.activeBatchTab).toBe('history')
expect(hydratedState.uploadedFileName).toBe('batch.csv')
expect(hydratedState.batchRecords).toHaveLength(1)
})
})

View File

@@ -1,13 +1,13 @@
'use client'
import type { EvaluationResourceProps } from '../types'
import type { EvaluationResourceProps } from '../../types'
import { useTranslation } from 'react-i18next'
import { useDocLink } from '@/context/i18n'
import BatchTestPanel from './batch-test-panel'
import ConditionsSection from './conditions-section'
import JudgeModelSelector from './judge-model-selector'
import MetricSection from './metric-section'
import SectionHeader, { InlineSectionHeader } from './section-header'
import BatchTestPanel from '../batch-test-panel'
import ConditionsSection from '../conditions-section'
import JudgeModelSelector from '../judge-model-selector'
import MetricSection from '../metric-section'
import SectionHeader, { InlineSectionHeader } from '../section-header'
const NonPipelineEvaluation = ({
resourceType,

View File

@@ -1,6 +1,6 @@
'use client'
import type { EvaluationResourceProps, MetricOption } from '../types'
import type { EvaluationResourceProps, MetricOption } from '../../types'
import { useEffect, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import Badge from '@/app/components/base/badge'
@@ -11,10 +11,10 @@ import { toast } from '@/app/components/base/ui/toast'
import { Tooltip, TooltipContent, TooltipTrigger } from '@/app/components/base/ui/tooltip'
import { useDocLink } from '@/context/i18n'
import { cn } from '@/utils/classnames'
import { getEvaluationMockConfig } from '../mock'
import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../store'
import JudgeModelSelector from './judge-model-selector'
import SectionHeader, { InlineSectionHeader } from './section-header'
import { getEvaluationMockConfig } from '../../mock'
import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../../store'
import JudgeModelSelector from '../judge-model-selector'
import SectionHeader, { InlineSectionHeader } from '../section-header'
type PipelineMetricItemProps = {
metric: MetricOption

View File

@@ -1,5 +1,5 @@
import { act, fireEvent, render, screen } from '@testing-library/react'
import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
import { act, fireEvent, render, screen } from '@testing-library/react'
import MetricSection from '..'
import { useEvaluationStore } from '../../../store'
@@ -13,7 +13,7 @@ vi.mock('@/service/use-evaluation', () => ({
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
}))
const resourceType = 'workflow' as const
const resourceType = 'apps' as const
const resourceId = 'metric-section-resource'
const renderMetricSection = () => {

View File

@@ -21,7 +21,7 @@ const MetricSection = ({
const [nodeInfoMap, setNodeInfoMap] = useState<Record<string, NodeInfo[]>>({})
const hasMetrics = resource.metrics.length > 0
const hasBuiltinMetrics = resource.metrics.some(metric => metric.kind === 'builtin')
const shouldLoadNodeInfo = resourceType !== 'pipeline' && !!resourceId && hasBuiltinMetrics
const shouldLoadNodeInfo = resourceType !== 'datasets' && !!resourceId && hasBuiltinMetrics
const { data: availableMetricsData } = useAvailableEvaluationMetrics(shouldLoadNodeInfo)
const { mutate: loadNodeInfo } = useEvaluationNodeInfoMutation()
const availableMetricIds = useMemo(() => availableMetricsData?.metrics ?? [], [availableMetricsData?.metrics])

View File

@@ -13,7 +13,7 @@ import {
type UseMetricSelectorDataOptions = {
open: boolean
query: string
resourceType: 'workflow' | 'pipeline' | 'snippet'
resourceType: 'apps' | 'datasets' | 'snippets'
resourceId: string
nodeInfoMap: Record<string, NodeInfo[]>
setNodeInfoMap: (value: Record<string, NodeInfo[]>) => void
@@ -63,7 +63,7 @@ export const useMetricSelectorData = ({
if (!open)
return
if (resourceType === 'pipeline' || !resourceId || availableMetricIds.length === 0)
if (resourceType === 'datasets' || !resourceId || availableMetricIds.length === 0)
return
let isActive = true
@@ -107,7 +107,7 @@
|| metric.label.toLowerCase().includes(keyword)
|| metric.description.toLowerCase().includes(keyword)
const metricNodes = nodeInfoMap[metric.id] ?? []
const supportsNodeSelection = resourceType !== 'pipeline'
const supportsNodeSelection = resourceType !== 'datasets'
const hasNoNodeInfo = supportsNodeSelection && metricNodes.length === 0
if (hasNoNodeInfo) {

View File

@@ -2,8 +2,8 @@ import type { MetricOption } from '../../types'
import type { MetricVisualTone } from './types'
import type { EvaluationTargetType, NodeInfo } from '@/types/evaluation'
export const toEvaluationTargetType = (resourceType: 'workflow' | 'snippet'): EvaluationTargetType => {
return resourceType === 'snippet' ? 'snippets' : 'app'
export const toEvaluationTargetType = (resourceType: 'apps' | 'snippets'): EvaluationTargetType => {
return resourceType === 'snippets' ? 'snippets' : 'app'
}
const humanizeMetricId = (metricId: string) => {
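A quick note on toEvaluationTargetType above: route segments are plural while the API's EvaluationTargetType keeps a singular 'app'. A hedged illustration of the mapping (no new behavior, just the ternary spelled out):

toEvaluationTargetType('apps') // => 'app' (API target type stays singular)
toEvaluationTargetType('snippets') // => 'snippets' (already plural on both sides)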

View File

@@ -2,21 +2,31 @@
import type { EvaluationResourceProps } from './types'
import { useEffect } from 'react'
import NonPipelineEvaluation from './components/non-pipeline-evaluation'
import PipelineEvaluation from './components/pipeline-evaluation'
import { useEvaluationConfig } from '@/service/use-evaluation'
import NonPipelineEvaluation from './components/layout/non-pipeline-evaluation'
import PipelineEvaluation from './components/layout/pipeline-evaluation'
import { useEvaluationStore } from './store'
const Evaluation = ({
resourceType,
resourceId,
}: EvaluationResourceProps) => {
const { data: config } = useEvaluationConfig(resourceType, resourceId)
const ensureResource = useEvaluationStore(state => state.ensureResource)
const hydrateResource = useEvaluationStore(state => state.hydrateResource)
useEffect(() => {
ensureResource(resourceType, resourceId)
}, [ensureResource, resourceId, resourceType])
if (resourceType === 'pipeline') {
useEffect(() => {
if (!config)
return
hydrateResource(resourceType, resourceId, config)
}, [config, hydrateResource, resourceId, resourceType])
if (resourceType === 'datasets') {
return (
<PipelineEvaluation
resourceType={resourceType}
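For orientation, a sketch of the sequence the component now runs (ordinary React effect ordering; only names from this diff):

// 1. ensureResource(resourceType, resourceId) seeds default store state
// 2. useEvaluationConfig(resourceType, resourceId) fetches the saved config
// 3. once config resolves, hydrateResource(resourceType, resourceId, config)
//    replaces judge model, metrics, and conditions, and re-runs on refetch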

View File

@@ -160,7 +160,7 @@ export const getDefaultOperator = (fieldType: EvaluationFieldOption['type']): Co
}
export const getEvaluationMockConfig = (resourceType: EvaluationResourceType): EvaluationMockConfig => {
if (resourceType === 'pipeline') {
if (resourceType === 'datasets') {
return {
judgeModels,
builtinMetrics: pipelineBuiltinMetrics,
@@ -176,7 +176,7 @@ }
}
}
if (resourceType === 'snippet') {
if (resourceType === 'snippets') {
return {
judgeModels,
builtinMetrics,

View File

@@ -9,24 +9,195 @@ import type {
JudgmentConditionGroup,
MetricOption,
} from './types'
import type { NodeInfo } from '@/types/evaluation'
import type {
EvaluationConditionValue,
EvaluationConfig,
EvaluationCustomizedMetric,
EvaluationDefaultMetric,
EvaluationJudgementConditionGroup,
EvaluationJudgementConditionItem,
EvaluationMetricsConfig,
NodeInfo,
} from '@/types/evaluation'
import { getComparisonOperators, getDefaultOperator, getEvaluationMockConfig } from './mock'
import { encodeModelSelection } from './utils'
type EvaluationStoreResources = Record<string, EvaluationResourceState>
const createId = (prefix: string) => `${prefix}-${Math.random().toString(36).slice(2, 10)}`
const humanizeMetricId = (metricId: string) => {
return metricId
.split(/[-_]/g)
.filter(Boolean)
.map(part => part.charAt(0).toUpperCase() + part.slice(1))
.join(' ')
}
const resolveMetricOption = (resourceType: EvaluationResourceType, metricId: string): MetricOption => {
const config = getEvaluationMockConfig(resourceType)
return config.builtinMetrics.find(metric => metric.id === metricId) ?? {
id: metricId,
label: humanizeMetricId(metricId),
description: '',
group: config.builtinMetrics[0]?.group ?? 'other',
badges: ['Built-in'],
}
}
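When a server-sent metric id is missing from the mock's builtinMetrics, resolveMetricOption degrades gracefully instead of failing; an illustrative id, assumed not to be a mock built-in:

// humanizeMetricId splits on '-'/'_' and capitalizes each part; the fallback
// option carries an empty description and the first built-in group (or 'other').
resolveMetricOption('apps', 'some_new_metric').label // => 'Some New Metric'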
const normalizeNodeInfoList = (value: NodeInfo[] | undefined): NodeInfo[] => {
if (!value?.length)
return []
return value
.map((item) => {
const nodeId = typeof item.node_id === 'string' ? item.node_id : ''
const title = typeof item.title === 'string' ? item.title : nodeId
const type = typeof item.type === 'string' ? item.type : ''
if (!nodeId)
return null
return {
node_id: nodeId,
title,
type,
}
})
.filter((item): item is NodeInfo => !!item)
}
const normalizeDefaultMetrics = (
resourceType: EvaluationResourceType,
value: EvaluationDefaultMetric[] | undefined,
): EvaluationMetric[] => {
if (!value?.length)
return []
return value
.map((item) => {
const metricId = typeof item.metric === 'string' ? item.metric : ''
if (!metricId)
return null
const metricOption = resolveMetricOption(resourceType, metricId)
return createBuiltinMetric(metricOption, normalizeNodeInfoList(item.node_info_list ?? []))
})
.filter((item): item is EvaluationMetric => !!item)
}
const normalizeCustomMetricMappings = (
value: EvaluationCustomizedMetric['input_fields'],
): CustomMetricMapping[] => {
if (!value)
return [createCustomMetricMapping()]
const mappings = Object.entries(value)
.filter((entry): entry is [string, string] => {
const [, targetVariableId] = entry
return typeof targetVariableId === 'string' && !!targetVariableId
})
.map(([sourceFieldId, targetVariableId]) => ({
id: createId('mapping'),
sourceFieldId,
targetVariableId,
}))
return mappings.length > 0 ? mappings : [createCustomMetricMapping()]
}
const normalizeCustomMetric = (
value: EvaluationCustomizedMetric | null | undefined,
): EvaluationMetric[] => {
if (!value)
return []
const workflowId = typeof value.evaluation_workflow_id === 'string' ? value.evaluation_workflow_id : null
if (!workflowId)
return []
const customMetric = createCustomMetric()
return [{
...customMetric,
customConfig: customMetric.customConfig
? {
...customMetric.customConfig,
workflowId,
mappings: normalizeCustomMetricMappings(value.input_fields),
}
: customMetric.customConfig,
}]
}
const normalizeConditionItem = (
resourceType: EvaluationResourceType,
value: EvaluationJudgementConditionItem,
): JudgmentConditionGroup['items'][number] => {
const fieldId = typeof value.fieldId === 'string'
? value.fieldId
: typeof value.field_id === 'string'
? value.field_id
: null
const operatorValue = typeof value.operator === 'string' ? value.operator : null
const field = getEvaluationMockConfig(resourceType).fieldOptions.find(option => option.id === fieldId)
const allowedOperators = field ? getComparisonOperators(field.type) : ['contains']
const operator = operatorValue && allowedOperators.includes(operatorValue as ComparisonOperator)
? operatorValue as ComparisonOperator
: field
? getDefaultOperator(field.type)
: 'contains'
const rawValue: EvaluationConditionValue = value.value ?? null
return {
id: typeof value.id === 'string' ? value.id : createId('condition'),
fieldId,
operator,
value: getConditionValue(field, operator, rawValue),
}
}
const normalizeConditionGroups = (
resourceType: EvaluationResourceType,
value: EvaluationConfig['judgement_conditions'],
): JudgmentConditionGroup[] => {
const groupsValue: EvaluationJudgementConditionGroup[] = Array.isArray(value)
? value
: Array.isArray(value?.groups)
? value.groups
: []
const groups = groupsValue
.map((group) => {
const itemsValue = Array.isArray(group.items) ? group.items : []
const items = itemsValue
.map(item => normalizeConditionItem(resourceType, item))
if (items.length === 0)
return null
return {
id: typeof group.id === 'string' ? group.id : createId('group'),
logicalOperator: group.logicalOperator === 'or' || group.logical_operator === 'or' ? 'or' : 'and',
items,
} satisfies JudgmentConditionGroup
})
.filter((group): group is JudgmentConditionGroup => !!group)
return groups.length > 0 ? groups : [createConditionGroup(resourceType)]
}
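Three fallback behaviors in the condition normalizers above are easy to miss, so spelled out here (inferred from the code; field types illustrative):

// - an operator the field type does not allow is coerced to the field's default,
//   e.g. a 'time' field arriving with 'contains' gets getDefaultOperator('time')
// - value-less operators stay value-less: getConditionValue(field, 'is_empty', true) → null
// - a group with no items is dropped, and if no group survives,
//   createConditionGroup(resourceType) supplies one default group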
export const buildResourceKey = (resourceType: EvaluationResourceType, resourceId: string) => `${resourceType}:${resourceId}`
const conditionOperatorsWithoutValue: ComparisonOperator[] = ['is_empty', 'is_not_empty']
export const requiresConditionValue = (operator: ComparisonOperator) => !conditionOperatorsWithoutValue.includes(operator)
export const getConditionValue = (
export function getConditionValue(
field: EvaluationFieldOption | undefined,
operator: ComparisonOperator,
previousValue: string | number | boolean | null = null,
) => {
) {
if (!field || !requiresConditionValue(operator))
return null
@@ -42,36 +213,42 @@ export const getConditionValue = (
return typeof previousValue === 'string' ? previousValue : null
}
export const createBuiltinMetric = (metric: MetricOption, nodeInfoList: NodeInfo[] = []): EvaluationMetric => ({
id: createId('metric'),
optionId: metric.id,
kind: 'builtin',
label: metric.label,
description: metric.description,
badges: metric.badges,
nodeInfoList,
})
export function createBuiltinMetric(metric: MetricOption, nodeInfoList: NodeInfo[] = []): EvaluationMetric {
return {
id: createId('metric'),
optionId: metric.id,
kind: 'builtin',
label: metric.label,
description: metric.description,
badges: metric.badges,
nodeInfoList,
}
}
export const createCustomMetricMapping = (): CustomMetricMapping => ({
id: createId('mapping'),
sourceFieldId: null,
targetVariableId: null,
})
export function createCustomMetricMapping(): CustomMetricMapping {
return {
id: createId('mapping'),
sourceFieldId: null,
targetVariableId: null,
}
}
export const createCustomMetric = (): EvaluationMetric => ({
id: createId('metric'),
optionId: createId('custom'),
kind: 'custom-workflow',
label: 'Custom Evaluator',
description: 'Map workflow variables to your evaluation inputs.',
badges: ['Workflow'],
customConfig: {
workflowId: null,
workflowAppId: null,
workflowName: null,
mappings: [createCustomMetricMapping()],
},
})
export function createCustomMetric(): EvaluationMetric {
return {
id: createId('metric'),
optionId: createId('custom'),
kind: 'custom-workflow',
label: 'Custom Evaluator',
description: 'Map workflow variables to your evaluation inputs.',
badges: ['Workflow'],
customConfig: {
workflowId: null,
workflowAppId: null,
workflowName: null,
mappings: [createCustomMetricMapping()],
},
}
}
export const buildConditionItem = (resourceType: EvaluationResourceType) => {
const field = getEvaluationMockConfig(resourceType).fieldOptions[0]
@@ -85,11 +262,13 @@ export const buildConditionItem = (resourceType: EvaluationResourceType) => {
}
}
export const createConditionGroup = (resourceType: EvaluationResourceType): JudgmentConditionGroup => ({
id: createId('group'),
logicalOperator: 'and',
items: [buildConditionItem(resourceType)],
})
export function createConditionGroup(resourceType: EvaluationResourceType): JudgmentConditionGroup {
return {
id: createId('group'),
logicalOperator: 'and',
items: [buildConditionItem(resourceType)],
}
}
export const buildInitialState = (resourceType: EvaluationResourceType): EvaluationResourceState => {
return {
@@ -102,6 +281,24 @@ export const buildInitialState = (resourceType: EvaluationResourceType): Evaluat
}
}
export const buildStateFromEvaluationConfig = (
resourceType: EvaluationResourceType,
config: EvaluationConfig,
): EvaluationResourceState => {
const metricsConfig: EvaluationMetricsConfig = config.metrics_config ?? {}
const defaultMetrics = normalizeDefaultMetrics(resourceType, metricsConfig.default_metrics)
const customMetrics = normalizeCustomMetric(metricsConfig.customized_metrics)
return {
...buildInitialState(resourceType),
judgeModelId: config.evaluation_model && config.evaluation_model_provider
? encodeModelSelection(config.evaluation_model_provider, config.evaluation_model)
: null,
metrics: [...defaultMetrics, ...customMetrics],
conditions: normalizeConditionGroups(resourceType, config.judgement_conditions),
}
}
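A worked example of the builder, reusing the payload from the new store spec (expected values mirror that spec, not an API contract):

const state = buildStateFromEvaluationConfig('apps', {
  evaluation_model: 'gpt-4o-mini',
  evaluation_model_provider: 'openai',
  metrics_config: {
    default_metrics: [{ metric: 'faithfulness', node_info_list: [{ node_id: 'node-1', title: 'Retriever', type: 'retriever' }] }],
    customized_metrics: { evaluation_workflow_id: 'workflow-precision-review', input_fields: { 'app.input.query': 'query' } },
  },
  judgement_conditions: [{ logical_operator: 'or', items: [{ field_id: 'system.has_context', operator: 'is', value: true }] }],
})
// state.judgeModelId === 'openai::gpt-4o-mini'
// state.metrics → one 'builtin' metric (faithfulness) plus one 'custom-workflow' metric
// state.conditions → a single group with logicalOperator 'or'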
const getResourceState = (
resources: EvaluationStoreResources,
resourceType: EvaluationResourceType,

View File

@@ -3,13 +3,14 @@ import type {
EvaluationResourceState,
EvaluationResourceType,
} from './types'
import type { NodeInfo } from '@/types/evaluation'
import type { EvaluationConfig, NodeInfo } from '@/types/evaluation'
import { create } from 'zustand'
import { getDefaultOperator, getEvaluationMockConfig } from './mock'
import {
buildConditionItem,
buildInitialState,
buildResourceKey,
buildStateFromEvaluationConfig,
createBatchTestRecord,
createBuiltinMetric,
createConditionGroup,
@@ -28,6 +29,7 @@ import {
type EvaluationStore = {
resources: Record<string, EvaluationResourceState>
ensureResource: (resourceType: EvaluationResourceType, resourceId: string) => void
hydrateResource: (resourceType: EvaluationResourceType, resourceId: string, config: EvaluationConfig) => void
setJudgeModel: (resourceType: EvaluationResourceType, resourceId: string, judgeModelId: string) => void
addBuiltinMetric: (resourceType: EvaluationResourceType, resourceId: string, optionId: string, nodeInfoList?: NodeInfo[]) => void
addCustomMetric: (resourceType: EvaluationResourceType, resourceId: string) => void
@@ -82,6 +84,19 @@ export const useEvaluationStore = create<EvaluationStore>((set, get) => ({
},
}))
},
hydrateResource: (resourceType, resourceId, config) => {
set(state => ({
resources: {
...state.resources,
[buildResourceKey(resourceType, resourceId)]: {
...buildStateFromEvaluationConfig(resourceType, config),
activeBatchTab: state.resources[buildResourceKey(resourceType, resourceId)]?.activeBatchTab ?? 'input-fields',
uploadedFileName: state.resources[buildResourceKey(resourceType, resourceId)]?.uploadedFileName ?? null,
batchRecords: state.resources[buildResourceKey(resourceType, resourceId)]?.batchRecords ?? [],
},
},
}))
},
setJudgeModel: (resourceType, resourceId, judgeModelId) => {
set(state => ({
resources: updateResourceState(state.resources, resourceType, resourceId, resource => ({
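A minimal sketch of the hydrate flow (ids illustrative): the server config replaces judge model, metrics, and conditions, while client-only batch state survives refetches.

const { ensureResource, hydrateResource } = useEvaluationStore.getState()
ensureResource('datasets', 'dataset-1') // seeds defaults under the key 'datasets:dataset-1'
hydrateResource('datasets', 'dataset-1', config) // config: EvaluationConfig from the query
// activeBatchTab, uploadedFileName, and batchRecords keep their current values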

View File

@@ -1,6 +1,6 @@
import type { NodeInfo } from '@/types/evaluation'
export type EvaluationResourceType = 'workflow' | 'pipeline' | 'snippet'
export type EvaluationResourceType = 'apps' | 'datasets' | 'snippets'
export type EvaluationResourceProps = {
resourceType: EvaluationResourceType

View File

@@ -22,7 +22,7 @@ const SnippetEvaluationPage = ({ snippetId }: SnippetEvaluationPageProps) => {
snippet={snippet}
section="evaluation"
>
<Evaluation resourceType="snippet" resourceId={snippetId} />
<Evaluation resourceType="snippets" resourceId={snippetId} />
</SnippetLayout>
)
}

View File

@@ -1,4 +1,5 @@
import type { AvailableEvaluationWorkflowsResponse } from '@/types/evaluation'
import type { EvaluationResourceType } from '@/app/components/evaluation/types'
import type { AvailableEvaluationWorkflowsResponse, EvaluationConfig } from '@/types/evaluation'
import {
keepPreviousData,
useInfiniteQuery,
@@ -28,6 +29,45 @@ const normalizeAvailableEvaluationWorkflowsParams = (params: AvailableEvaluation
}
}
const toEvaluationTargetType = (resourceType: Exclude<EvaluationResourceType, 'datasets'>) => {
return resourceType === 'snippets' ? 'snippets' : 'app'
}
const getEvaluationConfigQueryOptions = (
resourceType: EvaluationResourceType,
resourceId: string,
) => {
if (resourceType === 'datasets') {
return consoleQuery.datasetEvaluation.config.queryOptions({
input: {
params: {
datasetId: resourceId,
},
},
enabled: !!resourceId,
refetchOnWindowFocus: false,
})
}
return consoleQuery.evaluation.config.queryOptions({
input: {
params: {
targetType: toEvaluationTargetType(resourceType),
targetId: resourceId,
},
},
enabled: !!resourceId,
refetchOnWindowFocus: false,
})
}
export const useEvaluationConfig = (
resourceType: EvaluationResourceType,
resourceId: string,
) => {
return useQuery<EvaluationConfig>(getEvaluationConfigQueryOptions(resourceType, resourceId))
}
export const useAvailableEvaluationMetrics = (enabled = true) => {
return useQuery(consoleQuery.evaluation.availableMetrics.queryOptions({
enabled,
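A hedged usage sketch (the component and markup are illustrative, not part of this commit; EvaluationResourceProps comes from the evaluation component types): the hook hides the dataset/app endpoint split behind resourceType, so callers never assemble targetType/targetId themselves.

const JudgeModelLabel = ({ resourceType, resourceId }: EvaluationResourceProps) => {
  const { data: config } = useEvaluationConfig(resourceType, resourceId)
  if (!config?.evaluation_model)
    return null
  return <span>{`${config.evaluation_model_provider}::${config.evaluation_model}`}</span>
}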

View File

@@ -1,10 +1,38 @@
export type EvaluationTargetType = 'app' | 'snippets'
export type EvaluationMetricsConfig = {
default_metrics?: EvaluationDefaultMetric[]
customized_metrics?: EvaluationCustomizedMetric | null
}
export type EvaluationConditionValue = string | number | boolean | null
export type EvaluationJudgementConditionItem = {
id?: string
fieldId?: string
field_id?: string
operator?: string
value?: EvaluationConditionValue
}
export type EvaluationJudgementConditionGroup = {
id?: string
logicalOperator?: 'and' | 'or'
logical_operator?: 'and' | 'or'
items?: EvaluationJudgementConditionItem[]
}
export type EvaluationJudgementConditions
= | EvaluationJudgementConditionGroup[]
| {
groups?: EvaluationJudgementConditionGroup[]
}
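Both payload shapes the union above admits, including the snake_case/camelCase key variants the store normalizer accepts (values illustrative):

const asArray: EvaluationJudgementConditions = [
  { logical_operator: 'or', items: [{ field_id: 'system.has_context', operator: 'is', value: true }] },
]
const asWrappedGroups: EvaluationJudgementConditions = {
  groups: [{ logicalOperator: 'and', items: [{ fieldId: 'system.has_context', operator: 'is_empty' }] }],
}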
export type EvaluationConfig = {
evaluation_model: string | null
evaluation_model_provider: string | null
metrics_config: Record<string, unknown> | null
judgement_conditions: Record<string, unknown> | null
metrics_config: EvaluationMetricsConfig | null
judgement_conditions: EvaluationJudgementConditions | null
}
export type NodeInfo = {
@@ -20,8 +48,8 @@ export type EvaluationDefaultMetric = {
export type EvaluationCustomizedMetric = {
evaluation_workflow_id?: string
input_fields?: Record<string, unknown>
output_fields?: Record<string, unknown>[]
input_fields?: Record<string, string | null | undefined>
output_fields?: Array<Record<string, string | null | undefined>>
}
export type EvaluationConfigData = {
@@ -29,7 +57,7 @@ export type EvaluationConfigData = {
evaluation_model_provider?: string
default_metrics?: EvaluationDefaultMetric[]
customized_metrics?: EvaluationCustomizedMetric | null
judgment_config?: Record<string, unknown> | null
judgment_config?: EvaluationJudgementConditions | null
}
export type EvaluationRunRequest = EvaluationConfigData & {