diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/evaluation/page.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/evaluation/page.tsx
index 161f37108d..d602c15d5e 100644
--- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/evaluation/page.tsx
+++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/evaluation/page.tsx
@@ -5,7 +5,7 @@ const Page = async (props: {
}) => {
const { appId } = await props.params
- return <Evaluation resourceType="workflow" resourceId={appId} />
+ return <Evaluation resourceType="apps" resourceId={appId} />
}
export default Page
diff --git a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/evaluation/page.tsx b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/evaluation/page.tsx
index d502266d16..97ba166391 100644
--- a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/evaluation/page.tsx
+++ b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/evaluation/page.tsx
@@ -5,7 +5,7 @@ const Page = async (props: {
}) => {
const { datasetId } = await props.params
- return <Evaluation resourceType="pipeline" resourceId={datasetId} />
+ return <Evaluation resourceType="datasets" resourceId={datasetId} />
}
export default Page
diff --git a/web/app/components/evaluation/__tests__/index.spec.tsx b/web/app/components/evaluation/__tests__/index.spec.tsx
index 748ce5981f..302df31c9d 100644
--- a/web/app/components/evaluation/__tests__/index.spec.tsx
+++ b/web/app/components/evaluation/__tests__/index.spec.tsx
@@ -4,6 +4,7 @@ import { getEvaluationMockConfig } from '../mock'
import { useEvaluationStore } from '../store'
const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn())
+const mockUseEvaluationConfig = vi.hoisted(() => vi.fn())
const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn())
vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({
@@ -38,6 +39,7 @@ vi.mock('@/app/components/header/account-setting/model-provider-page/model-selec
}))
vi.mock('@/service/use-evaluation', () => ({
+ useEvaluationConfig: (...args: unknown[]) => mockUseEvaluationConfig(...args),
useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args),
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
}))
@@ -46,6 +48,9 @@ describe('Evaluation', () => {
beforeEach(() => {
useEvaluationStore.setState({ resources: {} })
vi.clearAllMocks()
+ mockUseEvaluationConfig.mockReturnValue({
+ data: null,
+ })
mockUseAvailableEvaluationMetrics.mockReturnValue({
data: {
@@ -72,7 +77,7 @@ describe('Evaluation', () => {
it('should search, select metric nodes, and create a batch history record', async () => {
vi.useFakeTimers()
- render()
+ render()
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini')
@@ -113,7 +118,7 @@ describe('Evaluation', () => {
})
it('should render time placeholders and hide the value row for empty operators', () => {
- const resourceType = 'workflow'
+ const resourceType = 'apps'
const resourceId = 'app-2'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -126,7 +131,7 @@ describe('Evaluation', () => {
store.ensureResource(resourceType, resourceId)
store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini')
- const group = useEvaluationStore.getState().resources['workflow:app-2'].conditions[0]
+ const group = useEvaluationStore.getState().resources['apps:app-2'].conditions[0]
groupId = group.id
itemId = group.items[0].id
@@ -166,7 +171,7 @@ describe('Evaluation', () => {
},
})
- render()
+ render()
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -181,7 +186,7 @@ describe('Evaluation', () => {
isLoading: false,
})
- render()
+ render()
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -210,7 +215,7 @@ describe('Evaluation', () => {
},
})
- render()
+ render()
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -224,7 +229,7 @@ describe('Evaluation', () => {
})
it('should render the pipeline-specific layout without auto-selecting a judge model', () => {
- render()
+ render()
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('empty')
expect(screen.getByText('evaluation.history.title')).toBeInTheDocument()
@@ -236,7 +241,7 @@ describe('Evaluation', () => {
})
it('should enable pipeline batch actions after selecting a judge model and metric', () => {
- render()
+ render()
fireEvent.click(screen.getByRole('button', { name: 'select-model' }))
fireEvent.click(screen.getByRole('button', { name: /Context Precision/i }))
diff --git a/web/app/components/evaluation/__tests__/store.spec.ts b/web/app/components/evaluation/__tests__/store.spec.ts
index 698231bf6f..9415e8430a 100644
--- a/web/app/components/evaluation/__tests__/store.spec.ts
+++ b/web/app/components/evaluation/__tests__/store.spec.ts
@@ -1,3 +1,4 @@
+import type { EvaluationConfig } from '@/types/evaluation'
import { getEvaluationMockConfig } from '../mock'
import {
getAllowedOperators,
@@ -12,7 +13,7 @@ describe('evaluation store', () => {
})
it('should configure a custom metric mapping to a valid state', () => {
- const resourceType = 'workflow'
+ const resourceType = 'apps'
const resourceId = 'app-1'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -20,7 +21,7 @@ describe('evaluation store', () => {
store.ensureResource(resourceType, resourceId)
store.addCustomMetric(resourceType, resourceId)
- const initialMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.kind === 'custom-workflow')
+ const initialMetric = useEvaluationStore.getState().resources['apps:app-1'].metrics.find(metric => metric.kind === 'custom-workflow')
expect(initialMetric).toBeDefined()
expect(isCustomMetricConfigured(initialMetric!)).toBe(false)
@@ -34,14 +35,14 @@ describe('evaluation store', () => {
targetVariableId: config.workflowOptions[0].targetVariables[0].id,
})
- const configuredMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.id === initialMetric!.id)
+ const configuredMetric = useEvaluationStore.getState().resources['apps:app-1'].metrics.find(metric => metric.id === initialMetric!.id)
expect(isCustomMetricConfigured(configuredMetric!)).toBe(true)
expect(configuredMetric!.customConfig!.workflowAppId).toBe('custom-workflow-app-id')
expect(configuredMetric!.customConfig!.workflowName).toBe(config.workflowOptions[0].label)
})
it('should add and remove builtin metrics', () => {
- const resourceType = 'workflow'
+ const resourceType = 'apps'
const resourceId = 'app-2'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -49,16 +50,16 @@ describe('evaluation store', () => {
store.ensureResource(resourceType, resourceId)
store.addBuiltinMetric(resourceType, resourceId, config.builtinMetrics[1].id)
- const addedMetric = useEvaluationStore.getState().resources['workflow:app-2'].metrics.find(metric => metric.optionId === config.builtinMetrics[1].id)
+ const addedMetric = useEvaluationStore.getState().resources['apps:app-2'].metrics.find(metric => metric.optionId === config.builtinMetrics[1].id)
expect(addedMetric).toBeDefined()
store.removeMetric(resourceType, resourceId, addedMetric!.id)
- expect(useEvaluationStore.getState().resources['workflow:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false)
+ expect(useEvaluationStore.getState().resources['apps:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false)
})
it('should upsert builtin metric node selections', () => {
- const resourceType = 'workflow'
+ const resourceType = 'apps'
const resourceId = 'app-4'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -73,38 +74,38 @@ describe('evaluation store', () => {
{ node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
])
- const metric = useEvaluationStore.getState().resources['workflow:app-4'].metrics.find(item => item.optionId === metricId)
+ const metric = useEvaluationStore.getState().resources['apps:app-4'].metrics.find(item => item.optionId === metricId)
expect(metric?.nodeInfoList).toEqual([
{ node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
])
- expect(useEvaluationStore.getState().resources['workflow:app-4'].metrics.filter(item => item.optionId === metricId)).toHaveLength(1)
+ expect(useEvaluationStore.getState().resources['apps:app-4'].metrics.filter(item => item.optionId === metricId)).toHaveLength(1)
})
it('should update condition groups and adapt operators to field types', () => {
- const resourceType = 'pipeline'
+ const resourceType = 'datasets'
const resourceId = 'dataset-1'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
store.ensureResource(resourceType, resourceId)
- const initialGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
+ const initialGroup = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0]
store.setConditionGroupOperator(resourceType, resourceId, initialGroup.id, 'or')
store.addConditionGroup(resourceType, resourceId)
const booleanField = config.fieldOptions.find(field => field.type === 'boolean')!
- const currentItem = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0].items[0]
+ const currentItem = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0].items[0]
store.updateConditionField(resourceType, resourceId, initialGroup.id, currentItem.id, booleanField.id)
- const updatedGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
+ const updatedGroup = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0]
expect(updatedGroup.logicalOperator).toBe('or')
expect(updatedGroup.items[0].operator).toBe('is')
expect(getAllowedOperators(resourceType, booleanField.id)).toEqual(['is', 'is_not'])
})
it('should support time fields and clear values for empty operators', () => {
- const resourceType = 'workflow'
+ const resourceType = 'apps'
const resourceId = 'app-3'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -112,15 +113,89 @@ describe('evaluation store', () => {
store.ensureResource(resourceType, resourceId)
const timeField = config.fieldOptions.find(field => field.type === 'time')!
- const item = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
+ const item = useEvaluationStore.getState().resources['apps:app-3'].conditions[0].items[0]
- store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, timeField.id)
- store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, 'is_empty')
+ store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['apps:app-3'].conditions[0].id, item.id, timeField.id)
+ store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['apps:app-3'].conditions[0].id, item.id, 'is_empty')
- const updatedItem = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
+ const updatedItem = useEvaluationStore.getState().resources['apps:app-3'].conditions[0].items[0]
expect(getAllowedOperators(resourceType, timeField.id)).toEqual(['is', 'before', 'after', 'is_empty', 'is_not_empty'])
expect(requiresConditionValue('is_empty')).toBe(false)
expect(updatedItem.value).toBeNull()
})
+
+ it('should hydrate resource state from evaluation config', () => {
+ const resourceType = 'apps'
+ const resourceId = 'app-5'
+ const store = useEvaluationStore.getState()
+ const config: EvaluationConfig = {
+ evaluation_model: 'gpt-4o-mini',
+ evaluation_model_provider: 'openai',
+ metrics_config: {
+ default_metrics: [{
+ metric: 'faithfulness',
+ node_info_list: [
+ { node_id: 'node-1', title: 'Retriever', type: 'retriever' },
+ ],
+ }],
+ customized_metrics: {
+ evaluation_workflow_id: 'workflow-precision-review',
+ input_fields: {
+ 'app.input.query': 'query',
+ },
+ },
+ },
+ judgement_conditions: [{
+ logical_operator: 'or',
+ items: [{
+ field_id: 'system.has_context',
+ operator: 'is',
+ value: true,
+ }],
+ }],
+ }
+
+ store.ensureResource(resourceType, resourceId)
+ store.setBatchTab(resourceType, resourceId, 'history')
+ store.setUploadedFileName(resourceType, resourceId, 'batch.csv')
+ useEvaluationStore.setState(state => ({
+ resources: {
+ ...state.resources,
+ 'apps:app-5': {
+ ...state.resources['apps:app-5'],
+ batchRecords: [{
+ id: 'batch-1',
+ fileName: 'batch.csv',
+ status: 'success',
+ startedAt: '10:00:00',
+ summary: 'App evaluation batch',
+ }],
+ },
+ },
+ }))
+ store.hydrateResource(resourceType, resourceId, config)
+
+ const hydratedState = useEvaluationStore.getState().resources['apps:app-5']
+
+ expect(hydratedState.judgeModelId).toBe('openai::gpt-4o-mini')
+ expect(hydratedState.metrics).toHaveLength(2)
+ expect(hydratedState.metrics[0].optionId).toBe('faithfulness')
+ expect(hydratedState.metrics[0].nodeInfoList).toEqual([
+ { node_id: 'node-1', title: 'Retriever', type: 'retriever' },
+ ])
+ expect(hydratedState.metrics[1].kind).toBe('custom-workflow')
+ expect(hydratedState.metrics[1].customConfig?.workflowId).toBe('workflow-precision-review')
+ expect(hydratedState.metrics[1].customConfig?.mappings[0].sourceFieldId).toBe('app.input.query')
+ expect(hydratedState.metrics[1].customConfig?.mappings[0].targetVariableId).toBe('query')
+ expect(hydratedState.conditions[0].logicalOperator).toBe('or')
+ expect(hydratedState.conditions[0].items[0]).toMatchObject({
+ fieldId: 'system.has_context',
+ operator: 'is',
+ value: true,
+ })
+ expect(hydratedState.activeBatchTab).toBe('history')
+ expect(hydratedState.uploadedFileName).toBe('batch.csv')
+ expect(hydratedState.batchRecords).toHaveLength(1)
+ })
})
diff --git a/web/app/components/evaluation/components/non-pipeline-evaluation.tsx b/web/app/components/evaluation/components/layout/non-pipeline-evaluation.tsx
similarity index 85%
rename from web/app/components/evaluation/components/non-pipeline-evaluation.tsx
rename to web/app/components/evaluation/components/layout/non-pipeline-evaluation.tsx
index 1b7d507339..5d47a754ff 100644
--- a/web/app/components/evaluation/components/non-pipeline-evaluation.tsx
+++ b/web/app/components/evaluation/components/layout/non-pipeline-evaluation.tsx
@@ -1,13 +1,13 @@
'use client'
-import type { EvaluationResourceProps } from '../types'
+import type { EvaluationResourceProps } from '../../types'
import { useTranslation } from 'react-i18next'
import { useDocLink } from '@/context/i18n'
-import BatchTestPanel from './batch-test-panel'
-import ConditionsSection from './conditions-section'
-import JudgeModelSelector from './judge-model-selector'
-import MetricSection from './metric-section'
-import SectionHeader, { InlineSectionHeader } from './section-header'
+import BatchTestPanel from '../batch-test-panel'
+import ConditionsSection from '../conditions-section'
+import JudgeModelSelector from '../judge-model-selector'
+import MetricSection from '../metric-section'
+import SectionHeader, { InlineSectionHeader } from '../section-header'
const NonPipelineEvaluation = ({
resourceType,
diff --git a/web/app/components/evaluation/components/pipeline-evaluation.tsx b/web/app/components/evaluation/components/layout/pipeline-evaluation.tsx
similarity index 97%
rename from web/app/components/evaluation/components/pipeline-evaluation.tsx
rename to web/app/components/evaluation/components/layout/pipeline-evaluation.tsx
index 4464b85a73..73ba754f28 100644
--- a/web/app/components/evaluation/components/pipeline-evaluation.tsx
+++ b/web/app/components/evaluation/components/layout/pipeline-evaluation.tsx
@@ -1,6 +1,6 @@
'use client'
-import type { EvaluationResourceProps, MetricOption } from '../types'
+import type { EvaluationResourceProps, MetricOption } from '../../types'
import { useEffect, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import Badge from '@/app/components/base/badge'
@@ -11,10 +11,10 @@ import { toast } from '@/app/components/base/ui/toast'
import { Tooltip, TooltipContent, TooltipTrigger } from '@/app/components/base/ui/tooltip'
import { useDocLink } from '@/context/i18n'
import { cn } from '@/utils/classnames'
-import { getEvaluationMockConfig } from '../mock'
-import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../store'
-import JudgeModelSelector from './judge-model-selector'
-import SectionHeader, { InlineSectionHeader } from './section-header'
+import { getEvaluationMockConfig } from '../../mock'
+import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../../store'
+import JudgeModelSelector from '../judge-model-selector'
+import SectionHeader, { InlineSectionHeader } from '../section-header'
type PipelineMetricItemProps = {
metric: MetricOption
diff --git a/web/app/components/evaluation/components/metric-section/__tests__/index.spec.tsx b/web/app/components/evaluation/components/metric-section/__tests__/index.spec.tsx
index 7b1cb56daf..03343d2a1c 100644
--- a/web/app/components/evaluation/components/metric-section/__tests__/index.spec.tsx
+++ b/web/app/components/evaluation/components/metric-section/__tests__/index.spec.tsx
@@ -1,5 +1,5 @@
-import { act, fireEvent, render, screen } from '@testing-library/react'
import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
+import { act, fireEvent, render, screen } from '@testing-library/react'
import MetricSection from '..'
import { useEvaluationStore } from '../../../store'
@@ -13,7 +13,7 @@ vi.mock('@/service/use-evaluation', () => ({
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
}))
-const resourceType = 'workflow' as const
+const resourceType = 'apps' as const
const resourceId = 'metric-section-resource'
const renderMetricSection = () => {
diff --git a/web/app/components/evaluation/components/metric-section/index.tsx b/web/app/components/evaluation/components/metric-section/index.tsx
index eac266c996..7887b2733b 100644
--- a/web/app/components/evaluation/components/metric-section/index.tsx
+++ b/web/app/components/evaluation/components/metric-section/index.tsx
@@ -21,7 +21,7 @@ const MetricSection = ({
const [nodeInfoMap, setNodeInfoMap] = useState<Record<string, NodeInfo[]>>({})
const hasMetrics = resource.metrics.length > 0
const hasBuiltinMetrics = resource.metrics.some(metric => metric.kind === 'builtin')
- const shouldLoadNodeInfo = resourceType !== 'pipeline' && !!resourceId && hasBuiltinMetrics
+ const shouldLoadNodeInfo = resourceType !== 'datasets' && !!resourceId && hasBuiltinMetrics
const { data: availableMetricsData } = useAvailableEvaluationMetrics(shouldLoadNodeInfo)
const { mutate: loadNodeInfo } = useEvaluationNodeInfoMutation()
const availableMetricIds = useMemo(() => availableMetricsData?.metrics ?? [], [availableMetricsData?.metrics])
diff --git a/web/app/components/evaluation/components/metric-selector/use-metric-selector-data.ts b/web/app/components/evaluation/components/metric-selector/use-metric-selector-data.ts
index 317b325a0f..46f8e9e740 100644
--- a/web/app/components/evaluation/components/metric-selector/use-metric-selector-data.ts
+++ b/web/app/components/evaluation/components/metric-selector/use-metric-selector-data.ts
@@ -13,7 +13,7 @@ import {
type UseMetricSelectorDataOptions = {
open: boolean
query: string
- resourceType: 'workflow' | 'pipeline' | 'snippet'
+ resourceType: 'apps' | 'datasets' | 'snippets'
resourceId: string
nodeInfoMap: Record<string, NodeInfo[]>
setNodeInfoMap: (value: Record<string, NodeInfo[]>) => void
@@ -63,7 +63,7 @@ export const useMetricSelectorData = ({
if (!open)
return
- if (resourceType === 'pipeline' || !resourceId || availableMetricIds.length === 0)
+ if (resourceType === 'datasets' || !resourceId || availableMetricIds.length === 0)
return
let isActive = true
@@ -107,7 +107,7 @@ export const useMetricSelectorData = ({
|| metric.label.toLowerCase().includes(keyword)
|| metric.description.toLowerCase().includes(keyword)
const metricNodes = nodeInfoMap[metric.id] ?? []
- const supportsNodeSelection = resourceType !== 'pipeline'
+ const supportsNodeSelection = resourceType !== 'datasets'
const hasNoNodeInfo = supportsNodeSelection && metricNodes.length === 0
if (hasNoNodeInfo) {
diff --git a/web/app/components/evaluation/components/metric-selector/utils.ts b/web/app/components/evaluation/components/metric-selector/utils.ts
index 8c369d9920..a88e5069d7 100644
--- a/web/app/components/evaluation/components/metric-selector/utils.ts
+++ b/web/app/components/evaluation/components/metric-selector/utils.ts
@@ -2,8 +2,8 @@ import type { MetricOption } from '../../types'
import type { MetricVisualTone } from './types'
import type { EvaluationTargetType, NodeInfo } from '@/types/evaluation'
-export const toEvaluationTargetType = (resourceType: 'workflow' | 'snippet'): EvaluationTargetType => {
- return resourceType === 'snippet' ? 'snippets' : 'app'
+export const toEvaluationTargetType = (resourceType: 'apps' | 'snippets'): EvaluationTargetType => {
+ return resourceType === 'snippets' ? 'snippets' : 'app'
}
const humanizeMetricId = (metricId: string) => {
diff --git a/web/app/components/evaluation/index.tsx b/web/app/components/evaluation/index.tsx
index 5806bf140a..b12157b7fa 100644
--- a/web/app/components/evaluation/index.tsx
+++ b/web/app/components/evaluation/index.tsx
@@ -2,21 +2,31 @@
import type { EvaluationResourceProps } from './types'
import { useEffect } from 'react'
-import NonPipelineEvaluation from './components/non-pipeline-evaluation'
-import PipelineEvaluation from './components/pipeline-evaluation'
+import { useEvaluationConfig } from '@/service/use-evaluation'
+import NonPipelineEvaluation from './components/layout/non-pipeline-evaluation'
+import PipelineEvaluation from './components/layout/pipeline-evaluation'
import { useEvaluationStore } from './store'
const Evaluation = ({
resourceType,
resourceId,
}: EvaluationResourceProps) => {
+ const { data: config } = useEvaluationConfig(resourceType, resourceId)
const ensureResource = useEvaluationStore(state => state.ensureResource)
+ const hydrateResource = useEvaluationStore(state => state.hydrateResource)
useEffect(() => {
ensureResource(resourceType, resourceId)
}, [ensureResource, resourceId, resourceType])
- if (resourceType === 'pipeline') {
+ useEffect(() => {
+ if (!config)
+ return
+
+ hydrateResource(resourceType, resourceId, config)
+ }, [config, hydrateResource, resourceId, resourceType])
+
+ if (resourceType === 'datasets') {
return (
{
- if (resourceType === 'pipeline') {
+ if (resourceType === 'datasets') {
return {
judgeModels,
builtinMetrics: pipelineBuiltinMetrics,
@@ -176,7 +176,7 @@ export const getEvaluationMockConfig = (resourceType: EvaluationResourceType): E
}
}
- if (resourceType === 'snippet') {
+ if (resourceType === 'snippets') {
return {
judgeModels,
builtinMetrics,
diff --git a/web/app/components/evaluation/store-utils.ts b/web/app/components/evaluation/store-utils.ts
index a03c12d259..c1acc1300c 100644
--- a/web/app/components/evaluation/store-utils.ts
+++ b/web/app/components/evaluation/store-utils.ts
@@ -9,24 +9,195 @@ import type {
JudgmentConditionGroup,
MetricOption,
} from './types'
-import type { NodeInfo } from '@/types/evaluation'
+import type {
+ EvaluationConditionValue,
+ EvaluationConfig,
+ EvaluationCustomizedMetric,
+ EvaluationDefaultMetric,
+ EvaluationJudgementConditionGroup,
+ EvaluationJudgementConditionItem,
+ EvaluationMetricsConfig,
+ NodeInfo,
+} from '@/types/evaluation'
import { getComparisonOperators, getDefaultOperator, getEvaluationMockConfig } from './mock'
+import { encodeModelSelection } from './utils'
type EvaluationStoreResources = Record<string, EvaluationResourceState>
const createId = (prefix: string) => `${prefix}-${Math.random().toString(36).slice(2, 10)}`
+const humanizeMetricId = (metricId: string) => {
+ return metricId
+ .split(/[-_]/g)
+ .filter(Boolean)
+ .map(part => part.charAt(0).toUpperCase() + part.slice(1))
+ .join(' ')
+}
+
+const resolveMetricOption = (resourceType: EvaluationResourceType, metricId: string): MetricOption => {
+ const config = getEvaluationMockConfig(resourceType)
+ return config.builtinMetrics.find(metric => metric.id === metricId) ?? {
+ id: metricId,
+ label: humanizeMetricId(metricId),
+ description: '',
+ group: config.builtinMetrics[0]?.group ?? 'other',
+ badges: ['Built-in'],
+ }
+}
+
+const normalizeNodeInfoList = (value: NodeInfo[] | undefined): NodeInfo[] => {
+ if (!value?.length)
+ return []
+
+ return value
+ .map((item) => {
+ const nodeId = typeof item.node_id === 'string' ? item.node_id : ''
+ const title = typeof item.title === 'string' ? item.title : nodeId
+ const type = typeof item.type === 'string' ? item.type : ''
+
+ if (!nodeId)
+ return null
+
+ return {
+ node_id: nodeId,
+ title,
+ type,
+ }
+ })
+ .filter((item): item is NodeInfo => !!item)
+}
+
+const normalizeDefaultMetrics = (
+ resourceType: EvaluationResourceType,
+ value: EvaluationDefaultMetric[] | undefined,
+): EvaluationMetric[] => {
+ if (!value?.length)
+ return []
+
+ return value
+ .map((item) => {
+ const metricId = typeof item.metric === 'string' ? item.metric : ''
+ if (!metricId)
+ return null
+
+ const metricOption = resolveMetricOption(resourceType, metricId)
+ return createBuiltinMetric(metricOption, normalizeNodeInfoList(item.node_info_list ?? []))
+ })
+ .filter((item): item is EvaluationMetric => !!item)
+}
+
+const normalizeCustomMetricMappings = (
+ value: EvaluationCustomizedMetric['input_fields'],
+): CustomMetricMapping[] => {
+ if (!value)
+ return [createCustomMetricMapping()]
+
+ const mappings = Object.entries(value)
+ .filter((entry): entry is [string, string] => {
+ const [, targetVariableId] = entry
+ return typeof targetVariableId === 'string' && !!targetVariableId
+ })
+ .map(([sourceFieldId, targetVariableId]) => ({
+ id: createId('mapping'),
+ sourceFieldId,
+ targetVariableId,
+ }))
+
+ return mappings.length > 0 ? mappings : [createCustomMetricMapping()]
+}
+
+const normalizeCustomMetric = (
+ value: EvaluationCustomizedMetric | null | undefined,
+): EvaluationMetric[] => {
+ if (!value)
+ return []
+
+ const workflowId = typeof value.evaluation_workflow_id === 'string' ? value.evaluation_workflow_id : null
+ if (!workflowId)
+ return []
+
+ const customMetric = createCustomMetric()
+
+ return [{
+ ...customMetric,
+ customConfig: customMetric.customConfig
+ ? {
+ ...customMetric.customConfig,
+ workflowId,
+ mappings: normalizeCustomMetricMappings(value.input_fields),
+ }
+ : customMetric.customConfig,
+ }]
+}
+
+const normalizeConditionItem = (
+ resourceType: EvaluationResourceType,
+ value: EvaluationJudgementConditionItem,
+): JudgmentConditionGroup['items'][number] => {
+ const fieldId = typeof value.fieldId === 'string'
+ ? value.fieldId
+ : typeof value.field_id === 'string'
+ ? value.field_id
+ : null
+ const operatorValue = typeof value.operator === 'string' ? value.operator : null
+ const field = getEvaluationMockConfig(resourceType).fieldOptions.find(option => option.id === fieldId)
+ const allowedOperators = field ? getComparisonOperators(field.type) : ['contains']
+ const operator = operatorValue && allowedOperators.includes(operatorValue as ComparisonOperator)
+ ? operatorValue as ComparisonOperator
+ : field
+ ? getDefaultOperator(field.type)
+ : 'contains'
+ const rawValue: EvaluationConditionValue = value.value ?? null
+
+ return {
+ id: typeof value.id === 'string' ? value.id : createId('condition'),
+ fieldId,
+ operator,
+ value: getConditionValue(field, operator, rawValue),
+ }
+}
+
+const normalizeConditionGroups = (
+ resourceType: EvaluationResourceType,
+ value: EvaluationConfig['judgement_conditions'],
+): JudgmentConditionGroup[] => {
+ const groupsValue: EvaluationJudgementConditionGroup[] = Array.isArray(value)
+ ? value
+ : Array.isArray(value?.groups)
+ ? value.groups
+ : []
+
+ const groups = groupsValue
+ .map((group) => {
+ const itemsValue = Array.isArray(group.items) ? group.items : []
+ const items = itemsValue
+ .map(item => normalizeConditionItem(resourceType, item))
+
+ if (items.length === 0)
+ return null
+
+ return {
+ id: typeof group.id === 'string' ? group.id : createId('group'),
+ logicalOperator: group.logicalOperator === 'or' || group.logical_operator === 'or' ? 'or' : 'and',
+ items,
+ } satisfies JudgmentConditionGroup
+ })
+ .filter((group): group is JudgmentConditionGroup => !!group)
+
+ return groups.length > 0 ? groups : [createConditionGroup(resourceType)]
+}
+
export const buildResourceKey = (resourceType: EvaluationResourceType, resourceId: string) => `${resourceType}:${resourceId}`
const conditionOperatorsWithoutValue: ComparisonOperator[] = ['is_empty', 'is_not_empty']
export const requiresConditionValue = (operator: ComparisonOperator) => !conditionOperatorsWithoutValue.includes(operator)
-export const getConditionValue = (
+export function getConditionValue(
field: EvaluationFieldOption | undefined,
operator: ComparisonOperator,
previousValue: string | number | boolean | null = null,
-) => {
+) {
if (!field || !requiresConditionValue(operator))
return null
@@ -42,36 +213,42 @@ export const getConditionValue = (
return typeof previousValue === 'string' ? previousValue : null
}
-export const createBuiltinMetric = (metric: MetricOption, nodeInfoList: NodeInfo[] = []): EvaluationMetric => ({
- id: createId('metric'),
- optionId: metric.id,
- kind: 'builtin',
- label: metric.label,
- description: metric.description,
- badges: metric.badges,
- nodeInfoList,
-})
+export function createBuiltinMetric(metric: MetricOption, nodeInfoList: NodeInfo[] = []): EvaluationMetric {
+ return {
+ id: createId('metric'),
+ optionId: metric.id,
+ kind: 'builtin',
+ label: metric.label,
+ description: metric.description,
+ badges: metric.badges,
+ nodeInfoList,
+ }
+}
-export const createCustomMetricMapping = (): CustomMetricMapping => ({
- id: createId('mapping'),
- sourceFieldId: null,
- targetVariableId: null,
-})
+export function createCustomMetricMapping(): CustomMetricMapping {
+ return {
+ id: createId('mapping'),
+ sourceFieldId: null,
+ targetVariableId: null,
+ }
+}
-export const createCustomMetric = (): EvaluationMetric => ({
- id: createId('metric'),
- optionId: createId('custom'),
- kind: 'custom-workflow',
- label: 'Custom Evaluator',
- description: 'Map workflow variables to your evaluation inputs.',
- badges: ['Workflow'],
- customConfig: {
- workflowId: null,
- workflowAppId: null,
- workflowName: null,
- mappings: [createCustomMetricMapping()],
- },
-})
+export function createCustomMetric(): EvaluationMetric {
+ return {
+ id: createId('metric'),
+ optionId: createId('custom'),
+ kind: 'custom-workflow',
+ label: 'Custom Evaluator',
+ description: 'Map workflow variables to your evaluation inputs.',
+ badges: ['Workflow'],
+ customConfig: {
+ workflowId: null,
+ workflowAppId: null,
+ workflowName: null,
+ mappings: [createCustomMetricMapping()],
+ },
+ }
+}
export const buildConditionItem = (resourceType: EvaluationResourceType) => {
const field = getEvaluationMockConfig(resourceType).fieldOptions[0]
@@ -85,11 +262,13 @@ export const buildConditionItem = (resourceType: EvaluationResourceType) => {
}
}
-export const createConditionGroup = (resourceType: EvaluationResourceType): JudgmentConditionGroup => ({
- id: createId('group'),
- logicalOperator: 'and',
- items: [buildConditionItem(resourceType)],
-})
+export function createConditionGroup(resourceType: EvaluationResourceType): JudgmentConditionGroup {
+ return {
+ id: createId('group'),
+ logicalOperator: 'and',
+ items: [buildConditionItem(resourceType)],
+ }
+}
export const buildInitialState = (resourceType: EvaluationResourceType): EvaluationResourceState => {
return {
@@ -102,6 +281,24 @@ export const buildInitialState = (resourceType: EvaluationResourceType): Evaluat
}
}
+export const buildStateFromEvaluationConfig = (
+ resourceType: EvaluationResourceType,
+ config: EvaluationConfig,
+): EvaluationResourceState => {
+ const metricsConfig: EvaluationMetricsConfig = config.metrics_config ?? {}
+ const defaultMetrics = normalizeDefaultMetrics(resourceType, metricsConfig.default_metrics)
+ const customMetrics = normalizeCustomMetric(metricsConfig.customized_metrics)
+
+ return {
+ ...buildInitialState(resourceType),
+ judgeModelId: config.evaluation_model && config.evaluation_model_provider
+ ? encodeModelSelection(config.evaluation_model_provider, config.evaluation_model)
+ : null,
+ metrics: [...defaultMetrics, ...customMetrics],
+ conditions: normalizeConditionGroups(resourceType, config.judgement_conditions),
+ }
+}
+
const getResourceState = (
resources: EvaluationStoreResources,
resourceType: EvaluationResourceType,
diff --git a/web/app/components/evaluation/store.ts b/web/app/components/evaluation/store.ts
index c91744da9b..61e1f773bb 100644
--- a/web/app/components/evaluation/store.ts
+++ b/web/app/components/evaluation/store.ts
@@ -3,13 +3,14 @@ import type {
EvaluationResourceState,
EvaluationResourceType,
} from './types'
-import type { NodeInfo } from '@/types/evaluation'
+import type { EvaluationConfig, NodeInfo } from '@/types/evaluation'
import { create } from 'zustand'
import { getDefaultOperator, getEvaluationMockConfig } from './mock'
import {
buildConditionItem,
buildInitialState,
buildResourceKey,
+ buildStateFromEvaluationConfig,
createBatchTestRecord,
createBuiltinMetric,
createConditionGroup,
@@ -28,6 +29,7 @@ import {
type EvaluationStore = {
resources: Record<string, EvaluationResourceState>
ensureResource: (resourceType: EvaluationResourceType, resourceId: string) => void
+ hydrateResource: (resourceType: EvaluationResourceType, resourceId: string, config: EvaluationConfig) => void
setJudgeModel: (resourceType: EvaluationResourceType, resourceId: string, judgeModelId: string) => void
addBuiltinMetric: (resourceType: EvaluationResourceType, resourceId: string, optionId: string, nodeInfoList?: NodeInfo[]) => void
addCustomMetric: (resourceType: EvaluationResourceType, resourceId: string) => void
@@ -82,6 +84,19 @@ export const useEvaluationStore = create<EvaluationStore>((set, get) => ({
},
}))
},
+ hydrateResource: (resourceType, resourceId, config) => {
+ set(state => ({
+ resources: {
+ ...state.resources,
+ [buildResourceKey(resourceType, resourceId)]: {
+ ...buildStateFromEvaluationConfig(resourceType, config),
+ activeBatchTab: state.resources[buildResourceKey(resourceType, resourceId)]?.activeBatchTab ?? 'input-fields',
+ uploadedFileName: state.resources[buildResourceKey(resourceType, resourceId)]?.uploadedFileName ?? null,
+ batchRecords: state.resources[buildResourceKey(resourceType, resourceId)]?.batchRecords ?? [],
+ },
+ },
+ }))
+ },
setJudgeModel: (resourceType, resourceId, judgeModelId) => {
set(state => ({
resources: updateResourceState(state.resources, resourceType, resourceId, resource => ({
diff --git a/web/app/components/evaluation/types.ts b/web/app/components/evaluation/types.ts
index 7cdf67b3c9..936c47365e 100644
--- a/web/app/components/evaluation/types.ts
+++ b/web/app/components/evaluation/types.ts
@@ -1,6 +1,6 @@
import type { NodeInfo } from '@/types/evaluation'
-export type EvaluationResourceType = 'workflow' | 'pipeline' | 'snippet'
+export type EvaluationResourceType = 'apps' | 'datasets' | 'snippets'
export type EvaluationResourceProps = {
resourceType: EvaluationResourceType
diff --git a/web/app/components/snippets/snippet-evaluation-page.tsx b/web/app/components/snippets/snippet-evaluation-page.tsx
index 736f36fc1e..5691be1977 100644
--- a/web/app/components/snippets/snippet-evaluation-page.tsx
+++ b/web/app/components/snippets/snippet-evaluation-page.tsx
@@ -22,7 +22,7 @@ const SnippetEvaluationPage = ({ snippetId }: SnippetEvaluationPageProps) => {
snippet={snippet}
section="evaluation"
>
- <Evaluation resourceType="snippet" resourceId={snippetId} />
+ <Evaluation resourceType="snippets" resourceId={snippetId} />
)
}
diff --git a/web/service/use-evaluation.ts b/web/service/use-evaluation.ts
index 45a5e5bc29..e57aa7e162 100644
--- a/web/service/use-evaluation.ts
+++ b/web/service/use-evaluation.ts
@@ -1,4 +1,5 @@
-import type { AvailableEvaluationWorkflowsResponse } from '@/types/evaluation'
+import type { EvaluationResourceType } from '@/app/components/evaluation/types'
+import type { AvailableEvaluationWorkflowsResponse, EvaluationConfig } from '@/types/evaluation'
import {
keepPreviousData,
useInfiniteQuery,
@@ -28,6 +29,45 @@ const normalizeAvailableEvaluationWorkflowsParams = (params: AvailableEvaluation
}
}
+const toEvaluationTargetType = (resourceType: Exclude<EvaluationResourceType, 'datasets'>) => {
+ return resourceType === 'snippets' ? 'snippets' : 'app'
+}
+
+const getEvaluationConfigQueryOptions = (
+ resourceType: EvaluationResourceType,
+ resourceId: string,
+) => {
+ if (resourceType === 'datasets') {
+ return consoleQuery.datasetEvaluation.config.queryOptions({
+ input: {
+ params: {
+ datasetId: resourceId,
+ },
+ },
+ enabled: !!resourceId,
+ refetchOnWindowFocus: false,
+ })
+ }
+
+ return consoleQuery.evaluation.config.queryOptions({
+ input: {
+ params: {
+ targetType: toEvaluationTargetType(resourceType),
+ targetId: resourceId,
+ },
+ },
+ enabled: !!resourceId,
+ refetchOnWindowFocus: false,
+ })
+}
+
+export const useEvaluationConfig = (
+ resourceType: EvaluationResourceType,
+ resourceId: string,
+) => {
+ return useQuery(getEvaluationConfigQueryOptions(resourceType, resourceId))
+}
+
export const useAvailableEvaluationMetrics = (enabled = true) => {
return useQuery(consoleQuery.evaluation.availableMetrics.queryOptions({
enabled,
diff --git a/web/types/evaluation.ts b/web/types/evaluation.ts
index be9a532c26..0c92d34fc2 100644
--- a/web/types/evaluation.ts
+++ b/web/types/evaluation.ts
@@ -1,10 +1,38 @@
export type EvaluationTargetType = 'app' | 'snippets'
+export type EvaluationMetricsConfig = {
+ default_metrics?: EvaluationDefaultMetric[]
+ customized_metrics?: EvaluationCustomizedMetric | null
+}
+
+export type EvaluationConditionValue = string | number | boolean | null
+
+export type EvaluationJudgementConditionItem = {
+ id?: string
+ fieldId?: string
+ field_id?: string
+ operator?: string
+ value?: EvaluationConditionValue
+}
+
+export type EvaluationJudgementConditionGroup = {
+ id?: string
+ logicalOperator?: 'and' | 'or'
+ logical_operator?: 'and' | 'or'
+ items?: EvaluationJudgementConditionItem[]
+}
+
+export type EvaluationJudgementConditions
+ = | EvaluationJudgementConditionGroup[]
+ | {
+ groups?: EvaluationJudgementConditionGroup[]
+ }
+
export type EvaluationConfig = {
evaluation_model: string | null
evaluation_model_provider: string | null
- metrics_config: Record | null
- judgement_conditions: Record | null
+ metrics_config: EvaluationMetricsConfig | null
+ judgement_conditions: EvaluationJudgementConditions | null
}
export type NodeInfo = {
@@ -20,8 +48,8 @@ export type EvaluationDefaultMetric = {
export type EvaluationCustomizedMetric = {
evaluation_workflow_id?: string
- input_fields?: Record
- output_fields?: Record[]
+ input_fields?: Record
+ output_fields?: Array>
}
export type EvaluationConfigData = {
@@ -29,7 +57,7 @@ export type EvaluationConfigData = {
evaluation_model_provider?: string
default_metrics?: EvaluationDefaultMetric[]
customized_metrics?: EvaluationCustomizedMetric | null
- judgment_config?: Record | null
+ judgment_config?: EvaluationJudgementConditions | null
}
export type EvaluationRunRequest = EvaluationConfigData & {