diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/evaluation/page.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/evaluation/page.tsx
index 161f37108d..d602c15d5e 100644
--- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/evaluation/page.tsx
+++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/evaluation/page.tsx
@@ -5,7 +5,7 @@ const Page = async (props: {
 }) => {
   const { appId } = await props.params
 
-  return
+  return
 }
 
 export default Page
diff --git a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/evaluation/page.tsx b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/evaluation/page.tsx
index d502266d16..97ba166391 100644
--- a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/evaluation/page.tsx
+++ b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/evaluation/page.tsx
@@ -5,7 +5,7 @@ const Page = async (props: {
 }) => {
   const { datasetId } = await props.params
 
-  return
+  return
 }
 
 export default Page
diff --git a/web/app/components/evaluation/__tests__/index.spec.tsx b/web/app/components/evaluation/__tests__/index.spec.tsx
index 748ce5981f..302df31c9d 100644
--- a/web/app/components/evaluation/__tests__/index.spec.tsx
+++ b/web/app/components/evaluation/__tests__/index.spec.tsx
@@ -4,6 +4,7 @@ import { getEvaluationMockConfig } from '../mock'
 import { useEvaluationStore } from '../store'
 
 const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn())
+const mockUseEvaluationConfig = vi.hoisted(() => vi.fn())
 const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn())
 
 vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({
@@ -38,6 +39,7 @@ vi.mock('@/app/components/header/account-setting/model-provider-page/model-selec
 }))
 
 vi.mock('@/service/use-evaluation', () => ({
+  useEvaluationConfig: (...args: unknown[]) => mockUseEvaluationConfig(...args),
   useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args),
   useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
 }))
@@ -46,6 +48,9 @@ describe('Evaluation', () => {
   beforeEach(() => {
     useEvaluationStore.setState({ resources: {} })
     vi.clearAllMocks()
+    mockUseEvaluationConfig.mockReturnValue({
+      data: null,
+    })
 
     mockUseAvailableEvaluationMetrics.mockReturnValue({
       data: {
@@ -72,7 +77,7 @@
   it('should search, select metric nodes, and create a batch history record', async () => {
     vi.useFakeTimers()
 
-    render()
+    render()
 
     expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini')
@@ -113,7 +118,7 @@
   })
 
   it('should render time placeholders and hide the value row for empty operators', () => {
-    const resourceType = 'workflow'
+    const resourceType = 'apps'
     const resourceId = 'app-2'
     const store = useEvaluationStore.getState()
     const config = getEvaluationMockConfig(resourceType)
@@ -126,7 +131,7 @@
     store.ensureResource(resourceType, resourceId)
     store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini')
 
-    const group = useEvaluationStore.getState().resources['workflow:app-2'].conditions[0]
+    const group = useEvaluationStore.getState().resources['apps:app-2'].conditions[0]
     groupId = group.id
     itemId = group.items[0].id
@@ -166,7 +171,7 @@
       },
     })
 
-    render()
+    render()
 
     fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -181,7 +186,7 @@ describe('Evaluation', () => {
       isLoading: false,
     })
 
-    render()
+    render()
 
     fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -210,7 +215,7 @@ describe('Evaluation', () => {
       },
     })
 
-    render()
+    render()
 
     fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -224,7 +229,7 @@
   })
 
   it('should render the pipeline-specific layout without auto-selecting a judge model', () => {
-    render()
+    render()
 
     expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('empty')
     expect(screen.getByText('evaluation.history.title')).toBeInTheDocument()
@@ -236,7 +241,7 @@
   })
 
   it('should enable pipeline batch actions after selecting a judge model and metric', () => {
-    render()
+    render()
 
     fireEvent.click(screen.getByRole('button', { name: 'select-model' }))
     fireEvent.click(screen.getByRole('button', { name: /Context Precision/i }))
diff --git a/web/app/components/evaluation/__tests__/store.spec.ts b/web/app/components/evaluation/__tests__/store.spec.ts
index 698231bf6f..9415e8430a 100644
--- a/web/app/components/evaluation/__tests__/store.spec.ts
+++ b/web/app/components/evaluation/__tests__/store.spec.ts
@@ -1,3 +1,4 @@
+import type { EvaluationConfig } from '@/types/evaluation'
 import { getEvaluationMockConfig } from '../mock'
 import {
   getAllowedOperators,
@@ -12,7 +13,7 @@ describe('evaluation store', () => {
   })
 
   it('should configure a custom metric mapping to a valid state', () => {
-    const resourceType = 'workflow'
+    const resourceType = 'apps'
     const resourceId = 'app-1'
     const store = useEvaluationStore.getState()
     const config = getEvaluationMockConfig(resourceType)
@@ -20,7 +21,7 @@
     store.ensureResource(resourceType, resourceId)
     store.addCustomMetric(resourceType, resourceId)
 
-    const initialMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.kind === 'custom-workflow')
+    const initialMetric = useEvaluationStore.getState().resources['apps:app-1'].metrics.find(metric => metric.kind === 'custom-workflow')
 
     expect(initialMetric).toBeDefined()
     expect(isCustomMetricConfigured(initialMetric!)).toBe(false)
@@ -34,14 +35,14 @@
       targetVariableId: config.workflowOptions[0].targetVariables[0].id,
     })
 
-    const configuredMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.id === initialMetric!.id)
+    const configuredMetric = useEvaluationStore.getState().resources['apps:app-1'].metrics.find(metric => metric.id === initialMetric!.id)
 
     expect(isCustomMetricConfigured(configuredMetric!)).toBe(true)
     expect(configuredMetric!.customConfig!.workflowAppId).toBe('custom-workflow-app-id')
     expect(configuredMetric!.customConfig!.workflowName).toBe(config.workflowOptions[0].label)
   })
 
   it('should add and remove builtin metrics', () => {
-    const resourceType = 'workflow'
+    const resourceType = 'apps'
     const resourceId = 'app-2'
     const store = useEvaluationStore.getState()
     const config = getEvaluationMockConfig(resourceType)
@@ -49,16 +50,16 @@
     store.ensureResource(resourceType, resourceId)
     store.addBuiltinMetric(resourceType, resourceId, config.builtinMetrics[1].id)
 
-    const addedMetric = useEvaluationStore.getState().resources['workflow:app-2'].metrics.find(metric => metric.optionId === config.builtinMetrics[1].id)
+    const addedMetric = useEvaluationStore.getState().resources['apps:app-2'].metrics.find(metric => metric.optionId === config.builtinMetrics[1].id)
 
     expect(addedMetric).toBeDefined()
 
     store.removeMetric(resourceType, resourceId, addedMetric!.id)
 
-    expect(useEvaluationStore.getState().resources['workflow:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false)
+    expect(useEvaluationStore.getState().resources['apps:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false)
   })
 
   it('should upsert builtin metric node selections', () => {
-    const resourceType = 'workflow'
+    const resourceType = 'apps'
     const resourceId = 'app-4'
     const store = useEvaluationStore.getState()
     const config = getEvaluationMockConfig(resourceType)
@@ -73,38 +74,38 @@
       { node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
     ])
 
-    const metric = useEvaluationStore.getState().resources['workflow:app-4'].metrics.find(item => item.optionId === metricId)
+    const metric = useEvaluationStore.getState().resources['apps:app-4'].metrics.find(item => item.optionId === metricId)
 
     expect(metric?.nodeInfoList).toEqual([
       { node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
     ])
-    expect(useEvaluationStore.getState().resources['workflow:app-4'].metrics.filter(item => item.optionId === metricId)).toHaveLength(1)
+    expect(useEvaluationStore.getState().resources['apps:app-4'].metrics.filter(item => item.optionId === metricId)).toHaveLength(1)
   })
 
   it('should update condition groups and adapt operators to field types', () => {
-    const resourceType = 'pipeline'
+    const resourceType = 'datasets'
     const resourceId = 'dataset-1'
     const store = useEvaluationStore.getState()
     const config = getEvaluationMockConfig(resourceType)
 
     store.ensureResource(resourceType, resourceId)
 
-    const initialGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
+    const initialGroup = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0]
 
     store.setConditionGroupOperator(resourceType, resourceId, initialGroup.id, 'or')
     store.addConditionGroup(resourceType, resourceId)
 
     const booleanField = config.fieldOptions.find(field => field.type === 'boolean')!
-    const currentItem = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0].items[0]
+    const currentItem = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0].items[0]
 
     store.updateConditionField(resourceType, resourceId, initialGroup.id, currentItem.id, booleanField.id)
 
-    const updatedGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
+    const updatedGroup = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0]
 
     expect(updatedGroup.logicalOperator).toBe('or')
     expect(updatedGroup.items[0].operator).toBe('is')
     expect(getAllowedOperators(resourceType, booleanField.id)).toEqual(['is', 'is_not'])
   })
 
   it('should support time fields and clear values for empty operators', () => {
-    const resourceType = 'workflow'
+    const resourceType = 'apps'
    const resourceId = 'app-3'
     const store = useEvaluationStore.getState()
     const config = getEvaluationMockConfig(resourceType)
@@ -112,15 +113,89 @@
     store.ensureResource(resourceType, resourceId)
 
     const timeField = config.fieldOptions.find(field => field.type === 'time')!
-    const item = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
+    const item = useEvaluationStore.getState().resources['apps:app-3'].conditions[0].items[0]
 
-    store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, timeField.id)
-    store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, 'is_empty')
+    store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['apps:app-3'].conditions[0].id, item.id, timeField.id)
+    store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['apps:app-3'].conditions[0].id, item.id, 'is_empty')
 
-    const updatedItem = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
+    const updatedItem = useEvaluationStore.getState().resources['apps:app-3'].conditions[0].items[0]
 
     expect(getAllowedOperators(resourceType, timeField.id)).toEqual(['is', 'before', 'after', 'is_empty', 'is_not_empty'])
     expect(requiresConditionValue('is_empty')).toBe(false)
     expect(updatedItem.value).toBeNull()
   })
+
+  it('should hydrate resource state from evaluation config', () => {
+    const resourceType = 'apps'
+    const resourceId = 'app-5'
+    const store = useEvaluationStore.getState()
+    const config: EvaluationConfig = {
+      evaluation_model: 'gpt-4o-mini',
+      evaluation_model_provider: 'openai',
+      metrics_config: {
+        default_metrics: [{
+          metric: 'faithfulness',
+          node_info_list: [
+            { node_id: 'node-1', title: 'Retriever', type: 'retriever' },
+          ],
+        }],
+        customized_metrics: {
+          evaluation_workflow_id: 'workflow-precision-review',
+          input_fields: {
+            'app.input.query': 'query',
+          },
+        },
+      },
+      judgement_conditions: [{
+        logical_operator: 'or',
+        items: [{
+          field_id: 'system.has_context',
+          operator: 'is',
+          value: true,
+        }],
+      }],
+    }
+
+    store.ensureResource(resourceType, resourceId)
+    store.setBatchTab(resourceType, resourceId, 'history')
+    store.setUploadedFileName(resourceType, resourceId, 'batch.csv')
+    useEvaluationStore.setState(state => ({
+      resources: {
+        ...state.resources,
+        'apps:app-5': {
+          ...state.resources['apps:app-5'],
+          batchRecords: [{
+            id: 'batch-1',
+            fileName: 'batch.csv',
+            status: 'success',
+            startedAt: '10:00:00',
+            summary: 'App evaluation batch',
+          }],
+        },
+      },
+    }))
+    store.hydrateResource(resourceType, resourceId, config)
+
+    const hydratedState = useEvaluationStore.getState().resources['apps:app-5']
+
+    expect(hydratedState.judgeModelId).toBe('openai::gpt-4o-mini')
+    expect(hydratedState.metrics).toHaveLength(2)
+    expect(hydratedState.metrics[0].optionId).toBe('faithfulness')
+    expect(hydratedState.metrics[0].nodeInfoList).toEqual([
+      { node_id: 'node-1', title: 'Retriever', type: 'retriever' },
+    ])
+    expect(hydratedState.metrics[1].kind).toBe('custom-workflow')
+    expect(hydratedState.metrics[1].customConfig?.workflowId).toBe('workflow-precision-review')
+    expect(hydratedState.metrics[1].customConfig?.mappings[0].sourceFieldId).toBe('app.input.query')
+    expect(hydratedState.metrics[1].customConfig?.mappings[0].targetVariableId).toBe('query')
+    expect(hydratedState.conditions[0].logicalOperator).toBe('or')
+    expect(hydratedState.conditions[0].items[0]).toMatchObject({
+      fieldId: 'system.has_context',
+      operator: 'is',
+      value: true,
+    })
+    expect(hydratedState.activeBatchTab).toBe('history')
+    expect(hydratedState.uploadedFileName).toBe('batch.csv')
+    expect(hydratedState.batchRecords).toHaveLength(1)
+  })
 })
diff --git a/web/app/components/evaluation/components/non-pipeline-evaluation.tsx b/web/app/components/evaluation/components/layout/non-pipeline-evaluation.tsx
similarity index 85%
rename from web/app/components/evaluation/components/non-pipeline-evaluation.tsx
rename to web/app/components/evaluation/components/layout/non-pipeline-evaluation.tsx
index 1b7d507339..5d47a754ff 100644
--- a/web/app/components/evaluation/components/non-pipeline-evaluation.tsx
+++ b/web/app/components/evaluation/components/layout/non-pipeline-evaluation.tsx
@@ -1,13 +1,13 @@
 'use client'
 
-import type { EvaluationResourceProps } from '../types'
+import type { EvaluationResourceProps } from '../../types'
 import { useTranslation } from 'react-i18next'
 import { useDocLink } from '@/context/i18n'
-import BatchTestPanel from './batch-test-panel'
-import ConditionsSection from './conditions-section'
-import JudgeModelSelector from './judge-model-selector'
-import MetricSection from './metric-section'
-import SectionHeader, { InlineSectionHeader } from './section-header'
+import BatchTestPanel from '../batch-test-panel'
+import ConditionsSection from '../conditions-section'
+import JudgeModelSelector from '../judge-model-selector'
+import MetricSection from '../metric-section'
+import SectionHeader, { InlineSectionHeader } from '../section-header'
 
 const NonPipelineEvaluation = ({
   resourceType,
diff --git a/web/app/components/evaluation/components/pipeline-evaluation.tsx b/web/app/components/evaluation/components/layout/pipeline-evaluation.tsx
similarity index 97%
rename from web/app/components/evaluation/components/pipeline-evaluation.tsx
rename to web/app/components/evaluation/components/layout/pipeline-evaluation.tsx
index 4464b85a73..73ba754f28 100644
--- a/web/app/components/evaluation/components/pipeline-evaluation.tsx
+++ b/web/app/components/evaluation/components/layout/pipeline-evaluation.tsx
@@ -1,6 +1,6 @@
 'use client'
 
-import type { EvaluationResourceProps, MetricOption } from '../types'
+import type { EvaluationResourceProps, MetricOption } from '../../types'
 import { useEffect, useMemo, useRef, useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import Badge from '@/app/components/base/badge'
@@ -11,10 +11,10 @@ import { toast } from '@/app/components/base/ui/toast'
 import { Tooltip, TooltipContent, TooltipTrigger } from '@/app/components/base/ui/tooltip'
 import { useDocLink } from '@/context/i18n'
 import { cn } from '@/utils/classnames'
-import { getEvaluationMockConfig } from '../mock'
-import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../store'
-import JudgeModelSelector from './judge-model-selector'
-import SectionHeader, { InlineSectionHeader } from './section-header'
+import { getEvaluationMockConfig } from '../../mock'
+import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../../store'
+import JudgeModelSelector from '../judge-model-selector'
+import SectionHeader, { InlineSectionHeader } from '../section-header'
 
 type PipelineMetricItemProps = {
   metric: MetricOption
diff --git a/web/app/components/evaluation/components/metric-section/__tests__/index.spec.tsx b/web/app/components/evaluation/components/metric-section/__tests__/index.spec.tsx
index 7b1cb56daf..03343d2a1c 100644
--- a/web/app/components/evaluation/components/metric-section/__tests__/index.spec.tsx
+++ b/web/app/components/evaluation/components/metric-section/__tests__/index.spec.tsx
@@ -1,5 +1,5 @@
-import { act, fireEvent, render, screen } from '@testing-library/react'
 import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
+import { act, fireEvent, render, screen } from '@testing-library/react'
 import MetricSection from '..'
 import { useEvaluationStore } from '../../../store'
@@ -13,7 +13,7 @@ vi.mock('@/service/use-evaluation', () => ({
   useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
 }))
 
-const resourceType = 'workflow' as const
+const resourceType = 'apps' as const
 const resourceId = 'metric-section-resource'
 
 const renderMetricSection = () => {
diff --git a/web/app/components/evaluation/components/metric-section/index.tsx b/web/app/components/evaluation/components/metric-section/index.tsx
index eac266c996..7887b2733b 100644
--- a/web/app/components/evaluation/components/metric-section/index.tsx
+++ b/web/app/components/evaluation/components/metric-section/index.tsx
@@ -21,7 +21,7 @@ const MetricSection = ({
   const [nodeInfoMap, setNodeInfoMap] = useState>({})
   const hasMetrics = resource.metrics.length > 0
   const hasBuiltinMetrics = resource.metrics.some(metric => metric.kind === 'builtin')
-  const shouldLoadNodeInfo = resourceType !== 'pipeline' && !!resourceId && hasBuiltinMetrics
+  const shouldLoadNodeInfo = resourceType !== 'datasets' && !!resourceId && hasBuiltinMetrics
   const { data: availableMetricsData } = useAvailableEvaluationMetrics(shouldLoadNodeInfo)
   const { mutate: loadNodeInfo } = useEvaluationNodeInfoMutation()
   const availableMetricIds = useMemo(() => availableMetricsData?.metrics ?? [], [availableMetricsData?.metrics])
diff --git a/web/app/components/evaluation/components/metric-selector/use-metric-selector-data.ts b/web/app/components/evaluation/components/metric-selector/use-metric-selector-data.ts
index 317b325a0f..46f8e9e740 100644
--- a/web/app/components/evaluation/components/metric-selector/use-metric-selector-data.ts
+++ b/web/app/components/evaluation/components/metric-selector/use-metric-selector-data.ts
@@ -13,7 +13,7 @@ import {
 type UseMetricSelectorDataOptions = {
   open: boolean
   query: string
-  resourceType: 'workflow' | 'pipeline' | 'snippet'
+  resourceType: 'apps' | 'datasets' | 'snippets'
   resourceId: string
   nodeInfoMap: Record
   setNodeInfoMap: (value: Record) => void
@@ -63,7 +63,7 @@ export const useMetricSelectorData = ({
     if (!open)
      return
 
-    if (resourceType === 'pipeline' || !resourceId || availableMetricIds.length === 0)
+    if (resourceType === 'datasets' || !resourceId || availableMetricIds.length === 0)
       return
 
     let isActive = true
@@ -107,7 +107,7 @@ export const useMetricSelectorData = ({
         || metric.label.toLowerCase().includes(keyword)
         || metric.description.toLowerCase().includes(keyword)
       const metricNodes = nodeInfoMap[metric.id] ?? []
-      const supportsNodeSelection = resourceType !== 'pipeline'
+      const supportsNodeSelection = resourceType !== 'datasets'
       const hasNoNodeInfo = supportsNodeSelection && metricNodes.length === 0
 
       if (hasNoNodeInfo) {
diff --git a/web/app/components/evaluation/components/metric-selector/utils.ts b/web/app/components/evaluation/components/metric-selector/utils.ts
index 8c369d9920..a88e5069d7 100644
--- a/web/app/components/evaluation/components/metric-selector/utils.ts
+++ b/web/app/components/evaluation/components/metric-selector/utils.ts
@@ -2,8 +2,8 @@ import type { MetricOption } from '../../types'
 import type { MetricVisualTone } from './types'
 import type { EvaluationTargetType, NodeInfo } from '@/types/evaluation'
 
-export const toEvaluationTargetType = (resourceType: 'workflow' | 'snippet'): EvaluationTargetType => {
-  return resourceType === 'snippet' ? 'snippets' : 'app'
+export const toEvaluationTargetType = (resourceType: 'apps' | 'snippets'): EvaluationTargetType => {
+  return resourceType === 'snippets' ? 'snippets' : 'app'
 }
 
 const humanizeMetricId = (metricId: string) => {
diff --git a/web/app/components/evaluation/index.tsx b/web/app/components/evaluation/index.tsx
index 5806bf140a..b12157b7fa 100644
--- a/web/app/components/evaluation/index.tsx
+++ b/web/app/components/evaluation/index.tsx
@@ -2,21 +2,31 @@ import type { EvaluationResourceProps } from './types'
 import { useEffect } from 'react'
-import NonPipelineEvaluation from './components/non-pipeline-evaluation'
-import PipelineEvaluation from './components/pipeline-evaluation'
+import { useEvaluationConfig } from '@/service/use-evaluation'
+import NonPipelineEvaluation from './components/layout/non-pipeline-evaluation'
+import PipelineEvaluation from './components/layout/pipeline-evaluation'
 import { useEvaluationStore } from './store'
 
 const Evaluation = ({
   resourceType,
   resourceId,
 }: EvaluationResourceProps) => {
+  const { data: config } = useEvaluationConfig(resourceType, resourceId)
   const ensureResource = useEvaluationStore(state => state.ensureResource)
+  const hydrateResource = useEvaluationStore(state => state.hydrateResource)
 
   useEffect(() => {
     ensureResource(resourceType, resourceId)
   }, [ensureResource, resourceId, resourceType])
 
-  if (resourceType === 'pipeline') {
+  useEffect(() => {
+    if (!config)
+      return
+
+    hydrateResource(resourceType, resourceId, config)
+  }, [config, hydrateResource, resourceId, resourceType])
+
+  if (resourceType === 'datasets') {
     return (
 {
-  if (resourceType === 'pipeline') {
+  if (resourceType === 'datasets') {
     return {
       judgeModels,
       builtinMetrics: pipelineBuiltinMetrics,
@@ -176,7 +176,7 @@
     }
   }
 
-  if (resourceType === 'snippet') {
+  if (resourceType === 'snippets') {
     return {
       judgeModels,
       builtinMetrics,
diff --git a/web/app/components/evaluation/store-utils.ts b/web/app/components/evaluation/store-utils.ts
index a03c12d259..c1acc1300c 100644
--- a/web/app/components/evaluation/store-utils.ts
+++ b/web/app/components/evaluation/store-utils.ts
@@ -9,24 +9,195 @@ import type {
   JudgmentConditionGroup,
   MetricOption,
 } from './types'
-import type { NodeInfo } from '@/types/evaluation'
+import type {
+  EvaluationConditionValue,
+  EvaluationConfig,
+  EvaluationCustomizedMetric,
+  EvaluationDefaultMetric,
+  EvaluationJudgementConditionGroup,
+  EvaluationJudgementConditionItem,
+  EvaluationMetricsConfig,
+  NodeInfo,
+} from '@/types/evaluation'
 import { getComparisonOperators, getDefaultOperator, getEvaluationMockConfig } from './mock'
+import { encodeModelSelection } from './utils'
 
 type EvaluationStoreResources = Record
 
 const createId = (prefix: string) => `${prefix}-${Math.random().toString(36).slice(2, 10)}`
 
+const humanizeMetricId = (metricId: string) => {
+  return metricId
+    .split(/[-_]/g)
+    .filter(Boolean)
+    .map(part => part.charAt(0).toUpperCase() + part.slice(1))
+    .join(' ')
+}
+
+const resolveMetricOption = (resourceType: EvaluationResourceType, metricId: string): MetricOption => {
+  const config = getEvaluationMockConfig(resourceType)
+  return config.builtinMetrics.find(metric => metric.id === metricId) ?? {
+    id: metricId,
+    label: humanizeMetricId(metricId),
+    description: '',
+    group: config.builtinMetrics[0]?.group ?? 'other',
+    badges: ['Built-in'],
+  }
+}
+
+const normalizeNodeInfoList = (value: NodeInfo[] | undefined): NodeInfo[] => {
+  if (!value?.length)
+    return []
+
+  return value
+    .map((item) => {
+      const nodeId = typeof item.node_id === 'string' ? item.node_id : ''
+      const title = typeof item.title === 'string' ? item.title : nodeId
+      const type = typeof item.type === 'string' ? item.type : ''
+
+      if (!nodeId)
+        return null
+
+      return {
+        node_id: nodeId,
+        title,
+        type,
+      }
+    })
+    .filter((item): item is NodeInfo => !!item)
+}
+
+const normalizeDefaultMetrics = (
+  resourceType: EvaluationResourceType,
+  value: EvaluationDefaultMetric[] | undefined,
+): EvaluationMetric[] => {
+  if (!value?.length)
+    return []
+
+  return value
+    .map((item) => {
+      const metricId = typeof item.metric === 'string' ? item.metric : ''
+      if (!metricId)
+        return null
+
+      const metricOption = resolveMetricOption(resourceType, metricId)
+      return createBuiltinMetric(metricOption, normalizeNodeInfoList(item.node_info_list ?? []))
+    })
+    .filter((item): item is EvaluationMetric => !!item)
+}
+
+const normalizeCustomMetricMappings = (
+  value: EvaluationCustomizedMetric['input_fields'],
+): CustomMetricMapping[] => {
+  if (!value)
+    return [createCustomMetricMapping()]
+
+  const mappings = Object.entries(value)
+    .filter((entry): entry is [string, string] => {
+      const [, targetVariableId] = entry
+      return typeof targetVariableId === 'string' && !!targetVariableId
+    })
+    .map(([sourceFieldId, targetVariableId]) => ({
+      id: createId('mapping'),
+      sourceFieldId,
+      targetVariableId,
+    }))
+
+  return mappings.length > 0 ? mappings : [createCustomMetricMapping()]
+}
+
+const normalizeCustomMetric = (
+  value: EvaluationCustomizedMetric | null | undefined,
+): EvaluationMetric[] => {
+  if (!value)
+    return []
+
+  const workflowId = typeof value.evaluation_workflow_id === 'string' ? value.evaluation_workflow_id : null
+  if (!workflowId)
+    return []
+
+  const customMetric = createCustomMetric()
+
+  return [{
+    ...customMetric,
+    customConfig: customMetric.customConfig
+      ? {
+        ...customMetric.customConfig,
+        workflowId,
+        mappings: normalizeCustomMetricMappings(value.input_fields),
+      }
+      : customMetric.customConfig,
+  }]
+}
+
+const normalizeConditionItem = (
+  resourceType: EvaluationResourceType,
+  value: EvaluationJudgementConditionItem,
+): JudgmentConditionGroup['items'][number] => {
+  const fieldId = typeof value.fieldId === 'string'
+    ? value.fieldId
+    : typeof value.field_id === 'string'
+      ? value.field_id
+      : null
+  const operatorValue = typeof value.operator === 'string' ? value.operator : null
+  const field = getEvaluationMockConfig(resourceType).fieldOptions.find(option => option.id === fieldId)
+  const allowedOperators = field ? getComparisonOperators(field.type) : ['contains']
+  const operator = operatorValue && allowedOperators.includes(operatorValue as ComparisonOperator)
+    ? operatorValue as ComparisonOperator
+    : field
+      ? getDefaultOperator(field.type)
+      : 'contains'
+  const rawValue: EvaluationConditionValue = value.value ?? null
+
+  return {
+    id: typeof value.id === 'string' ? value.id : createId('condition'),
+    fieldId,
+    operator,
+    value: getConditionValue(field, operator, rawValue),
+  }
+}
+
+const normalizeConditionGroups = (
+  resourceType: EvaluationResourceType,
+  value: EvaluationConfig['judgement_conditions'],
+): JudgmentConditionGroup[] => {
+  const groupsValue: EvaluationJudgementConditionGroup[] = Array.isArray(value)
+    ? value
+    : Array.isArray(value?.groups)
+      ? value.groups
+      : []
+
+  const groups = groupsValue
+    .map((group) => {
+      const itemsValue = Array.isArray(group.items) ? group.items : []
+      const items = itemsValue
+        .map(item => normalizeConditionItem(resourceType, item))
+
+      if (items.length === 0)
+        return null
+
+      return {
+        id: typeof group.id === 'string' ? group.id : createId('group'),
+        logicalOperator: group.logicalOperator === 'or' || group.logical_operator === 'or' ? 'or' : 'and',
+        items,
+      } satisfies JudgmentConditionGroup
+    })
+    .filter((group): group is JudgmentConditionGroup => !!group)
+
+  return groups.length > 0 ? groups : [createConditionGroup(resourceType)]
+}
+
 export const buildResourceKey = (resourceType: EvaluationResourceType, resourceId: string) => `${resourceType}:${resourceId}`
 
 const conditionOperatorsWithoutValue: ComparisonOperator[] = ['is_empty', 'is_not_empty']
 
 export const requiresConditionValue = (operator: ComparisonOperator) => !conditionOperatorsWithoutValue.includes(operator)
 
-export const getConditionValue = (
+export function getConditionValue(
   field: EvaluationFieldOption | undefined,
   operator: ComparisonOperator,
   previousValue: string | number | boolean | null = null,
-) => {
+) {
   if (!field || !requiresConditionValue(operator))
     return null
@@ -42,36 +213,42 @@
   return typeof previousValue === 'string' ? previousValue : null
 }
 
-export const createBuiltinMetric = (metric: MetricOption, nodeInfoList: NodeInfo[] = []): EvaluationMetric => ({
-  id: createId('metric'),
-  optionId: metric.id,
-  kind: 'builtin',
-  label: metric.label,
-  description: metric.description,
-  badges: metric.badges,
-  nodeInfoList,
-})
+export function createBuiltinMetric(metric: MetricOption, nodeInfoList: NodeInfo[] = []): EvaluationMetric {
+  return {
+    id: createId('metric'),
+    optionId: metric.id,
+    kind: 'builtin',
+    label: metric.label,
+    description: metric.description,
+    badges: metric.badges,
+    nodeInfoList,
+  }
+}
 
-export const createCustomMetricMapping = (): CustomMetricMapping => ({
-  id: createId('mapping'),
-  sourceFieldId: null,
-  targetVariableId: null,
-})
+export function createCustomMetricMapping(): CustomMetricMapping {
+  return {
+    id: createId('mapping'),
+    sourceFieldId: null,
+    targetVariableId: null,
+  }
+}
 
-export const createCustomMetric = (): EvaluationMetric => ({
-  id: createId('metric'),
-  optionId: createId('custom'),
-  kind: 'custom-workflow',
-  label: 'Custom Evaluator',
-  description: 'Map workflow variables to your evaluation inputs.',
-  badges: ['Workflow'],
-  customConfig: {
-    workflowId: null,
-    workflowAppId: null,
-    workflowName: null,
-    mappings: [createCustomMetricMapping()],
-  },
-})
+export function createCustomMetric(): EvaluationMetric {
+  return {
+    id: createId('metric'),
+    optionId: createId('custom'),
+    kind: 'custom-workflow',
+    label: 'Custom Evaluator',
+    description: 'Map workflow variables to your evaluation inputs.',
+    badges: ['Workflow'],
+    customConfig: {
+      workflowId: null,
+      workflowAppId: null,
+      workflowName: null,
+      mappings: [createCustomMetricMapping()],
+    },
+  }
+}
 
 export const buildConditionItem = (resourceType: EvaluationResourceType) => {
   const field = getEvaluationMockConfig(resourceType).fieldOptions[0]
@@ -85,11 +262,13 @@
   }
 }
 
-export const createConditionGroup = (resourceType: EvaluationResourceType): JudgmentConditionGroup => ({
-  id: createId('group'),
-  logicalOperator: 'and',
-  items: [buildConditionItem(resourceType)],
-})
+export function createConditionGroup(resourceType: EvaluationResourceType): JudgmentConditionGroup {
+  return {
+    id: createId('group'),
+    logicalOperator: 'and',
+    items: [buildConditionItem(resourceType)],
+  }
+}
 
 export const buildInitialState = (resourceType: EvaluationResourceType): EvaluationResourceState => {
   return {
@@ -102,6 +281,24 @@
   }
 }
 
+export const buildStateFromEvaluationConfig = (
+  resourceType: EvaluationResourceType,
+  config: EvaluationConfig,
+): EvaluationResourceState => {
+  const metricsConfig: EvaluationMetricsConfig = config.metrics_config ?? {}
+  const defaultMetrics = normalizeDefaultMetrics(resourceType, metricsConfig.default_metrics)
+  const customMetrics = normalizeCustomMetric(metricsConfig.customized_metrics)
+
+  return {
+    ...buildInitialState(resourceType),
+    judgeModelId: config.evaluation_model && config.evaluation_model_provider
+      ? encodeModelSelection(config.evaluation_model_provider, config.evaluation_model)
+      : null,
+    metrics: [...defaultMetrics, ...customMetrics],
+    conditions: normalizeConditionGroups(resourceType, config.judgement_conditions),
+  }
+}
+
 const getResourceState = (
   resources: EvaluationStoreResources,
   resourceType: EvaluationResourceType,
diff --git a/web/app/components/evaluation/store.ts b/web/app/components/evaluation/store.ts
index c91744da9b..61e1f773bb 100644
--- a/web/app/components/evaluation/store.ts
+++ b/web/app/components/evaluation/store.ts
@@ -3,13 +3,14 @@ import type {
   EvaluationResourceState,
   EvaluationResourceType,
 } from './types'
-import type { NodeInfo } from '@/types/evaluation'
+import type { EvaluationConfig, NodeInfo } from '@/types/evaluation'
 import { create } from 'zustand'
 import { getDefaultOperator, getEvaluationMockConfig } from './mock'
 import {
   buildConditionItem,
   buildInitialState,
   buildResourceKey,
+  buildStateFromEvaluationConfig,
   createBatchTestRecord,
   createBuiltinMetric,
   createConditionGroup,
@@ -28,6 +29,7 @@ import {
 type EvaluationStore = {
   resources: Record
   ensureResource: (resourceType: EvaluationResourceType, resourceId: string) => void
+  hydrateResource: (resourceType: EvaluationResourceType, resourceId: string, config: EvaluationConfig) => void
   setJudgeModel: (resourceType: EvaluationResourceType, resourceId: string, judgeModelId: string) => void
   addBuiltinMetric: (resourceType: EvaluationResourceType, resourceId: string, optionId: string, nodeInfoList?: NodeInfo[]) => void
   addCustomMetric: (resourceType: EvaluationResourceType, resourceId: string) => void
@@ -82,6 +84,19 @@ export const useEvaluationStore = create((set, get) => ({
       },
     }))
   },
+  hydrateResource: (resourceType, resourceId, config) => {
+    set(state => ({
+      resources: {
+        ...state.resources,
+        [buildResourceKey(resourceType, resourceId)]: {
+          ...buildStateFromEvaluationConfig(resourceType, config),
+          activeBatchTab: state.resources[buildResourceKey(resourceType, resourceId)]?.activeBatchTab ?? 'input-fields',
+          uploadedFileName: state.resources[buildResourceKey(resourceType, resourceId)]?.uploadedFileName ?? null,
+          batchRecords: state.resources[buildResourceKey(resourceType, resourceId)]?.batchRecords ?? [],
+        },
+      },
+    }))
+  },
   setJudgeModel: (resourceType, resourceId, judgeModelId) => {
     set(state => ({
       resources: updateResourceState(state.resources, resourceType, resourceId, resource => ({
diff --git a/web/app/components/evaluation/types.ts b/web/app/components/evaluation/types.ts
index 7cdf67b3c9..936c47365e 100644
--- a/web/app/components/evaluation/types.ts
+++ b/web/app/components/evaluation/types.ts
@@ -1,6 +1,6 @@
 import type { NodeInfo } from '@/types/evaluation'
 
-export type EvaluationResourceType = 'workflow' | 'pipeline' | 'snippet'
+export type EvaluationResourceType = 'apps' | 'datasets' | 'snippets'
 
 export type EvaluationResourceProps = {
   resourceType: EvaluationResourceType
diff --git a/web/app/components/snippets/snippet-evaluation-page.tsx b/web/app/components/snippets/snippet-evaluation-page.tsx
index 736f36fc1e..5691be1977 100644
--- a/web/app/components/snippets/snippet-evaluation-page.tsx
+++ b/web/app/components/snippets/snippet-evaluation-page.tsx
@@ -22,7 +22,7 @@ const SnippetEvaluationPage = ({ snippetId }: SnippetEvaluationPageProps) => {
       snippet={snippet}
       section="evaluation"
     >
-
+
   )
 }
diff --git a/web/service/use-evaluation.ts b/web/service/use-evaluation.ts
index 45a5e5bc29..e57aa7e162 100644
--- a/web/service/use-evaluation.ts
+++ b/web/service/use-evaluation.ts
@@ -1,4 +1,5 @@
-import type { AvailableEvaluationWorkflowsResponse } from '@/types/evaluation'
+import type { EvaluationResourceType } from '@/app/components/evaluation/types'
+import type { AvailableEvaluationWorkflowsResponse, EvaluationConfig } from '@/types/evaluation'
 import {
   keepPreviousData,
   useInfiniteQuery,
@@ -28,6 +29,45 @@ const normalizeAvailableEvaluationWorkflowsParams = (params: AvailableEvaluation
   }
 }
 
+const toEvaluationTargetType = (resourceType: Exclude) => {
+  return resourceType === 'snippets' ? 'snippets' : 'app'
+}
+
+const getEvaluationConfigQueryOptions = (
+  resourceType: EvaluationResourceType,
+  resourceId: string,
+) => {
+  if (resourceType === 'datasets') {
+    return consoleQuery.datasetEvaluation.config.queryOptions({
+      input: {
+        params: {
+          datasetId: resourceId,
+        },
+      },
+      enabled: !!resourceId,
+      refetchOnWindowFocus: false,
+    })
+  }
+
+  return consoleQuery.evaluation.config.queryOptions({
+    input: {
+      params: {
+        targetType: toEvaluationTargetType(resourceType),
+        targetId: resourceId,
+      },
+    },
+    enabled: !!resourceId,
+    refetchOnWindowFocus: false,
+  })
+}
+
+export const useEvaluationConfig = (
+  resourceType: EvaluationResourceType,
+  resourceId: string,
+) => {
+  return useQuery(getEvaluationConfigQueryOptions(resourceType, resourceId))
+}
+
 export const useAvailableEvaluationMetrics = (enabled = true) => {
   return useQuery(consoleQuery.evaluation.availableMetrics.queryOptions({
     enabled,
diff --git a/web/types/evaluation.ts b/web/types/evaluation.ts
index be9a532c26..0c92d34fc2 100644
--- a/web/types/evaluation.ts
+++ b/web/types/evaluation.ts
@@ -1,10 +1,38 @@
 export type EvaluationTargetType = 'app' | 'snippets'
 
+export type EvaluationMetricsConfig = {
+  default_metrics?: EvaluationDefaultMetric[]
+  customized_metrics?: EvaluationCustomizedMetric | null
+}
+
+export type EvaluationConditionValue = string | number | boolean | null
+
+export type EvaluationJudgementConditionItem = {
+  id?: string
+  fieldId?: string
+  field_id?: string
+  operator?: string
+  value?: EvaluationConditionValue
+}
+
+export type EvaluationJudgementConditionGroup = {
+  id?: string
+  logicalOperator?: 'and' | 'or'
+  logical_operator?: 'and' | 'or'
+  items?: EvaluationJudgementConditionItem[]
+}
+
+export type EvaluationJudgementConditions
+  = | EvaluationJudgementConditionGroup[]
+    | {
+      groups?: EvaluationJudgementConditionGroup[]
+    }
+
 export type EvaluationConfig = {
   evaluation_model: string | null
   evaluation_model_provider: string | null
-  metrics_config: Record | null
-  judgement_conditions: Record | null
+  metrics_config: EvaluationMetricsConfig | null
+  judgement_conditions: EvaluationJudgementConditions | null
 }
 
 export type NodeInfo = {
@@ -20,8 +48,8 @@ export type EvaluationDefaultMetric = {
 
 export type EvaluationCustomizedMetric = {
   evaluation_workflow_id?: string
-  input_fields?: Record
-  output_fields?: Record[]
+  input_fields?: Record
+  output_fields?: Array>
 }
 
 export type EvaluationConfigData = {
@@ -29,7 +57,7 @@
   evaluation_model_provider?: string
   default_metrics?: EvaluationDefaultMetric[]
   customized_metrics?: EvaluationCustomizedMetric | null
-  judgment_config?: Record | null
+  judgment_config?: EvaluationJudgementConditions | null
 }
 
 export type EvaluationRunRequest = EvaluationConfigData & {