diff --git a/web/app/components/evaluation/__tests__/index.spec.tsx b/web/app/components/evaluation/__tests__/index.spec.tsx index 6257d38f77..c0e85f6e79 100644 --- a/web/app/components/evaluation/__tests__/index.spec.tsx +++ b/web/app/components/evaluation/__tests__/index.spec.tsx @@ -1,6 +1,5 @@ import { act, fireEvent, render, screen } from '@testing-library/react' import Evaluation from '..' -import { getEvaluationMockConfig } from '../mock' import { useEvaluationStore } from '../store' const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn()) @@ -121,22 +120,19 @@ describe('Evaluation', () => { const resourceType = 'apps' const resourceId = 'app-2' const store = useEvaluationStore.getState() - const config = getEvaluationMockConfig(resourceType) - - const stringField = config.fieldOptions.find(field => field.type === 'string')! - let groupId = '' - let itemId = '' + let conditionId = '' act(() => { store.ensureResource(resourceType, resourceId) store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini') + store.addBuiltinMetric(resourceType, resourceId, 'faithfulness', [ + { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' }, + ]) + store.addCondition(resourceType, resourceId) - const group = useEvaluationStore.getState().resources['apps:app-2'].conditions[0] - groupId = group.id - itemId = group.items[0].id - - store.updateConditionField(resourceType, resourceId, groupId, itemId, stringField.id) - store.updateConditionOperator(resourceType, resourceId, groupId, itemId, 'contains') + const condition = useEvaluationStore.getState().resources['apps:app-2'].conditions.conditions[0] + conditionId = condition.id + store.updateConditionOperator(resourceType, resourceId, conditionId, '=') }) let rerender: ReturnType['rerender'] @@ -147,7 +143,7 @@ describe('Evaluation', () => { expect(screen.getByPlaceholderText('evaluation.conditions.valuePlaceholder')).toBeInTheDocument() act(() => { - store.updateConditionOperator(resourceType, resourceId, groupId, itemId, 'is_empty') + store.updateConditionOperator(resourceType, resourceId, conditionId, 'is null') rerender() }) @@ -249,7 +245,7 @@ describe('Evaluation', () => { metric: 'context-precision', }], customized_metrics: null, - judgement_conditions: null, + judgment_config: null, }, }) diff --git a/web/app/components/evaluation/__tests__/store.spec.ts b/web/app/components/evaluation/__tests__/store.spec.ts index ce85c32e65..a8dd8c188a 100644 --- a/web/app/components/evaluation/__tests__/store.spec.ts +++ b/web/app/components/evaluation/__tests__/store.spec.ts @@ -31,6 +31,11 @@ describe('evaluation store', () => { workflowName: config.workflowOptions[0].label, }) store.syncCustomMetricMappings(resourceType, resourceId, initialMetric!.id, ['query']) + store.syncCustomMetricOutputs(resourceType, resourceId, initialMetric!.id, [{ + id: 'score', + valueType: 'number', + }]) + const syncedMetric = useEvaluationStore.getState().resources['apps:app-1'].metrics.find(metric => metric.id === initialMetric!.id) store.updateCustomMetricMapping(resourceType, resourceId, initialMetric!.id, syncedMetric!.customConfig!.mappings[0].id, { outputVariableId: 'answer', @@ -40,6 +45,7 @@ describe('evaluation store', () => { expect(isCustomMetricConfigured(configuredMetric!)).toBe(true) expect(configuredMetric!.customConfig!.workflowAppId).toBe('custom-workflow-app-id') expect(configuredMetric!.customConfig!.workflowName).toBe(config.workflowOptions[0].label) + expect(configuredMetric!.customConfig!.outputs).toEqual([{ id: 'score', valueType: 'number' }]) }) it('should only add one custom metric', () => { @@ -77,7 +83,7 @@ describe('evaluation store', () => { expect(useEvaluationStore.getState().resources['apps:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false) }) - it('should upsert builtin metric node selections', () => { + it('should upsert builtin metric node selections and prune stale conditions', () => { const resourceType = 'apps' const resourceId = 'app-4' const store = useEvaluationStore.getState() @@ -88,63 +94,78 @@ describe('evaluation store', () => { store.addBuiltinMetric(resourceType, resourceId, metricId, [ { node_id: 'node-1', title: 'Answer Node', type: 'answer' }, ]) + store.addCondition(resourceType, resourceId) store.addBuiltinMetric(resourceType, resourceId, metricId, [ { node_id: 'node-2', title: 'Retriever Node', type: 'retriever' }, ]) - const metric = useEvaluationStore.getState().resources['apps:app-4'].metrics.find(item => item.optionId === metricId) + const state = useEvaluationStore.getState().resources['apps:app-4'] + const metric = state.metrics.find(item => item.optionId === metricId) expect(metric?.nodeInfoList).toEqual([ { node_id: 'node-2', title: 'Retriever Node', type: 'retriever' }, ]) - expect(useEvaluationStore.getState().resources['apps:app-4'].metrics.filter(item => item.optionId === metricId)).toHaveLength(1) + expect(state.metrics.filter(item => item.optionId === metricId)).toHaveLength(1) + expect(state.conditions.conditions).toHaveLength(0) }) - it('should update condition groups and adapt operators to field types', () => { - const resourceType = 'datasets' - const resourceId = 'dataset-1' + it('should build numeric conditions from selected metrics', () => { + const resourceType = 'apps' + const resourceId = 'app-conditions' const store = useEvaluationStore.getState() const config = getEvaluationMockConfig(resourceType) store.ensureResource(resourceType, resourceId) + store.addBuiltinMetric(resourceType, resourceId, config.builtinMetrics[0].id, [ + { node_id: 'node-answer', title: 'Answer Node', type: 'llm' }, + ]) + store.setConditionLogicalOperator(resourceType, resourceId, 'or') + store.addCondition(resourceType, resourceId) - const initialGroup = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0] - store.setConditionGroupOperator(resourceType, resourceId, initialGroup.id, 'or') - store.addConditionGroup(resourceType, resourceId) + const state = useEvaluationStore.getState().resources['apps:app-conditions'] + const condition = state.conditions.conditions[0] - const booleanField = config.fieldOptions.find(field => field.type === 'boolean')! - const currentItem = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0].items[0] - store.updateConditionField(resourceType, resourceId, initialGroup.id, currentItem.id, booleanField.id) - - const updatedGroup = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0] - expect(updatedGroup.logicalOperator).toBe('or') - expect(updatedGroup.items[0].operator).toBe('is') - expect(getAllowedOperators(resourceType, booleanField.id)).toEqual(['is', 'is_not']) + expect(state.conditions.logicalOperator).toBe('or') + expect(condition.variableSelector).toEqual(['node-answer', 'answer-correctness']) + expect(condition.comparisonOperator).toBe('=') + expect(getAllowedOperators(state.metrics, condition.variableSelector)).toEqual(['=', '≠', '>', '<', '≥', '≤', 'is null', 'is not null']) }) - it('should clear values for empty operators', () => { + it('should clear values for operators without values', () => { const resourceType = 'apps' const resourceId = 'app-3' const store = useEvaluationStore.getState() const config = getEvaluationMockConfig(resourceType) store.ensureResource(resourceType, resourceId) + store.addCustomMetric(resourceType, resourceId) - const stringField = config.fieldOptions.find(field => field.type === 'string')! - const item = useEvaluationStore.getState().resources['apps:app-3'].conditions[0].items[0] + const customMetric = useEvaluationStore.getState().resources['apps:app-3'].metrics.find(metric => metric.kind === 'custom-workflow')! + store.setCustomMetricWorkflow(resourceType, resourceId, customMetric.id, { + workflowId: config.workflowOptions[0].id, + workflowAppId: 'custom-workflow-app-id', + workflowName: config.workflowOptions[0].label, + }) + store.syncCustomMetricOutputs(resourceType, resourceId, customMetric.id, [{ + id: 'reason', + valueType: 'string', + }]) + store.addCondition(resourceType, resourceId) - store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['apps:app-3'].conditions[0].id, item.id, stringField.id) - store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['apps:app-3'].conditions[0].id, item.id, 'is_empty') + const condition = useEvaluationStore.getState().resources['apps:app-3'].conditions.conditions[0] - const updatedItem = useEvaluationStore.getState().resources['apps:app-3'].conditions[0].items[0] + store.updateConditionMetric(resourceType, resourceId, condition.id, [config.workflowOptions[0].id, 'reason']) + store.updateConditionValue(resourceType, resourceId, condition.id, 'needs follow-up') + store.updateConditionOperator(resourceType, resourceId, condition.id, 'empty') - expect(getAllowedOperators(resourceType, stringField.id)).toEqual(['contains', 'not_contains', 'is', 'is_not', 'is_empty', 'is_not_empty']) - expect(requiresConditionValue('is_empty')).toBe(false) - expect(updatedItem.value).toBeNull() + const updatedCondition = useEvaluationStore.getState().resources['apps:app-3'].conditions.conditions[0] + + expect(requiresConditionValue('empty')).toBe(false) + expect(updatedCondition.value).toBeNull() }) - it('should hydrate resource state from evaluation config', () => { + it('should hydrate resource state from judgment_config', () => { const resourceType = 'apps' const resourceId = 'app-5' const store = useEvaluationStore.getState() @@ -162,15 +183,19 @@ describe('evaluation store', () => { input_fields: { query: 'answer', }, - }, - judgement_conditions: [{ - logical_operator: 'or', - items: [{ - field_id: 'system.has_context', - operator: 'is', - value: true, + output_fields: [{ + variable: 'reason', + value_type: 'string', }], - }], + }, + judgment_config: { + logical_operator: 'or', + conditions: [{ + variable_selector: ['node-1', 'faithfulness'], + comparison_operator: '≥', + value: '0.9', + }], + }, } store.ensureResource(resourceType, resourceId) @@ -206,11 +231,12 @@ describe('evaluation store', () => { expect(hydratedState.metrics[1].customConfig?.workflowId).toBe('workflow-precision-review') expect(hydratedState.metrics[1].customConfig?.mappings[0].inputVariableId).toBe('query') expect(hydratedState.metrics[1].customConfig?.mappings[0].outputVariableId).toBe('answer') - expect(hydratedState.conditions[0].logicalOperator).toBe('or') - expect(hydratedState.conditions[0].items[0]).toMatchObject({ - fieldId: 'system.has_context', - operator: 'is', - value: true, + expect(hydratedState.metrics[1].customConfig?.outputs).toEqual([{ id: 'reason', valueType: 'string' }]) + expect(hydratedState.conditions.logicalOperator).toBe('or') + expect(hydratedState.conditions.conditions[0]).toMatchObject({ + variableSelector: ['node-1', 'faithfulness'], + comparisonOperator: '≥', + value: '0.9', }) expect(hydratedState.activeBatchTab).toBe('history') expect(hydratedState.uploadedFileName).toBe('batch.csv') diff --git a/web/app/components/evaluation/components/conditions-section/condition-group.tsx b/web/app/components/evaluation/components/conditions-section/condition-group.tsx index 902194c460..c71aee5025 100644 --- a/web/app/components/evaluation/components/conditions-section/condition-group.tsx +++ b/web/app/components/evaluation/components/conditions-section/condition-group.tsx @@ -2,12 +2,12 @@ import type { ComparisonOperator, - EvaluationFieldOption, + ConditionMetricOption, EvaluationResourceProps, - JudgmentConditionGroup, + JudgmentConditionItem, } from '../../types' +import { useMemo } from 'react' import { useTranslation } from 'react-i18next' -import Badge from '@/app/components/base/badge' import Button from '@/app/components/base/button' import Input from '@/app/components/base/input' import { @@ -20,79 +20,103 @@ import { SelectValue, } from '@/app/components/base/ui/select' import { cn } from '@/utils/classnames' -import { getEvaluationMockConfig } from '../../mock' -import { getAllowedOperators, requiresConditionValue, useEvaluationStore } from '../../store' -import { getFieldTypeIconClassName, getOperatorLabel, groupFieldOptions } from '../../utils' +import { getAllowedOperators, requiresConditionValue, useEvaluationResource, useEvaluationStore } from '../../store' +import { + buildConditionMetricOptions, + getComparisonOperatorLabel, + isSelectorEqual, + serializeVariableSelector, +} from '../../utils' -type ConditionFieldLabelProps = { - field?: EvaluationFieldOption +type ConditionMetricLabelProps = { + metric?: ConditionMetricOption placeholder: string } -type ConditionFieldSelectProps = { - field?: EvaluationFieldOption - fieldOptions: EvaluationFieldOption[] +type ConditionMetricSelectProps = { + metric?: ConditionMetricOption + metricOptions: ConditionMetricOption[] placeholder: string - onChange: (fieldId: string) => void + onChange: (variableSelector: [string, string]) => void } type ConditionOperatorSelectProps = { - field?: EvaluationFieldOption operator: ComparisonOperator operators: ComparisonOperator[] onChange: (operator: ComparisonOperator) => void } -type FieldValueInputProps = { - field?: EvaluationFieldOption - operator: ComparisonOperator - value: string | number | boolean | null - onChange: (value: string | number | boolean | null) => void +type ConditionValueInputProps = { + metric?: ConditionMetricOption + condition: JudgmentConditionItem + onChange: (value: string | string[] | boolean | null) => void } -type ConditionGroupProps = EvaluationResourceProps & { - group: JudgmentConditionGroup - index: number +type ConditionGroupProps = EvaluationResourceProps + +const getMetricValueTypeIconClassName = (valueType: ConditionMetricOption['valueType']) => { + if (valueType === 'number') + return 'i-ri-hashtag' + + if (valueType === 'boolean') + return 'i-ri-checkbox-circle-line' + + return 'i-ri-bar-chart-box-line' } -const ConditionFieldLabel = ({ - field, +const ConditionMetricLabel = ({ + metric, placeholder, -}: ConditionFieldLabelProps) => { - if (!field) +}: ConditionMetricLabelProps) => { + if (!metric) return {placeholder} return (
- - {field.label} + + {metric.label}
- {field.type} + {metric.group}
) } -const ConditionFieldSelect = ({ - field, - fieldOptions, +const ConditionMetricSelect = ({ + metric, + metricOptions, placeholder, onChange, -}: ConditionFieldSelectProps) => { +}: ConditionMetricSelectProps) => { + const groupedMetricOptions = useMemo(() => { + return Object.entries(metricOptions.reduce>((acc, option) => { + acc[option.group] = [...(acc[option.group] ?? []), option] + return acc + }, {})) + }, [metricOptions]) + return ( - { + const nextMetric = metricOptions.find(option => serializeVariableSelector(option.variableSelector) === value) + if (nextMetric) + onChange(nextMetric.variableSelector) + }} + > - + - - {groupFieldOptions(fieldOptions).map(([groupName, fields]) => ( + + {groupedMetricOptions.map(([groupName, options]) => ( {groupName} - {fields.map(option => ( - + {options.map(option => ( +
- + {option.label} + {option.description}
))} @@ -104,22 +128,21 @@ const ConditionFieldSelect = ({ } const ConditionOperatorSelect = ({ - field, operator, operators, onChange, }: ConditionOperatorSelectProps) => { - const { t } = useTranslation('evaluation') + const { t } = useTranslation() return ( onChange(nextValue === 'true')}> + onChange(nextValue)}> - - - - - {(field.options ?? []).map(option => ( - {option.label} - ))} - - - - ) - } + const isMultiValue = condition.comparisonOperator === 'in' || condition.comparisonOperator === 'not in' + const inputValue = Array.isArray(condition.value) + ? condition.value.join(', ') + : typeof condition.value === 'boolean' + ? '' + : condition.value ?? '' return (
{ - if (field.type === 'number') { - const nextValue = e.target.value - onChange(nextValue === '' ? null : Number(nextValue)) + if (isMultiValue) { + onChange(e.target.value.split(',').map(item => item.trim()).filter(Boolean)) return } - onChange(e.target.value) + onChange(e.target.value === '' ? null : e.target.value) }} />
@@ -195,20 +206,17 @@ const FieldValueInput = ({ const ConditionGroup = ({ resourceType, resourceId, - group, - index, }: ConditionGroupProps) => { const { t } = useTranslation('evaluation') - const config = getEvaluationMockConfig(resourceType) + const resource = useEvaluationResource(resourceType, resourceId) + const metricOptions = useMemo(() => buildConditionMetricOptions(resource.metrics), [resource.metrics]) const logicalLabels = { and: t('conditions.logical.and'), or: t('conditions.logical.or'), } - const removeConditionGroup = useEvaluationStore(state => state.removeConditionGroup) - const setConditionGroupOperator = useEvaluationStore(state => state.setConditionGroupOperator) - const addConditionItem = useEvaluationStore(state => state.addConditionItem) - const removeConditionItem = useEvaluationStore(state => state.removeConditionItem) - const updateConditionField = useEvaluationStore(state => state.updateConditionField) + const setConditionLogicalOperator = useEvaluationStore(state => state.setConditionLogicalOperator) + const removeCondition = useEvaluationStore(state => state.removeCondition) + const updateConditionMetric = useEvaluationStore(state => state.updateConditionMetric) const updateConditionOperator = useEvaluationStore(state => state.updateConditionOperator) const updateConditionValue = useEvaluationStore(state => state.updateConditionValue) @@ -216,7 +224,6 @@ const ConditionGroup = ({
- {t('conditions.groupLabel', { index: index + 1 })}
{(['and', 'or'] as const).map(operator => ( ))}
-
- - -
+
- {group.items.map((item) => { - const field = config.fieldOptions.find(option => option.id === item.fieldId) - const allowedOperators = getAllowedOperators(resourceType, item.fieldId) - const showValue = !!field && requiresConditionValue(item.operator) + {resource.conditions.conditions.map((condition) => { + const metric = metricOptions.find(option => isSelectorEqual(option.variableSelector, condition.variableSelector)) + const allowedOperators = getAllowedOperators(resource.metrics, condition.variableSelector) + const showValue = !!metric && requiresConditionValue(condition.comparisonOperator) return ( -
+
- updateConditionField(resourceType, resourceId, group.id, item.id, value)} + onChange={value => updateConditionMetric(resourceType, resourceId, condition.id, value)} />
updateConditionOperator(resourceType, resourceId, group.id, item.id, value)} + onChange={value => updateConditionOperator(resourceType, resourceId, condition.id, value)} />
{showValue && (
- updateConditionValue(resourceType, resourceId, group.id, item.id, value)} + updateConditionValue(resourceType, resourceId, condition.id, value)} />
)} @@ -292,7 +284,7 @@ const ConditionGroup = ({ size="small" variant="ghost" aria-label={t('conditions.removeCondition')} - onClick={() => removeConditionItem(resourceType, resourceId, group.id, item.id)} + onClick={() => removeCondition(resourceType, resourceId, condition.id)} >