mirror of
https://github.com/langgenius/dify.git
synced 2026-05-13 00:33:37 +08:00
348 lines
14 KiB
TypeScript
348 lines
14 KiB
TypeScript
import type { EvaluationConfig } from '@/types/evaluation'
|
|
import { getEvaluationMockConfig } from '../mock'
|
|
import {
|
|
getAllowedOperators,
|
|
isCustomMetricConfigured,
|
|
requiresConditionValue,
|
|
useEvaluationStore,
|
|
} from '../store'
|
|
import { buildEvaluationConfigPayload, buildEvaluationRunRequest } from '../store-utils'
|
|
|
|
describe('evaluation store', () => {
|
|
// Start every test from a store with no resources; each test below registers
// the resource it needs through ensureResource.
beforeEach(() => {
  const emptyResources = {}
  useEvaluationStore.setState({ resources: emptyResources })
})
|
|
|
|
// Takes a custom-workflow metric from "just added" (reported as not configured)
// through workflow selection, mapping/output sync and a mapping update, then
// checks that it is reported as configured and keeps the workflow details.
it('should configure a custom metric mapping to a valid state', () => {
  const resourceType = 'apps'
  const resourceId = 'app-1'
  const actions = useEvaluationStore.getState()
  const mockConfig = getEvaluationMockConfig(resourceType)
  // Each read goes through a fresh getState() call rather than the `actions`
  // object captured above.
  const readMetrics = () => useEvaluationStore.getState().resources['apps:app-1'].metrics

  actions.ensureResource(resourceType, resourceId)
  actions.addCustomMetric(resourceType, resourceId)

  const initialMetric = readMetrics().find(metric => metric.kind === 'custom-workflow')
  expect(initialMetric).toBeDefined()
  expect(isCustomMetricConfigured(initialMetric!)).toBe(false)

  const metricId = initialMetric!.id
  const firstWorkflow = mockConfig.workflowOptions[0]
  actions.setCustomMetricWorkflow(resourceType, resourceId, metricId, {
    workflowId: firstWorkflow.id,
    workflowAppId: 'custom-workflow-app-id',
    workflowName: firstWorkflow.label,
  })
  actions.syncCustomMetricMappings(resourceType, resourceId, metricId, ['query'])
  actions.syncCustomMetricOutputs(resourceType, resourceId, metricId, [{
    id: 'score',
    valueType: 'number',
  }])

  // The mapping id is only known after the sync above, so look the metric up again.
  const syncedMetric = readMetrics().find(metric => metric.id === metricId)
  const firstMappingId = syncedMetric!.customConfig!.mappings[0].id
  actions.updateCustomMetricMapping(resourceType, resourceId, metricId, firstMappingId, {
    outputVariableId: 'answer',
  })

  const configuredMetric = readMetrics().find(metric => metric.id === metricId)
  expect(isCustomMetricConfigured(configuredMetric!)).toBe(true)

  const configuredCustomConfig = configuredMetric!.customConfig!
  expect(configuredCustomConfig.workflowAppId).toBe('custom-workflow-app-id')
  expect(configuredCustomConfig.workflowName).toBe(firstWorkflow.label)
  expect(configuredCustomConfig.outputs).toEqual([{ id: 'score', valueType: 'number' }])
})
|
|
|
|
// Calls addCustomMetric twice on the same resource and expects exactly one
// custom-workflow metric to exist afterwards.
it('should only add one custom metric', () => {
  const resourceType = 'apps'
  const resourceId = 'app-custom-limit'
  const actions = useEvaluationStore.getState()

  actions.ensureResource(resourceType, resourceId)
  actions.addCustomMetric(resourceType, resourceId)
  actions.addCustomMetric(resourceType, resourceId)

  const { metrics } = useEvaluationStore.getState().resources['apps:app-custom-limit']
  const customMetrics = metrics.filter(metric => metric.kind === 'custom-workflow')

  expect(customMetrics).toHaveLength(1)
})
|
|
|
|
// Adds the second builtin metric option from the mock config, confirms it is
// present, removes it by metric id and confirms it is gone.
it('should add and remove builtin metrics', () => {
  const resourceType = 'apps'
  const resourceId = 'app-2'
  const actions = useEvaluationStore.getState()
  const mockConfig = getEvaluationMockConfig(resourceType)
  // Each read goes through a fresh getState() call rather than the `actions`
  // object captured above.
  const readMetrics = () => useEvaluationStore.getState().resources['apps:app-2'].metrics

  actions.ensureResource(resourceType, resourceId)
  const optionId = mockConfig.builtinMetrics[1].id
  actions.addBuiltinMetric(resourceType, resourceId, optionId)

  const addedMetric = readMetrics().find(metric => metric.optionId === optionId)
  expect(addedMetric).toBeDefined()

  actions.removeMetric(resourceType, resourceId, addedMetric!.id)

  const isStillPresent = readMetrics().some(metric => metric.id === addedMetric!.id)
  expect(isStillPresent).toBe(false)
})
|
|
|
|
// Adds the same builtin metric twice with different node selections. The
// second call is expected to replace the node list rather than add a duplicate
// metric, and the condition created in between is expected to be removed.
it('should upsert builtin metric node selections and prune stale conditions', () => {
  const resourceType = 'apps'
  const resourceId = 'app-4'
  const actions = useEvaluationStore.getState()
  const mockConfig = getEvaluationMockConfig(resourceType)
  const metricId = mockConfig.builtinMetrics[0].id

  actions.ensureResource(resourceType, resourceId)
  actions.addBuiltinMetric(resourceType, resourceId, metricId, [
    { node_id: 'node-1', title: 'Answer Node', type: 'answer' },
  ])
  // Created while node-1 is the only selection.
  actions.addCondition(resourceType, resourceId)

  // Same metric id, different node: this is the "upsert" under test.
  actions.addBuiltinMetric(resourceType, resourceId, metricId, [
    { node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
  ])

  const resource = useEvaluationStore.getState().resources['apps:app-4']
  const matchingMetrics = resource.metrics.filter(item => item.optionId === metricId)
  const upsertedMetric = resource.metrics.find(item => item.optionId === metricId)

  expect(upsertedMetric?.nodeInfoList).toEqual([
    { node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
  ])
  expect(matchingMetrics).toHaveLength(1)
  expect(resource.judgmentConfig.conditions).toHaveLength(0)
})
|
|
|
|
// With one builtin metric selected, a condition added without an explicit
// selector is expected to target that metric on its node, default to '=',
// and allow the numeric operator set listed below.
it('should build numeric conditions from selected metrics', () => {
  const resourceType = 'apps'
  const resourceId = 'app-conditions'
  const actions = useEvaluationStore.getState()
  const mockConfig = getEvaluationMockConfig(resourceType)

  actions.ensureResource(resourceType, resourceId)
  actions.addBuiltinMetric(resourceType, resourceId, mockConfig.builtinMetrics[0].id, [
    { node_id: 'node-answer', title: 'Answer Node', type: 'llm' },
  ])
  actions.setConditionLogicalOperator(resourceType, resourceId, 'or')
  actions.addCondition(resourceType, resourceId)

  const { judgmentConfig, metrics } = useEvaluationStore.getState().resources['apps:app-conditions']
  const [firstCondition] = judgmentConfig.conditions

  expect(judgmentConfig.logicalOperator).toBe('or')
  // NOTE(review): this expectation assumes the first builtin metric in the mock
  // config has the id 'answer-correctness' — confirm against ../mock.
  expect(firstCondition.variableSelector).toEqual(['node-answer', 'answer-correctness'])
  expect(firstCondition.comparisonOperator).toBe('=')

  const allowedOperators = getAllowedOperators(metrics, firstCondition.variableSelector)
  expect(allowedOperators).toEqual(['=', '≠', '>', '<', '≥', '≤', 'is null', 'is not null'])
})
|
|
|
|
// Adds a condition with an explicit [workflowId, outputId] selector pointing at
// a string output of the custom metric. The condition is expected to keep that
// selector, default to 'contains' and start with a null value.
it('should add a condition from the selected custom metric output', () => {
  const resourceType = 'apps'
  const resourceId = 'app-condition-selector'
  const actions = useEvaluationStore.getState()
  const mockConfig = getEvaluationMockConfig(resourceType)
  // Each read goes through a fresh getState() call rather than the `actions`
  // object captured above.
  const readResource = () => useEvaluationStore.getState().resources['apps:app-condition-selector']

  actions.ensureResource(resourceType, resourceId)
  actions.addCustomMetric(resourceType, resourceId)

  const customMetric = readResource().metrics.find(metric => metric.kind === 'custom-workflow')!
  const firstWorkflow = mockConfig.workflowOptions[0]
  actions.setCustomMetricWorkflow(resourceType, resourceId, customMetric.id, {
    workflowId: firstWorkflow.id,
    workflowAppId: 'custom-workflow-app-id',
    workflowName: firstWorkflow.label,
  })
  actions.syncCustomMetricOutputs(resourceType, resourceId, customMetric.id, [{
    id: 'reason',
    valueType: 'string',
  }])

  actions.addCondition(resourceType, resourceId, [firstWorkflow.id, 'reason'])

  const [addedCondition] = readResource().judgmentConfig.conditions

  expect(addedCondition.variableSelector).toEqual([firstWorkflow.id, 'reason'])
  expect(addedCondition.comparisonOperator).toBe('contains')
  expect(addedCondition.value).toBeNull()
})
|
|
|
|
// Gives a condition a value and then switches it to the 'empty' operator.
// requiresConditionValue('empty') is asserted to be false, and the stored
// value is expected to be reset to null.
it('should clear values for operators without values', () => {
  const resourceType = 'apps'
  const resourceId = 'app-3'
  const actions = useEvaluationStore.getState()
  const mockConfig = getEvaluationMockConfig(resourceType)
  // Each read goes through a fresh getState() call rather than the `actions`
  // object captured above.
  const readResource = () => useEvaluationStore.getState().resources['apps:app-3']

  actions.ensureResource(resourceType, resourceId)
  actions.addCustomMetric(resourceType, resourceId)

  const customMetric = readResource().metrics.find(metric => metric.kind === 'custom-workflow')!
  const firstWorkflow = mockConfig.workflowOptions[0]
  actions.setCustomMetricWorkflow(resourceType, resourceId, customMetric.id, {
    workflowId: firstWorkflow.id,
    workflowAppId: 'custom-workflow-app-id',
    workflowName: firstWorkflow.label,
  })
  actions.syncCustomMetricOutputs(resourceType, resourceId, customMetric.id, [{
    id: 'reason',
    valueType: 'string',
  }])
  actions.addCondition(resourceType, resourceId)

  const [initialCondition] = readResource().judgmentConfig.conditions
  const conditionId = initialCondition.id

  // Order matters: the value is set first, then the operator changes to one without a value.
  actions.updateConditionMetric(resourceType, resourceId, conditionId, [firstWorkflow.id, 'reason'])
  actions.updateConditionValue(resourceType, resourceId, conditionId, 'needs follow-up')
  actions.updateConditionOperator(resourceType, resourceId, conditionId, 'empty')

  const [updatedCondition] = readResource().judgmentConfig.conditions

  expect(requiresConditionValue('empty')).toBe(false)
  expect(updatedCondition.value).toBeNull()
})
|
|
|
|
// Hydrates a resource from a server-shaped EvaluationConfig and checks that the
// model, metrics and judgment config are mapped into store state, while the
// batch tab, uploaded file name and batch records set beforehand are kept.
it('should hydrate resource state from judgment_config', () => {
  const resourceType = 'apps'
  const resourceId = 'app-5'
  const actions = useEvaluationStore.getState()
  const config: EvaluationConfig = {
    evaluation_model: 'gpt-4o-mini',
    evaluation_model_provider: 'openai',
    default_metrics: [{
      metric: 'faithfulness',
      node_info_list: [
        { node_id: 'node-1', title: 'Retriever', type: 'retriever' },
      ],
    }],
    customized_metrics: {
      evaluation_workflow_id: 'workflow-precision-review',
      input_fields: {
        query: 'answer',
      },
      output_fields: [{
        variable: 'reason',
        value_type: 'string',
      }],
    },
    judgment_config: {
      logical_operator: 'or',
      conditions: [{
        variable_selector: ['node-1', 'faithfulness'],
        comparison_operator: '≥',
        value: '0.9',
      }],
    },
  }

  // Seed batch-related state before hydrating so the test can check it is kept.
  actions.ensureResource(resourceType, resourceId)
  actions.setBatchTab(resourceType, resourceId, 'history')
  actions.setUploadedFileName(resourceType, resourceId, 'batch.csv')
  useEvaluationStore.setState((state) => {
    const currentResource = state.resources['apps:app-5']
    return {
      resources: {
        ...state.resources,
        'apps:app-5': {
          ...currentResource,
          batchRecords: [{
            id: 'batch-1',
            fileName: 'batch.csv',
            status: 'success',
            startedAt: '10:00:00',
            summary: 'App evaluation batch',
          }],
        },
      },
    }
  })
  actions.hydrateResource(resourceType, resourceId, config)

  const hydratedState = useEvaluationStore.getState().resources['apps:app-5']

  // The provider and model are expected to be joined as '<provider>::<model>'.
  expect(hydratedState.judgeModelId).toBe('openai::gpt-4o-mini')
  expect(hydratedState.metrics).toHaveLength(2)

  const [builtinMetric, customMetric] = hydratedState.metrics
  expect(builtinMetric.optionId).toBe('faithfulness')
  // NOTE(review): 0.85 is not part of `config`; presumably it comes from the
  // builtin metric defaults — confirm against the store.
  expect(builtinMetric.threshold).toBe(0.85)
  expect(builtinMetric.nodeInfoList).toEqual([
    { node_id: 'node-1', title: 'Retriever', type: 'retriever' },
  ])

  expect(customMetric.kind).toBe('custom-workflow')
  expect(customMetric.customConfig?.workflowId).toBe('workflow-precision-review')
  // The `query: 'answer'` entry of input_fields is expected to become one mapping.
  expect(customMetric.customConfig?.mappings[0].inputVariableId).toBe('query')
  expect(customMetric.customConfig?.mappings[0].outputVariableId).toBe('answer')
  expect(customMetric.customConfig?.outputs).toEqual([{ id: 'reason', valueType: 'string' }])

  const { judgmentConfig } = hydratedState
  expect(judgmentConfig.logicalOperator).toBe('or')
  expect(judgmentConfig.conditions[0]).toMatchObject({
    variableSelector: ['node-1', 'faithfulness'],
    comparisonOperator: '≥',
    value: '0.9',
  })

  // State that was set before hydration.
  expect(hydratedState.activeBatchTab).toBe('history')
  expect(hydratedState.uploadedFileName).toBe('batch.csv')
  expect(hydratedState.batchRecords).toHaveLength(1)
})
|
|
|
|
// Builds a resource with a judge model, one builtin metric, one fully mapped
// custom metric and one condition, then checks the save payload and the run
// request (the same payload plus file_id) built from it.
it('should build an evaluation config save payload from resource state', () => {
  const resourceType = 'apps'
  const resourceId = 'app-save-config'
  const actions = useEvaluationStore.getState()
  // Each read goes through a fresh getState() call rather than the `actions`
  // object captured above.
  const readResource = () => useEvaluationStore.getState().resources['apps:app-save-config']

  actions.ensureResource(resourceType, resourceId)
  actions.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini')
  actions.addBuiltinMetric(resourceType, resourceId, 'faithfulness', [
    { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
  ])
  actions.addCustomMetric(resourceType, resourceId)

  const customMetric = readResource().metrics.find(metric => metric.kind === 'custom-workflow')!
  const customMetricId = customMetric.id
  actions.setCustomMetricWorkflow(resourceType, resourceId, customMetricId, {
    workflowId: 'workflow-precision-review',
    workflowAppId: 'evaluation-workflow-app-id',
    workflowName: 'Precision Review',
  })
  actions.syncCustomMetricMappings(resourceType, resourceId, customMetricId, ['query'])
  actions.syncCustomMetricOutputs(resourceType, resourceId, customMetricId, [{
    id: 'score',
    valueType: 'number',
  }])

  // The mapping id is only known after the sync above, so look the metric up again.
  const syncedMetric = readResource().metrics.find(metric => metric.id === customMetricId)!
  const firstMappingId = syncedMetric.customConfig!.mappings[0].id
  actions.updateCustomMetricMapping(resourceType, resourceId, customMetricId, firstMappingId, {
    outputVariableId: '{{#node-answer.output#}}',
  })
  actions.addCondition(resourceType, resourceId, ['workflow-precision-review', 'score'])

  const [addedCondition] = readResource().judgmentConfig.conditions
  actions.updateConditionOperator(resourceType, resourceId, addedCondition.id, '≥')
  actions.updateConditionValue(resourceType, resourceId, addedCondition.id, '0.8')

  const resource = readResource()
  // The expected payload identifies the workflow by its workflowAppId, not the
  // workflowId passed to addCondition, both in customized_metrics and in the
  // condition's variable_selector. logical_operator is never set in this test,
  // so 'and' is the expected default.
  const expectedConfigPayload = {
    evaluation_model: 'gpt-4o-mini',
    evaluation_model_provider: 'openai',
    default_metrics: [{
      metric: 'faithfulness',
      value_type: 'number',
      node_info_list: [
        { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
      ],
    }],
    customized_metrics: {
      evaluation_workflow_id: 'evaluation-workflow-app-id',
      input_fields: {
        query: '{{#node-answer.output#}}',
      },
      output_fields: [{
        variable: 'score',
        value_type: 'number',
      }],
    },
    judgment_config: {
      logical_operator: 'and',
      conditions: [{
        variable_selector: ['evaluation-workflow-app-id', 'score'],
        comparison_operator: '≥',
        value: '0.8',
      }],
    },
  }
  const expectedRunRequest = {
    ...expectedConfigPayload,
    file_id: 'file-1',
  }

  expect(buildEvaluationConfigPayload(resource)).toEqual(expectedConfigPayload)
  expect(buildEvaluationRunRequest(resource, 'file-1')).toEqual(expectedRunRequest)
})
|
|
})
|