dify/web/app/components/evaluation/__tests__/index.spec.tsx
2026-03-30 15:39:52 +08:00

212 lines
7.6 KiB
TypeScript

import { act, fireEvent, render, screen } from '@testing-library/react'
import Evaluation from '..'
import { getEvaluationMockConfig } from '../mock'
import { useEvaluationStore } from '../store'
const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn())
const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn())
vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({
useModelList: () => ({
data: [{
provider: 'openai',
models: [{ model: 'gpt-4o-mini' }],
}],
}),
}))
vi.mock('@/app/components/header/account-setting/model-provider-page/model-selector', () => ({
default: ({ defaultModel }: { defaultModel?: { provider: string, model: string } }) => (
<div data-testid="evaluation-model-selector">
{defaultModel ? `${defaultModel.provider}:${defaultModel.model}` : 'empty'}
</div>
),
}))
vi.mock('@/service/use-evaluation', () => ({
useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args),
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
}))
describe('Evaluation', () => {
beforeEach(() => {
useEvaluationStore.setState({ resources: {} })
vi.clearAllMocks()
mockUseAvailableEvaluationMetrics.mockReturnValue({
data: {
metrics: ['answer-correctness', 'faithfulness'],
},
isLoading: false,
})
mockUseEvaluationNodeInfoMutation.mockReturnValue({
isPending: false,
mutate: (_input: unknown, options?: { onSuccess?: (data: Record<string, Array<{ node_id: string, title: string, type: string }>>) => void }) => {
options?.onSuccess?.({
'answer-correctness': [
{ node_id: 'node-answer', title: 'Answer Node', type: 'llm' },
],
'faithfulness': [
{ node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
],
})
},
})
})
it('should search, select metric nodes, and create a batch history record', async () => {
vi.useFakeTimers()
render(<Evaluation resourceType="workflow" resourceId="app-1" />)
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini')
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchNodeOrMetrics'), {
target: { value: 'does-not-exist' },
})
expect(screen.getByText('evaluation.metrics.noResults')).toBeInTheDocument()
fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchNodeOrMetrics'), {
target: { value: 'faith' },
})
fireEvent.click(screen.getByTestId('evaluation-metric-node-faithfulness-node-faithfulness'))
expect(screen.getAllByText('Faithfulness').length).toBeGreaterThan(0)
expect(screen.getAllByText('Retriever Node').length).toBeGreaterThan(0)
fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchNodeOrMetrics'), {
target: { value: '' },
})
fireEvent.click(screen.getByTestId('evaluation-metric-node-answer-correctness-node-answer'))
expect(screen.getAllByText('Answer Correctness').length).toBeGreaterThan(0)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.batch.run' }))
expect(screen.getByText('evaluation.batch.status.running')).toBeInTheDocument()
await act(async () => {
vi.advanceTimersByTime(1300)
})
expect(screen.getByText('evaluation.batch.status.success')).toBeInTheDocument()
expect(screen.getByText('Workflow evaluation batch')).toBeInTheDocument()
vi.useRealTimers()
})
it('should render time placeholders and hide the value row for empty operators', () => {
const resourceType = 'workflow'
const resourceId = 'app-2'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
const timeField = config.fieldOptions.find(field => field.type === 'time')!
let groupId = ''
let itemId = ''
act(() => {
store.ensureResource(resourceType, resourceId)
store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini')
const group = useEvaluationStore.getState().resources['workflow:app-2'].conditions[0]
groupId = group.id
itemId = group.items[0].id
store.updateConditionField(resourceType, resourceId, groupId, itemId, timeField.id)
store.updateConditionOperator(resourceType, resourceId, groupId, itemId, 'before')
})
let rerender: ReturnType<typeof render>['rerender']
act(() => {
({ rerender } = render(<Evaluation resourceType={resourceType} resourceId={resourceId} />))
})
expect(screen.getByText('evaluation.conditions.selectTime')).toBeInTheDocument()
act(() => {
store.updateConditionOperator(resourceType, resourceId, groupId, itemId, 'is_empty')
rerender(<Evaluation resourceType={resourceType} resourceId={resourceId} />)
})
expect(screen.queryByText('evaluation.conditions.selectTime')).not.toBeInTheDocument()
})
it('should render the metric no-node empty state', () => {
mockUseAvailableEvaluationMetrics.mockReturnValue({
data: {
metrics: ['context-precision'],
},
isLoading: false,
})
mockUseEvaluationNodeInfoMutation.mockReturnValue({
isPending: false,
mutate: (_input: unknown, options?: { onSuccess?: (data: Record<string, Array<{ node_id: string, title: string, type: string }>>) => void }) => {
options?.onSuccess?.({
'context-precision': [],
})
},
})
render(<Evaluation resourceType="workflow" resourceId="app-3" />)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
expect(screen.getByText('evaluation.metrics.noNodesInWorkflow')).toBeInTheDocument()
})
it('should render the global empty state when no metrics are available', () => {
mockUseAvailableEvaluationMetrics.mockReturnValue({
data: {
metrics: [],
},
isLoading: false,
})
render(<Evaluation resourceType="workflow" resourceId="app-4" />)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
expect(screen.getByText('evaluation.metrics.noResults')).toBeInTheDocument()
})
it('should show more nodes when a metric has more than three nodes', () => {
mockUseAvailableEvaluationMetrics.mockReturnValue({
data: {
metrics: ['answer-correctness'],
},
isLoading: false,
})
mockUseEvaluationNodeInfoMutation.mockReturnValue({
isPending: false,
mutate: (_input: unknown, options?: { onSuccess?: (data: Record<string, Array<{ node_id: string, title: string, type: string }>>) => void }) => {
options?.onSuccess?.({
'answer-correctness': [
{ node_id: 'node-1', title: 'LLM 1', type: 'llm' },
{ node_id: 'node-2', title: 'LLM 2', type: 'llm' },
{ node_id: 'node-3', title: 'LLM 3', type: 'llm' },
{ node_id: 'node-4', title: 'LLM 4', type: 'llm' },
],
})
},
})
render(<Evaluation resourceType="workflow" resourceId="app-5" />)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
expect(screen.getByText('LLM 3')).toBeInTheDocument()
expect(screen.queryByText('LLM 4')).not.toBeInTheDocument()
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.showMore' }))
expect(screen.getByText('LLM 4')).toBeInTheDocument()
expect(screen.getByRole('button', { name: 'evaluation.metrics.showLess' })).toBeInTheDocument()
})
})