mirror of
https://github.com/langgenius/dify.git
synced 2026-05-06 18:27:19 +08:00
feat(web): dataset evaluation configure fetch
This commit is contained in:
parent
73d95245f8
commit
cfb5b9dfea
@ -5,7 +5,7 @@ const Page = async (props: {
|
||||
}) => {
|
||||
const { appId } = await props.params
|
||||
|
||||
return <Evaluation resourceType="workflow" resourceId={appId} />
|
||||
return <Evaluation resourceType="apps" resourceId={appId} />
|
||||
}
|
||||
|
||||
export default Page
|
||||
|
||||
@ -5,7 +5,7 @@ const Page = async (props: {
|
||||
}) => {
|
||||
const { datasetId } = await props.params
|
||||
|
||||
return <Evaluation resourceType="pipeline" resourceId={datasetId} />
|
||||
return <Evaluation resourceType="datasets" resourceId={datasetId} />
|
||||
}
|
||||
|
||||
export default Page
|
||||
|
||||
@ -4,6 +4,7 @@ import { getEvaluationMockConfig } from '../mock'
|
||||
import { useEvaluationStore } from '../store'
|
||||
|
||||
const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn())
|
||||
const mockUseEvaluationConfig = vi.hoisted(() => vi.fn())
|
||||
const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn())
|
||||
|
||||
vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({
|
||||
@ -38,6 +39,7 @@ vi.mock('@/app/components/header/account-setting/model-provider-page/model-selec
|
||||
}))
|
||||
|
||||
vi.mock('@/service/use-evaluation', () => ({
|
||||
useEvaluationConfig: (...args: unknown[]) => mockUseEvaluationConfig(...args),
|
||||
useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args),
|
||||
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
|
||||
}))
|
||||
@ -46,6 +48,9 @@ describe('Evaluation', () => {
|
||||
beforeEach(() => {
|
||||
useEvaluationStore.setState({ resources: {} })
|
||||
vi.clearAllMocks()
|
||||
mockUseEvaluationConfig.mockReturnValue({
|
||||
data: null,
|
||||
})
|
||||
|
||||
mockUseAvailableEvaluationMetrics.mockReturnValue({
|
||||
data: {
|
||||
@ -72,7 +77,7 @@ describe('Evaluation', () => {
|
||||
it('should search, select metric nodes, and create a batch history record', async () => {
|
||||
vi.useFakeTimers()
|
||||
|
||||
render(<Evaluation resourceType="workflow" resourceId="app-1" />)
|
||||
render(<Evaluation resourceType="apps" resourceId="app-1" />)
|
||||
|
||||
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini')
|
||||
|
||||
@ -113,7 +118,7 @@ describe('Evaluation', () => {
|
||||
})
|
||||
|
||||
it('should render time placeholders and hide the value row for empty operators', () => {
|
||||
const resourceType = 'workflow'
|
||||
const resourceType = 'apps'
|
||||
const resourceId = 'app-2'
|
||||
const store = useEvaluationStore.getState()
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
@ -126,7 +131,7 @@ describe('Evaluation', () => {
|
||||
store.ensureResource(resourceType, resourceId)
|
||||
store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini')
|
||||
|
||||
const group = useEvaluationStore.getState().resources['workflow:app-2'].conditions[0]
|
||||
const group = useEvaluationStore.getState().resources['apps:app-2'].conditions[0]
|
||||
groupId = group.id
|
||||
itemId = group.items[0].id
|
||||
|
||||
@ -166,7 +171,7 @@ describe('Evaluation', () => {
|
||||
},
|
||||
})
|
||||
|
||||
render(<Evaluation resourceType="workflow" resourceId="app-3" />)
|
||||
render(<Evaluation resourceType="apps" resourceId="app-3" />)
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
|
||||
|
||||
@ -181,7 +186,7 @@ describe('Evaluation', () => {
|
||||
isLoading: false,
|
||||
})
|
||||
|
||||
render(<Evaluation resourceType="workflow" resourceId="app-4" />)
|
||||
render(<Evaluation resourceType="apps" resourceId="app-4" />)
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
|
||||
|
||||
@ -210,7 +215,7 @@ describe('Evaluation', () => {
|
||||
},
|
||||
})
|
||||
|
||||
render(<Evaluation resourceType="workflow" resourceId="app-5" />)
|
||||
render(<Evaluation resourceType="apps" resourceId="app-5" />)
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
|
||||
|
||||
@ -224,7 +229,7 @@ describe('Evaluation', () => {
|
||||
})
|
||||
|
||||
it('should render the pipeline-specific layout without auto-selecting a judge model', () => {
|
||||
render(<Evaluation resourceType="pipeline" resourceId="dataset-1" />)
|
||||
render(<Evaluation resourceType="datasets" resourceId="dataset-1" />)
|
||||
|
||||
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('empty')
|
||||
expect(screen.getByText('evaluation.history.title')).toBeInTheDocument()
|
||||
@ -236,7 +241,7 @@ describe('Evaluation', () => {
|
||||
})
|
||||
|
||||
it('should enable pipeline batch actions after selecting a judge model and metric', () => {
|
||||
render(<Evaluation resourceType="pipeline" resourceId="dataset-2" />)
|
||||
render(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: 'select-model' }))
|
||||
fireEvent.click(screen.getByRole('button', { name: /Context Precision/i }))
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import type { EvaluationConfig } from '@/types/evaluation'
|
||||
import { getEvaluationMockConfig } from '../mock'
|
||||
import {
|
||||
getAllowedOperators,
|
||||
@ -12,7 +13,7 @@ describe('evaluation store', () => {
|
||||
})
|
||||
|
||||
it('should configure a custom metric mapping to a valid state', () => {
|
||||
const resourceType = 'workflow'
|
||||
const resourceType = 'apps'
|
||||
const resourceId = 'app-1'
|
||||
const store = useEvaluationStore.getState()
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
@ -20,7 +21,7 @@ describe('evaluation store', () => {
|
||||
store.ensureResource(resourceType, resourceId)
|
||||
store.addCustomMetric(resourceType, resourceId)
|
||||
|
||||
const initialMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.kind === 'custom-workflow')
|
||||
const initialMetric = useEvaluationStore.getState().resources['apps:app-1'].metrics.find(metric => metric.kind === 'custom-workflow')
|
||||
expect(initialMetric).toBeDefined()
|
||||
expect(isCustomMetricConfigured(initialMetric!)).toBe(false)
|
||||
|
||||
@ -34,14 +35,14 @@ describe('evaluation store', () => {
|
||||
targetVariableId: config.workflowOptions[0].targetVariables[0].id,
|
||||
})
|
||||
|
||||
const configuredMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.id === initialMetric!.id)
|
||||
const configuredMetric = useEvaluationStore.getState().resources['apps:app-1'].metrics.find(metric => metric.id === initialMetric!.id)
|
||||
expect(isCustomMetricConfigured(configuredMetric!)).toBe(true)
|
||||
expect(configuredMetric!.customConfig!.workflowAppId).toBe('custom-workflow-app-id')
|
||||
expect(configuredMetric!.customConfig!.workflowName).toBe(config.workflowOptions[0].label)
|
||||
})
|
||||
|
||||
it('should add and remove builtin metrics', () => {
|
||||
const resourceType = 'workflow'
|
||||
const resourceType = 'apps'
|
||||
const resourceId = 'app-2'
|
||||
const store = useEvaluationStore.getState()
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
@ -49,16 +50,16 @@ describe('evaluation store', () => {
|
||||
store.ensureResource(resourceType, resourceId)
|
||||
store.addBuiltinMetric(resourceType, resourceId, config.builtinMetrics[1].id)
|
||||
|
||||
const addedMetric = useEvaluationStore.getState().resources['workflow:app-2'].metrics.find(metric => metric.optionId === config.builtinMetrics[1].id)
|
||||
const addedMetric = useEvaluationStore.getState().resources['apps:app-2'].metrics.find(metric => metric.optionId === config.builtinMetrics[1].id)
|
||||
expect(addedMetric).toBeDefined()
|
||||
|
||||
store.removeMetric(resourceType, resourceId, addedMetric!.id)
|
||||
|
||||
expect(useEvaluationStore.getState().resources['workflow:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false)
|
||||
expect(useEvaluationStore.getState().resources['apps:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false)
|
||||
})
|
||||
|
||||
it('should upsert builtin metric node selections', () => {
|
||||
const resourceType = 'workflow'
|
||||
const resourceType = 'apps'
|
||||
const resourceId = 'app-4'
|
||||
const store = useEvaluationStore.getState()
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
@ -73,38 +74,38 @@ describe('evaluation store', () => {
|
||||
{ node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
|
||||
])
|
||||
|
||||
const metric = useEvaluationStore.getState().resources['workflow:app-4'].metrics.find(item => item.optionId === metricId)
|
||||
const metric = useEvaluationStore.getState().resources['apps:app-4'].metrics.find(item => item.optionId === metricId)
|
||||
|
||||
expect(metric?.nodeInfoList).toEqual([
|
||||
{ node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
|
||||
])
|
||||
expect(useEvaluationStore.getState().resources['workflow:app-4'].metrics.filter(item => item.optionId === metricId)).toHaveLength(1)
|
||||
expect(useEvaluationStore.getState().resources['apps:app-4'].metrics.filter(item => item.optionId === metricId)).toHaveLength(1)
|
||||
})
|
||||
|
||||
it('should update condition groups and adapt operators to field types', () => {
|
||||
const resourceType = 'pipeline'
|
||||
const resourceType = 'datasets'
|
||||
const resourceId = 'dataset-1'
|
||||
const store = useEvaluationStore.getState()
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
|
||||
store.ensureResource(resourceType, resourceId)
|
||||
|
||||
const initialGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
|
||||
const initialGroup = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0]
|
||||
store.setConditionGroupOperator(resourceType, resourceId, initialGroup.id, 'or')
|
||||
store.addConditionGroup(resourceType, resourceId)
|
||||
|
||||
const booleanField = config.fieldOptions.find(field => field.type === 'boolean')!
|
||||
const currentItem = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0].items[0]
|
||||
const currentItem = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0].items[0]
|
||||
store.updateConditionField(resourceType, resourceId, initialGroup.id, currentItem.id, booleanField.id)
|
||||
|
||||
const updatedGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
|
||||
const updatedGroup = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0]
|
||||
expect(updatedGroup.logicalOperator).toBe('or')
|
||||
expect(updatedGroup.items[0].operator).toBe('is')
|
||||
expect(getAllowedOperators(resourceType, booleanField.id)).toEqual(['is', 'is_not'])
|
||||
})
|
||||
|
||||
it('should support time fields and clear values for empty operators', () => {
|
||||
const resourceType = 'workflow'
|
||||
const resourceType = 'apps'
|
||||
const resourceId = 'app-3'
|
||||
const store = useEvaluationStore.getState()
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
@ -112,15 +113,89 @@ describe('evaluation store', () => {
|
||||
store.ensureResource(resourceType, resourceId)
|
||||
|
||||
const timeField = config.fieldOptions.find(field => field.type === 'time')!
|
||||
const item = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
|
||||
const item = useEvaluationStore.getState().resources['apps:app-3'].conditions[0].items[0]
|
||||
|
||||
store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, timeField.id)
|
||||
store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, 'is_empty')
|
||||
store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['apps:app-3'].conditions[0].id, item.id, timeField.id)
|
||||
store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['apps:app-3'].conditions[0].id, item.id, 'is_empty')
|
||||
|
||||
const updatedItem = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
|
||||
const updatedItem = useEvaluationStore.getState().resources['apps:app-3'].conditions[0].items[0]
|
||||
|
||||
expect(getAllowedOperators(resourceType, timeField.id)).toEqual(['is', 'before', 'after', 'is_empty', 'is_not_empty'])
|
||||
expect(requiresConditionValue('is_empty')).toBe(false)
|
||||
expect(updatedItem.value).toBeNull()
|
||||
})
|
||||
|
||||
// Checks that hydrateResource() maps a fetched EvaluationConfig onto the
// per-resource store state (judge model, metrics, conditions) while leaving
// batch-related UI state (active tab, uploaded file name, batch records) intact.
it('should hydrate resource state from evaluation config', () => {
  const resourceType = 'apps'
  const resourceId = 'app-5'
  const store = useEvaluationStore.getState()
  const config: EvaluationConfig = {
    evaluation_model: 'gpt-4o-mini',
    evaluation_model_provider: 'openai',
    metrics_config: {
      default_metrics: [{
        metric: 'faithfulness',
        node_info_list: [
          { node_id: 'node-1', title: 'Retriever', type: 'retriever' },
        ],
      }],
      customized_metrics: {
        evaluation_workflow_id: 'workflow-precision-review',
        input_fields: {
          'app.input.query': 'query',
        },
      },
    },
    judgement_conditions: [{
      logical_operator: 'or',
      items: [{
        field_id: 'system.has_context',
        operator: 'is',
        value: true,
      }],
    }],
  }

  // Seed batch-related state first so the assertions below can prove that
  // hydration does not wipe it.
  store.ensureResource(resourceType, resourceId)
  store.setBatchTab(resourceType, resourceId, 'history')
  store.setUploadedFileName(resourceType, resourceId, 'batch.csv')
  useEvaluationStore.setState(state => ({
    resources: {
      ...state.resources,
      'apps:app-5': {
        ...state.resources['apps:app-5'],
        batchRecords: [{
          id: 'batch-1',
          fileName: 'batch.csv',
          status: 'success',
          startedAt: '10:00:00',
          summary: 'App evaluation batch',
        }],
      },
    },
  }))

  store.hydrateResource(resourceType, resourceId, config)

  const hydratedState = useEvaluationStore.getState().resources['apps:app-5']

  // Provider and model are combined into a single judge model id.
  expect(hydratedState.judgeModelId).toBe('openai::gpt-4o-mini')

  // One builtin metric plus one custom workflow metric.
  expect(hydratedState.metrics).toHaveLength(2)
  expect(hydratedState.metrics[0].optionId).toBe('faithfulness')
  expect(hydratedState.metrics[0].nodeInfoList).toEqual([
    { node_id: 'node-1', title: 'Retriever', type: 'retriever' },
  ])
  expect(hydratedState.metrics[1].kind).toBe('custom-workflow')
  expect(hydratedState.metrics[1].customConfig?.workflowId).toBe('workflow-precision-review')
  expect(hydratedState.metrics[1].customConfig?.mappings[0].sourceFieldId).toBe('app.input.query')
  expect(hydratedState.metrics[1].customConfig?.mappings[0].targetVariableId).toBe('query')

  // snake_case condition payload is converted to the camelCase store shape.
  expect(hydratedState.conditions[0].logicalOperator).toBe('or')
  expect(hydratedState.conditions[0].items[0]).toMatchObject({
    fieldId: 'system.has_context',
    operator: 'is',
    value: true,
  })

  // Batch state seeded before hydration is still present.
  expect(hydratedState.activeBatchTab).toBe('history')
  expect(hydratedState.uploadedFileName).toBe('batch.csv')
  expect(hydratedState.batchRecords).toHaveLength(1)
})
|
||||
})
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
'use client'
|
||||
|
||||
import type { EvaluationResourceProps } from '../types'
|
||||
import type { EvaluationResourceProps } from '../../types'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useDocLink } from '@/context/i18n'
|
||||
import BatchTestPanel from './batch-test-panel'
|
||||
import ConditionsSection from './conditions-section'
|
||||
import JudgeModelSelector from './judge-model-selector'
|
||||
import MetricSection from './metric-section'
|
||||
import SectionHeader, { InlineSectionHeader } from './section-header'
|
||||
import BatchTestPanel from '../batch-test-panel'
|
||||
import ConditionsSection from '../conditions-section'
|
||||
import JudgeModelSelector from '../judge-model-selector'
|
||||
import MetricSection from '../metric-section'
|
||||
import SectionHeader, { InlineSectionHeader } from '../section-header'
|
||||
|
||||
const NonPipelineEvaluation = ({
|
||||
resourceType,
|
||||
@ -1,6 +1,6 @@
|
||||
'use client'
|
||||
|
||||
import type { EvaluationResourceProps, MetricOption } from '../types'
|
||||
import type { EvaluationResourceProps, MetricOption } from '../../types'
|
||||
import { useEffect, useMemo, useRef, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import Badge from '@/app/components/base/badge'
|
||||
@ -11,10 +11,10 @@ import { toast } from '@/app/components/base/ui/toast'
|
||||
import { Tooltip, TooltipContent, TooltipTrigger } from '@/app/components/base/ui/tooltip'
|
||||
import { useDocLink } from '@/context/i18n'
|
||||
import { cn } from '@/utils/classnames'
|
||||
import { getEvaluationMockConfig } from '../mock'
|
||||
import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../store'
|
||||
import JudgeModelSelector from './judge-model-selector'
|
||||
import SectionHeader, { InlineSectionHeader } from './section-header'
|
||||
import { getEvaluationMockConfig } from '../../mock'
|
||||
import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../../store'
|
||||
import JudgeModelSelector from '../judge-model-selector'
|
||||
import SectionHeader, { InlineSectionHeader } from '../section-header'
|
||||
|
||||
type PipelineMetricItemProps = {
|
||||
metric: MetricOption
|
||||
@ -1,5 +1,5 @@
|
||||
import { act, fireEvent, render, screen } from '@testing-library/react'
|
||||
import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
|
||||
import { act, fireEvent, render, screen } from '@testing-library/react'
|
||||
import MetricSection from '..'
|
||||
import { useEvaluationStore } from '../../../store'
|
||||
|
||||
@ -13,7 +13,7 @@ vi.mock('@/service/use-evaluation', () => ({
|
||||
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
|
||||
}))
|
||||
|
||||
const resourceType = 'workflow' as const
|
||||
const resourceType = 'apps' as const
|
||||
const resourceId = 'metric-section-resource'
|
||||
|
||||
const renderMetricSection = () => {
|
||||
|
||||
@ -21,7 +21,7 @@ const MetricSection = ({
|
||||
const [nodeInfoMap, setNodeInfoMap] = useState<Record<string, NodeInfo[]>>({})
|
||||
const hasMetrics = resource.metrics.length > 0
|
||||
const hasBuiltinMetrics = resource.metrics.some(metric => metric.kind === 'builtin')
|
||||
const shouldLoadNodeInfo = resourceType !== 'pipeline' && !!resourceId && hasBuiltinMetrics
|
||||
const shouldLoadNodeInfo = resourceType !== 'datasets' && !!resourceId && hasBuiltinMetrics
|
||||
const { data: availableMetricsData } = useAvailableEvaluationMetrics(shouldLoadNodeInfo)
|
||||
const { mutate: loadNodeInfo } = useEvaluationNodeInfoMutation()
|
||||
const availableMetricIds = useMemo(() => availableMetricsData?.metrics ?? [], [availableMetricsData?.metrics])
|
||||
|
||||
@ -13,7 +13,7 @@ import {
|
||||
type UseMetricSelectorDataOptions = {
|
||||
open: boolean
|
||||
query: string
|
||||
resourceType: 'workflow' | 'pipeline' | 'snippet'
|
||||
resourceType: 'apps' | 'datasets' | 'snippets'
|
||||
resourceId: string
|
||||
nodeInfoMap: Record<string, NodeInfo[]>
|
||||
setNodeInfoMap: (value: Record<string, NodeInfo[]>) => void
|
||||
@ -63,7 +63,7 @@ export const useMetricSelectorData = ({
|
||||
if (!open)
|
||||
return
|
||||
|
||||
if (resourceType === 'pipeline' || !resourceId || availableMetricIds.length === 0)
|
||||
if (resourceType === 'datasets' || !resourceId || availableMetricIds.length === 0)
|
||||
return
|
||||
|
||||
let isActive = true
|
||||
@ -107,7 +107,7 @@ export const useMetricSelectorData = ({
|
||||
|| metric.label.toLowerCase().includes(keyword)
|
||||
|| metric.description.toLowerCase().includes(keyword)
|
||||
const metricNodes = nodeInfoMap[metric.id] ?? []
|
||||
const supportsNodeSelection = resourceType !== 'pipeline'
|
||||
const supportsNodeSelection = resourceType !== 'datasets'
|
||||
const hasNoNodeInfo = supportsNodeSelection && metricNodes.length === 0
|
||||
|
||||
if (hasNoNodeInfo) {
|
||||
|
||||
@ -2,8 +2,8 @@ import type { MetricOption } from '../../types'
|
||||
import type { MetricVisualTone } from './types'
|
||||
import type { EvaluationTargetType, NodeInfo } from '@/types/evaluation'
|
||||
|
||||
/**
 * Maps an evaluation resource type to its `EvaluationTargetType` value.
 *
 * `'snippets'` maps to `'snippets'`; `'apps'` maps to `'app'`.
 */
export const toEvaluationTargetType = (resourceType: 'apps' | 'snippets'): EvaluationTargetType => {
  return resourceType === 'snippets' ? 'snippets' : 'app'
}
|
||||
|
||||
const humanizeMetricId = (metricId: string) => {
|
||||
|
||||
@ -2,21 +2,31 @@
|
||||
|
||||
import type { EvaluationResourceProps } from './types'
|
||||
import { useEffect } from 'react'
|
||||
import NonPipelineEvaluation from './components/non-pipeline-evaluation'
|
||||
import PipelineEvaluation from './components/pipeline-evaluation'
|
||||
import { useEvaluationConfig } from '@/service/use-evaluation'
|
||||
import NonPipelineEvaluation from './components/layout/non-pipeline-evaluation'
|
||||
import PipelineEvaluation from './components/layout/pipeline-evaluation'
|
||||
import { useEvaluationStore } from './store'
|
||||
|
||||
const Evaluation = ({
|
||||
resourceType,
|
||||
resourceId,
|
||||
}: EvaluationResourceProps) => {
|
||||
const { data: config } = useEvaluationConfig(resourceType, resourceId)
|
||||
const ensureResource = useEvaluationStore(state => state.ensureResource)
|
||||
const hydrateResource = useEvaluationStore(state => state.hydrateResource)
|
||||
|
||||
useEffect(() => {
|
||||
ensureResource(resourceType, resourceId)
|
||||
}, [ensureResource, resourceId, resourceType])
|
||||
|
||||
if (resourceType === 'pipeline') {
|
||||
useEffect(() => {
|
||||
if (!config)
|
||||
return
|
||||
|
||||
hydrateResource(resourceType, resourceId, config)
|
||||
}, [config, hydrateResource, resourceId, resourceType])
|
||||
|
||||
if (resourceType === 'datasets') {
|
||||
return (
|
||||
<PipelineEvaluation
|
||||
resourceType={resourceType}
|
||||
|
||||
@ -160,7 +160,7 @@ export const getDefaultOperator = (fieldType: EvaluationFieldOption['type']): Co
|
||||
}
|
||||
|
||||
export const getEvaluationMockConfig = (resourceType: EvaluationResourceType): EvaluationMockConfig => {
|
||||
if (resourceType === 'pipeline') {
|
||||
if (resourceType === 'datasets') {
|
||||
return {
|
||||
judgeModels,
|
||||
builtinMetrics: pipelineBuiltinMetrics,
|
||||
@ -176,7 +176,7 @@ export const getEvaluationMockConfig = (resourceType: EvaluationResourceType): E
|
||||
}
|
||||
}
|
||||
|
||||
if (resourceType === 'snippet') {
|
||||
if (resourceType === 'snippets') {
|
||||
return {
|
||||
judgeModels,
|
||||
builtinMetrics,
|
||||
|
||||
@ -9,24 +9,195 @@ import type {
|
||||
JudgmentConditionGroup,
|
||||
MetricOption,
|
||||
} from './types'
|
||||
import type { NodeInfo } from '@/types/evaluation'
|
||||
import type {
|
||||
EvaluationConditionValue,
|
||||
EvaluationConfig,
|
||||
EvaluationCustomizedMetric,
|
||||
EvaluationDefaultMetric,
|
||||
EvaluationJudgementConditionGroup,
|
||||
EvaluationJudgementConditionItem,
|
||||
EvaluationMetricsConfig,
|
||||
NodeInfo,
|
||||
} from '@/types/evaluation'
|
||||
import { getComparisonOperators, getDefaultOperator, getEvaluationMockConfig } from './mock'
|
||||
import { encodeModelSelection } from './utils'
|
||||
|
||||
type EvaluationStoreResources = Record<string, EvaluationResourceState>
|
||||
|
||||
const createId = (prefix: string) => `${prefix}-${Math.random().toString(36).slice(2, 10)}`
|
||||
|
||||
/**
 * Turns a metric identifier such as `context_precision` or `answer-relevancy`
 * into a display label (`Context Precision`, `Answer Relevancy`).
 *
 * The id is split on hyphens and underscores, empty segments are dropped, and
 * the first character of every remaining segment is upper-cased.
 */
const humanizeMetricId = (metricId: string): string => {
  return metricId
    .split(/[-_]+/)
    .filter(Boolean)
    .map(part => part.charAt(0).toUpperCase() + part.slice(1))
    .join(' ')
}
|
||||
|
||||
/**
 * Looks up the builtin metric option for `metricId` in the mock config of the
 * given resource type.
 *
 * When the id is not part of that config, a placeholder option is synthesized:
 * the label is derived from the id, the description is empty, the group is
 * taken from the first builtin metric of the config (or `'other'` when there
 * is none), and the only badge is `'Built-in'`.
 */
const resolveMetricOption = (resourceType: EvaluationResourceType, metricId: string): MetricOption => {
  const { builtinMetrics } = getEvaluationMockConfig(resourceType)

  const knownOption = builtinMetrics.find(metric => metric.id === metricId)
  if (knownOption)
    return knownOption

  return {
    id: metricId,
    label: humanizeMetricId(metricId),
    description: '',
    group: builtinMetrics[0]?.group ?? 'other',
    badges: ['Built-in'],
  }
}
|
||||
|
||||
/**
 * Sanitizes a node info list before it is stored.
 *
 * - `undefined` or an empty list yields `[]`.
 * - Entries whose `node_id` is not a non-empty string are dropped.
 * - A non-string `title` falls back to the node id; a non-string `type`
 *   falls back to `''`.
 * - Only `node_id`, `title` and `type` are copied to the result.
 */
const normalizeNodeInfoList = (value: NodeInfo[] | undefined): NodeInfo[] => {
  if (!value?.length)
    return []

  return value.flatMap((item) => {
    const nodeId = typeof item.node_id === 'string' ? item.node_id : ''
    if (!nodeId)
      return []

    return [{
      node_id: nodeId,
      title: typeof item.title === 'string' ? item.title : nodeId,
      type: typeof item.type === 'string' ? item.type : '',
    }]
  })
}
|
||||
|
||||
/**
 * Converts the `default_metrics` entries of an evaluation config into builtin
 * store metrics.
 *
 * Entries without a non-empty string `metric` id are skipped. For every other
 * entry the metric option is resolved for the resource type and a builtin
 * metric is created with the entry's sanitized node info list.
 *
 * @returns The created metrics in input order; `[]` for a missing or empty list.
 */
const normalizeDefaultMetrics = (
  resourceType: EvaluationResourceType,
  value: EvaluationDefaultMetric[] | undefined,
): EvaluationMetric[] => {
  if (!value?.length)
    return []

  return value.flatMap((item) => {
    const metricId = typeof item.metric === 'string' ? item.metric : ''
    if (!metricId)
      return []

    const metricOption = resolveMetricOption(resourceType, metricId)
    return [createBuiltinMetric(metricOption, normalizeNodeInfoList(item.node_info_list ?? []))]
  })
}
|
||||
|
||||
/**
 * Converts the `input_fields` record of a customized metric into store
 * mappings.
 *
 * Each record key becomes `sourceFieldId` and its value `targetVariableId`;
 * entries whose value is not a non-empty string are ignored. The result always
 * holds at least one mapping: when nothing usable is found, a single blank
 * mapping from `createCustomMetricMapping()` is returned.
 */
const normalizeCustomMetricMappings = (
  value: EvaluationCustomizedMetric['input_fields'],
): CustomMetricMapping[] => {
  if (!value)
    return [createCustomMetricMapping()]

  const mappings: CustomMetricMapping[] = []
  for (const [sourceFieldId, targetVariableId] of Object.entries(value)) {
    if (typeof targetVariableId !== 'string' || !targetVariableId)
      continue

    mappings.push({
      id: createId('mapping'),
      sourceFieldId,
      targetVariableId,
    })
  }

  return mappings.length > 0 ? mappings : [createCustomMetricMapping()]
}
|
||||
|
||||
/**
 * Converts the `customized_metrics` section of an evaluation config into at
 * most one custom-workflow store metric.
 *
 * @returns `[]` when the section is missing or has no string
 *   `evaluation_workflow_id`; otherwise a one-element array holding a fresh
 *   custom metric whose config carries that workflow id and the normalized
 *   input-field mappings.
 */
const normalizeCustomMetric = (
  value: EvaluationCustomizedMetric | null | undefined,
): EvaluationMetric[] => {
  if (!value)
    return []

  const workflowId = typeof value.evaluation_workflow_id === 'string' ? value.evaluation_workflow_id : null
  if (!workflowId)
    return []

  const customMetric = createCustomMetric()
  // Leave the metric as created when it carries no custom config to extend.
  if (!customMetric.customConfig)
    return [customMetric]

  return [{
    ...customMetric,
    customConfig: {
      ...customMetric.customConfig,
      workflowId,
      mappings: normalizeCustomMetricMappings(value.input_fields),
    },
  }]
}
|
||||
|
||||
/**
 * Converts one judgement condition item from an evaluation config into the
 * store's condition item shape.
 *
 * - The field id is read from `fieldId`, falling back to `field_id`, else `null`.
 * - The operator is kept only when it is allowed for the resolved field's
 *   type; otherwise the field type's default operator is used. When the field
 *   is unknown, only `'contains'` is accepted and is also the fallback.
 * - The value is passed through `getConditionValue` together with the resolved
 *   field and operator.
 * - A string `id` is preserved; otherwise a new condition id is generated.
 */
const normalizeConditionItem = (
  resourceType: EvaluationResourceType,
  value: EvaluationJudgementConditionItem,
): JudgmentConditionGroup['items'][number] => {
  let fieldId: string | null = null
  if (typeof value.fieldId === 'string')
    fieldId = value.fieldId
  else if (typeof value.field_id === 'string')
    fieldId = value.field_id

  const field = getEvaluationMockConfig(resourceType).fieldOptions.find(option => option.id === fieldId)
  const allowedOperators = field ? getComparisonOperators(field.type) : ['contains']
  const operatorValue = typeof value.operator === 'string' ? value.operator : null

  let operator: ComparisonOperator
  if (operatorValue && allowedOperators.includes(operatorValue as ComparisonOperator))
    operator = operatorValue as ComparisonOperator
  else if (field)
    operator = getDefaultOperator(field.type)
  else
    operator = 'contains'

  const rawValue: EvaluationConditionValue = value.value ?? null

  return {
    id: typeof value.id === 'string' ? value.id : createId('condition'),
    fieldId,
    operator,
    value: getConditionValue(field, operator, rawValue),
  }
}
|
||||
|
||||
/**
 * Converts the `judgement_conditions` section of an evaluation config into
 * store condition groups.
 *
 * The section may be either an array of groups or an object with a `groups`
 * array; anything else is treated as no groups. Groups without items are
 * dropped. A group's logical operator is `'or'` when either `logicalOperator`
 * or `logical_operator` equals `'or'`, otherwise `'and'`.
 *
 * @returns The normalized groups, or a single default group for the resource
 *   type when no usable group remains.
 */
const normalizeConditionGroups = (
  resourceType: EvaluationResourceType,
  value: EvaluationConfig['judgement_conditions'],
): JudgmentConditionGroup[] => {
  let groupsValue: EvaluationJudgementConditionGroup[] = []
  if (Array.isArray(value))
    groupsValue = value
  else if (Array.isArray(value?.groups))
    groupsValue = value.groups

  const groups: JudgmentConditionGroup[] = []
  for (const group of groupsValue) {
    const itemsValue = Array.isArray(group.items) ? group.items : []
    const items = itemsValue.map(item => normalizeConditionItem(resourceType, item))
    if (items.length === 0)
      continue

    groups.push({
      id: typeof group.id === 'string' ? group.id : createId('group'),
      logicalOperator: group.logicalOperator === 'or' || group.logical_operator === 'or' ? 'or' : 'and',
      items,
    })
  }

  return groups.length > 0 ? groups : [createConditionGroup(resourceType)]
}
|
||||
|
||||
/**
 * Builds the key under which a resource's state is stored:
 * `<resourceType>:<resourceId>`, e.g. `apps:app-1`.
 */
export function buildResourceKey(resourceType: EvaluationResourceType, resourceId: string) {
  return [resourceType, resourceId].join(':')
}
|
||||
|
||||
// Operators that test for presence only and therefore take no comparison value.
const conditionOperatorsWithoutValue: readonly ComparisonOperator[] = ['is_empty', 'is_not_empty']

/**
 * Tells whether a condition using `operator` needs a value.
 *
 * @returns `false` for `'is_empty'` and `'is_not_empty'`, `true` for every
 *   other operator.
 */
export const requiresConditionValue = (operator: ComparisonOperator) => !conditionOperatorsWithoutValue.includes(operator)
|
||||
|
||||
export const getConditionValue = (
|
||||
export function getConditionValue(
|
||||
field: EvaluationFieldOption | undefined,
|
||||
operator: ComparisonOperator,
|
||||
previousValue: string | number | boolean | null = null,
|
||||
) => {
|
||||
) {
|
||||
if (!field || !requiresConditionValue(operator))
|
||||
return null
|
||||
|
||||
@ -42,36 +213,42 @@ export const getConditionValue = (
|
||||
return typeof previousValue === 'string' ? previousValue : null
|
||||
}
|
||||
|
||||
/**
 * Creates a builtin store metric from a metric option.
 *
 * The metric gets a freshly generated id; `optionId`, label, description and
 * badges are copied from the option.
 *
 * @param metric - The option the metric is based on.
 * @param nodeInfoList - Nodes attached to the metric; defaults to none.
 */
export function createBuiltinMetric(metric: MetricOption, nodeInfoList: NodeInfo[] = []): EvaluationMetric {
  return {
    id: createId('metric'),
    optionId: metric.id,
    kind: 'builtin',
    label: metric.label,
    description: metric.description,
    badges: metric.badges,
    nodeInfoList,
  }
}
|
||||
|
||||
/**
 * Creates a blank custom-metric mapping: a freshly generated id with neither a
 * source field nor a target variable selected yet.
 */
export function createCustomMetricMapping(): CustomMetricMapping {
  return {
    id: createId('mapping'),
    sourceFieldId: null,
    targetVariableId: null,
  }
}
|
||||
|
||||
/**
 * Creates a custom-workflow metric with placeholder label, description and badge.
 *
 * No workflow is selected yet (`workflowId`, `workflowAppId` and `workflowName`
 * are all null) and the config starts with a single empty mapping row.
 *
 * @returns A metric of kind `custom-workflow`.
 */
export function createCustomMetric(): EvaluationMetric {
  return {
    id: createId('metric'),
    optionId: createId('custom'),
    kind: 'custom-workflow',
    label: 'Custom Evaluator',
    description: 'Map workflow variables to your evaluation inputs.',
    badges: ['Workflow'],
    customConfig: {
      workflowId: null,
      workflowAppId: null,
      workflowName: null,
      mappings: [createCustomMetricMapping()],
    },
  }
}
|
||||
|
||||
export const buildConditionItem = (resourceType: EvaluationResourceType) => {
|
||||
const field = getEvaluationMockConfig(resourceType).fieldOptions[0]
|
||||
@ -85,11 +262,13 @@ export const buildConditionItem = (resourceType: EvaluationResourceType) => {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Creates a condition group holding one condition item built for `resourceType`.
 *
 * @param resourceType - Resource type passed through to `buildConditionItem`.
 * @returns A group whose items are combined with the `and` operator.
 */
export function createConditionGroup(resourceType: EvaluationResourceType): JudgmentConditionGroup {
  return {
    id: createId('group'),
    logicalOperator: 'and',
    items: [buildConditionItem(resourceType)],
  }
}
|
||||
|
||||
export const buildInitialState = (resourceType: EvaluationResourceType): EvaluationResourceState => {
|
||||
return {
|
||||
@ -102,6 +281,24 @@ export const buildInitialState = (resourceType: EvaluationResourceType): Evaluat
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Converts a fetched evaluation config into the store's resource state.
 *
 * Starts from `buildInitialState(resourceType)` and overrides the judge model,
 * the metrics (default metrics first, then custom ones) and the conditions.
 * The judge model id is only set when the config carries both a model and a
 * provider; otherwise it is null.
 */
export function buildStateFromEvaluationConfig(
  resourceType: EvaluationResourceType,
  config: EvaluationConfig,
): EvaluationResourceState {
  const { default_metrics, customized_metrics }: EvaluationMetricsConfig = config.metrics_config ?? {}
  const builtinMetrics = normalizeDefaultMetrics(resourceType, default_metrics)
  const workflowMetrics = normalizeCustomMetric(customized_metrics)

  const baseState = buildInitialState(resourceType)

  const { evaluation_model: model, evaluation_model_provider: provider } = config
  const judgeModelId = model && provider
    ? encodeModelSelection(provider, model)
    : null

  const conditions = normalizeConditionGroups(resourceType, config.judgement_conditions)

  return {
    ...baseState,
    judgeModelId,
    metrics: [...builtinMetrics, ...workflowMetrics],
    conditions,
  }
}
|
||||
|
||||
const getResourceState = (
|
||||
resources: EvaluationStoreResources,
|
||||
resourceType: EvaluationResourceType,
|
||||
|
||||
@ -3,13 +3,14 @@ import type {
|
||||
EvaluationResourceState,
|
||||
EvaluationResourceType,
|
||||
} from './types'
|
||||
import type { NodeInfo } from '@/types/evaluation'
|
||||
import type { EvaluationConfig, NodeInfo } from '@/types/evaluation'
|
||||
import { create } from 'zustand'
|
||||
import { getDefaultOperator, getEvaluationMockConfig } from './mock'
|
||||
import {
|
||||
buildConditionItem,
|
||||
buildInitialState,
|
||||
buildResourceKey,
|
||||
buildStateFromEvaluationConfig,
|
||||
createBatchTestRecord,
|
||||
createBuiltinMetric,
|
||||
createConditionGroup,
|
||||
@ -28,6 +29,7 @@ import {
|
||||
type EvaluationStore = {
|
||||
resources: Record<string, EvaluationResourceState>
|
||||
ensureResource: (resourceType: EvaluationResourceType, resourceId: string) => void
|
||||
hydrateResource: (resourceType: EvaluationResourceType, resourceId: string, config: EvaluationConfig) => void
|
||||
setJudgeModel: (resourceType: EvaluationResourceType, resourceId: string, judgeModelId: string) => void
|
||||
addBuiltinMetric: (resourceType: EvaluationResourceType, resourceId: string, optionId: string, nodeInfoList?: NodeInfo[]) => void
|
||||
addCustomMetric: (resourceType: EvaluationResourceType, resourceId: string) => void
|
||||
@ -82,6 +84,19 @@ export const useEvaluationStore = create<EvaluationStore>((set, get) => ({
|
||||
},
|
||||
}))
|
||||
},
|
||||
// Replaces a resource's stored state with state derived from a fetched evaluation config.
// The batch-test fields already in the store (active tab, uploaded file name, records)
// are carried over; when the resource has no stored state yet they fall back to defaults.
hydrateResource: (resourceType, resourceId, config) => {
  // The key depends only on the arguments, so build it once rather than once per field.
  const resourceKey = buildResourceKey(resourceType, resourceId)

  set((state) => {
    const previous = state.resources[resourceKey]

    return {
      resources: {
        ...state.resources,
        [resourceKey]: {
          ...buildStateFromEvaluationConfig(resourceType, config),
          activeBatchTab: previous?.activeBatchTab ?? 'input-fields',
          uploadedFileName: previous?.uploadedFileName ?? null,
          batchRecords: previous?.batchRecords ?? [],
        },
      },
    }
  })
},
|
||||
setJudgeModel: (resourceType, resourceId, judgeModelId) => {
|
||||
set(state => ({
|
||||
resources: updateResourceState(state.resources, resourceType, resourceId, resource => ({
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import type { NodeInfo } from '@/types/evaluation'
|
||||
|
||||
// Kinds of resources an evaluation can be attached to.
export type EvaluationResourceType = 'apps' | 'datasets' | 'snippets'
|
||||
|
||||
export type EvaluationResourceProps = {
|
||||
resourceType: EvaluationResourceType
|
||||
|
||||
@ -22,7 +22,7 @@ const SnippetEvaluationPage = ({ snippetId }: SnippetEvaluationPageProps) => {
|
||||
snippet={snippet}
|
||||
section="evaluation"
|
||||
>
|
||||
<Evaluation resourceType="snippet" resourceId={snippetId} />
|
||||
<Evaluation resourceType="snippets" resourceId={snippetId} />
|
||||
</SnippetLayout>
|
||||
)
|
||||
}
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
import type { AvailableEvaluationWorkflowsResponse } from '@/types/evaluation'
|
||||
import type { EvaluationResourceType } from '@/app/components/evaluation/types'
|
||||
import type { AvailableEvaluationWorkflowsResponse, EvaluationConfig } from '@/types/evaluation'
|
||||
import {
|
||||
keepPreviousData,
|
||||
useInfiniteQuery,
|
||||
@ -28,6 +29,45 @@ const normalizeAvailableEvaluationWorkflowsParams = (params: AvailableEvaluation
|
||||
}
|
||||
}
|
||||
|
||||
// Maps a non-dataset resource type to the `targetType` route parameter:
// 'snippets' stays 'snippets', anything else becomes 'app'.
function toEvaluationTargetType(resourceType: Exclude<EvaluationResourceType, 'datasets'>) {
  if (resourceType === 'snippets')
    return 'snippets'

  return 'app'
}
|
||||
|
||||
// Builds the query options for fetching a resource's evaluation config.
// Datasets use the dataset-evaluation endpoint keyed by dataset id; every other
// resource type uses the generic evaluation endpoint keyed by target type and id.
// In both cases the query is disabled while `resourceId` is empty and does not
// refetch on window focus.
const getEvaluationConfigQueryOptions = (
  resourceType: EvaluationResourceType,
  resourceId: string,
) => {
  const sharedOptions = {
    enabled: Boolean(resourceId),
    refetchOnWindowFocus: false,
  }

  if (resourceType !== 'datasets') {
    const targetType = toEvaluationTargetType(resourceType)

    return consoleQuery.evaluation.config.queryOptions({
      input: {
        params: { targetType, targetId: resourceId },
      },
      ...sharedOptions,
    })
  }

  return consoleQuery.datasetEvaluation.config.queryOptions({
    input: {
      params: { datasetId: resourceId },
    },
    ...sharedOptions,
  })
}
|
||||
|
||||
/**
 * Query hook that fetches the evaluation config of a resource.
 *
 * @param resourceType - Kind of resource; selects which endpoint is queried.
 * @param resourceId - Id of the resource; the query is disabled while it is empty.
 */
export function useEvaluationConfig(
  resourceType: EvaluationResourceType,
  resourceId: string,
) {
  const queryOptions = getEvaluationConfigQueryOptions(resourceType, resourceId)

  return useQuery<EvaluationConfig>(queryOptions)
}
|
||||
|
||||
export const useAvailableEvaluationMetrics = (enabled = true) => {
|
||||
return useQuery(consoleQuery.evaluation.availableMetrics.queryOptions({
|
||||
enabled,
|
||||
|
||||
@ -1,10 +1,38 @@
|
||||
// Evaluation target kinds.
// NOTE(review): presumably the `targetType` route parameter values the backend accepts; confirm.
export type EvaluationTargetType = 'app' | 'snippets'
|
||||
|
||||
/**
 * Metrics section of an evaluation config. Both parts are optional.
 */
export type EvaluationMetricsConfig = {
  /** Default metric entries. */
  default_metrics?: Array<EvaluationDefaultMetric>
  /** Customized metric definition; may be absent or explicitly null. */
  customized_metrics?: EvaluationCustomizedMetric | null
}
|
||||
|
||||
// Value a judgement condition may carry: a string, number or boolean, or null for none.
export type EvaluationConditionValue = string | number | boolean | null
|
||||
|
||||
/**
 * One judgement condition as it appears in an evaluation config. Every field is optional.
 *
 * NOTE(review): the field id is declared in both camelCase (`fieldId`) and
 * snake_case (`field_id`); presumably a payload may use either. Confirm against the API.
 */
export type EvaluationJudgementConditionItem = {
  id?: string

  fieldId?: string
  field_id?: string

  operator?: string
  value?: EvaluationConditionValue
}
|
||||
|
||||
/**
 * A group of judgement conditions as it appears in an evaluation config.
 * Every field is optional.
 *
 * NOTE(review): the logical operator is declared in both camelCase and
 * snake_case; presumably a payload may use either. Confirm against the API.
 */
export type EvaluationJudgementConditionGroup = {
  id?: string

  logicalOperator?: 'and' | 'or'
  logical_operator?: 'and' | 'or'

  items?: Array<EvaluationJudgementConditionItem>
}
|
||||
|
||||
/**
 * Judgement conditions of an evaluation config: either a bare list of groups
 * or an object wrapping the list under an optional `groups` key.
 */
export type EvaluationJudgementConditions =
  | Array<EvaluationJudgementConditionGroup>
  | { groups?: Array<EvaluationJudgementConditionGroup> }
|
||||
|
||||
/**
 * Evaluation config of a resource. All four fields are always present but may be null.
 */
export type EvaluationConfig = {
  /** Name of the evaluation model, or null when none is set. */
  evaluation_model: string | null
  /** Provider of the evaluation model, or null when none is set. */
  evaluation_model_provider: string | null
  /** Default and customized metrics, or null when none are configured. */
  metrics_config: EvaluationMetricsConfig | null
  /** Judgement condition groups, or null when none are configured. */
  judgement_conditions: EvaluationJudgementConditions | null
}
|
||||
|
||||
export type NodeInfo = {
|
||||
@ -20,8 +48,8 @@ export type EvaluationDefaultMetric = {
|
||||
|
||||
/**
 * Customized metric definition. Every field is optional.
 */
export type EvaluationCustomizedMetric = {
  /** Id of the evaluation workflow. */
  evaluation_workflow_id?: string
  /** Input fields as a string-keyed record; values may be a string, null or undefined. */
  input_fields?: Record<string, string | null | undefined>
  /** Output fields as a list of string-keyed records with the same value types. */
  output_fields?: Array<Record<string, string | null | undefined>>
}
|
||||
|
||||
export type EvaluationConfigData = {
|
||||
@ -29,7 +57,7 @@ export type EvaluationConfigData = {
|
||||
evaluation_model_provider?: string
|
||||
default_metrics?: EvaluationDefaultMetric[]
|
||||
customized_metrics?: EvaluationCustomizedMetric | null
|
||||
judgment_config?: Record<string, unknown> | null
|
||||
judgment_config?: EvaluationJudgementConditions | null
|
||||
}
|
||||
|
||||
export type EvaluationRunRequest = EvaluationConfigData & {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user