mirror of
https://github.com/langgenius/dify.git
synced 2026-05-06 18:27:19 +08:00
feat: evaluation
This commit is contained in:
parent
7a722773c7
commit
887c7710e9
@ -0,0 +1,11 @@
|
||||
import Evaluation from '@/app/components/evaluation'
|
||||
|
||||
const Page = async (props: {
|
||||
params: Promise<{ appId: string }>
|
||||
}) => {
|
||||
const { appId } = await props.params
|
||||
|
||||
return <Evaluation resourceType="workflow" resourceId={appId} />
|
||||
}
|
||||
|
||||
export default Page
|
||||
@ -7,6 +7,8 @@ import {
|
||||
RiDashboard2Line,
|
||||
RiFileList3Fill,
|
||||
RiFileList3Line,
|
||||
RiFlaskFill,
|
||||
RiFlaskLine,
|
||||
RiTerminalBoxFill,
|
||||
RiTerminalBoxLine,
|
||||
RiTerminalWindowFill,
|
||||
@ -67,40 +69,47 @@ const AppDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
|
||||
}>>([])
|
||||
|
||||
const getNavigationConfig = useCallback((appId: string, isCurrentWorkspaceEditor: boolean, mode: AppModeEnum) => {
|
||||
const navConfig = [
|
||||
...(isCurrentWorkspaceEditor
|
||||
? [{
|
||||
name: t('appMenus.promptEng', { ns: 'common' }),
|
||||
href: `/app/${appId}/${(mode === AppModeEnum.WORKFLOW || mode === AppModeEnum.ADVANCED_CHAT) ? 'workflow' : 'configuration'}`,
|
||||
icon: RiTerminalWindowLine,
|
||||
selectedIcon: RiTerminalWindowFill,
|
||||
}]
|
||||
: []
|
||||
),
|
||||
{
|
||||
name: t('appMenus.apiAccess', { ns: 'common' }),
|
||||
href: `/app/${appId}/develop`,
|
||||
icon: RiTerminalBoxLine,
|
||||
selectedIcon: RiTerminalBoxFill,
|
||||
},
|
||||
...(isCurrentWorkspaceEditor
|
||||
? [{
|
||||
name: mode !== AppModeEnum.WORKFLOW
|
||||
? t('appMenus.logAndAnn', { ns: 'common' })
|
||||
: t('appMenus.logs', { ns: 'common' }),
|
||||
href: `/app/${appId}/logs`,
|
||||
icon: RiFileList3Line,
|
||||
selectedIcon: RiFileList3Fill,
|
||||
}]
|
||||
: []
|
||||
),
|
||||
{
|
||||
name: t('appMenus.overview', { ns: 'common' }),
|
||||
href: `/app/${appId}/overview`,
|
||||
icon: RiDashboard2Line,
|
||||
selectedIcon: RiDashboard2Fill,
|
||||
},
|
||||
]
|
||||
const navConfig = []
|
||||
|
||||
if (isCurrentWorkspaceEditor) {
|
||||
navConfig.push({
|
||||
name: t('appMenus.promptEng', { ns: 'common' }),
|
||||
href: `/app/${appId}/${(mode === AppModeEnum.WORKFLOW || mode === AppModeEnum.ADVANCED_CHAT) ? 'workflow' : 'configuration'}`,
|
||||
icon: RiTerminalWindowLine,
|
||||
selectedIcon: RiTerminalWindowFill,
|
||||
})
|
||||
navConfig.push({
|
||||
name: t('appMenus.evaluation', { ns: 'common' }),
|
||||
href: `/app/${appId}/evaluation`,
|
||||
icon: RiFlaskLine,
|
||||
selectedIcon: RiFlaskFill,
|
||||
})
|
||||
}
|
||||
|
||||
navConfig.push({
|
||||
name: t('appMenus.apiAccess', { ns: 'common' }),
|
||||
href: `/app/${appId}/develop`,
|
||||
icon: RiTerminalBoxLine,
|
||||
selectedIcon: RiTerminalBoxFill,
|
||||
})
|
||||
|
||||
if (isCurrentWorkspaceEditor) {
|
||||
navConfig.push({
|
||||
name: mode !== AppModeEnum.WORKFLOW
|
||||
? t('appMenus.logAndAnn', { ns: 'common' })
|
||||
: t('appMenus.logs', { ns: 'common' }),
|
||||
href: `/app/${appId}/logs`,
|
||||
icon: RiFileList3Line,
|
||||
selectedIcon: RiFileList3Fill,
|
||||
})
|
||||
}
|
||||
|
||||
navConfig.push({
|
||||
name: t('appMenus.overview', { ns: 'common' }),
|
||||
href: `/app/${appId}/overview`,
|
||||
icon: RiDashboard2Line,
|
||||
selectedIcon: RiDashboard2Fill,
|
||||
})
|
||||
return navConfig
|
||||
}, [t])
|
||||
|
||||
|
||||
@ -0,0 +1,11 @@
|
||||
import Evaluation from '@/app/components/evaluation'
|
||||
|
||||
const Page = async (props: {
|
||||
params: Promise<{ datasetId: string }>
|
||||
}) => {
|
||||
const { datasetId } = await props.params
|
||||
|
||||
return <Evaluation resourceType="pipeline" resourceId={datasetId} />
|
||||
}
|
||||
|
||||
export default Page
|
||||
@ -6,6 +6,8 @@ import {
|
||||
RiEqualizer2Line,
|
||||
RiFileTextFill,
|
||||
RiFileTextLine,
|
||||
RiFlaskFill,
|
||||
RiFlaskLine,
|
||||
RiFocus2Fill,
|
||||
RiFocus2Line,
|
||||
} from '@remixicon/react'
|
||||
@ -86,20 +88,30 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
|
||||
]
|
||||
|
||||
if (datasetRes?.provider !== 'external') {
|
||||
baseNavigation.unshift({
|
||||
name: t('datasetMenus.pipeline', { ns: 'common' }),
|
||||
href: `/datasets/${datasetId}/pipeline`,
|
||||
icon: PipelineLine as RemixiconComponentType,
|
||||
selectedIcon: PipelineFill as RemixiconComponentType,
|
||||
disabled: false,
|
||||
})
|
||||
baseNavigation.unshift({
|
||||
name: t('datasetMenus.documents', { ns: 'common' }),
|
||||
href: `/datasets/${datasetId}/documents`,
|
||||
icon: RiFileTextLine,
|
||||
selectedIcon: RiFileTextFill,
|
||||
disabled: isButtonDisabledWithPipeline,
|
||||
})
|
||||
return [
|
||||
{
|
||||
name: t('datasetMenus.documents', { ns: 'common' }),
|
||||
href: `/datasets/${datasetId}/documents`,
|
||||
icon: RiFileTextLine,
|
||||
selectedIcon: RiFileTextFill,
|
||||
disabled: isButtonDisabledWithPipeline,
|
||||
},
|
||||
{
|
||||
name: t('datasetMenus.pipeline', { ns: 'common' }),
|
||||
href: `/datasets/${datasetId}/pipeline`,
|
||||
icon: PipelineLine as RemixiconComponentType,
|
||||
selectedIcon: PipelineFill as RemixiconComponentType,
|
||||
disabled: false,
|
||||
},
|
||||
{
|
||||
name: t('datasetMenus.evaluation', { ns: 'common' }),
|
||||
href: `/datasets/${datasetId}/evaluation`,
|
||||
icon: RiFlaskLine,
|
||||
selectedIcon: RiFlaskFill,
|
||||
disabled: false,
|
||||
},
|
||||
...baseNavigation,
|
||||
]
|
||||
}
|
||||
|
||||
return baseNavigation
|
||||
|
||||
@ -0,0 +1,11 @@
|
||||
import SnippetPage from '@/app/components/snippets'
|
||||
|
||||
const Page = async (props: {
|
||||
params: Promise<{ snippetId: string }>
|
||||
}) => {
|
||||
const { snippetId } = await props.params
|
||||
|
||||
return <SnippetPage snippetId={snippetId} section="evaluation" />
|
||||
}
|
||||
|
||||
export default Page
|
||||
@ -0,0 +1,11 @@
|
||||
import SnippetPage from '@/app/components/snippets'
|
||||
|
||||
const Page = async (props: {
|
||||
params: Promise<{ snippetId: string }>
|
||||
}) => {
|
||||
const { snippetId } = await props.params
|
||||
|
||||
return <SnippetPage snippetId={snippetId} section="orchestrate" />
|
||||
}
|
||||
|
||||
export default Page
|
||||
21
web/app/(commonLayout)/snippets/[snippetId]/page.spec.ts
Normal file
21
web/app/(commonLayout)/snippets/[snippetId]/page.spec.ts
Normal file
@ -0,0 +1,21 @@
|
||||
import Page from './page'
|
||||
|
||||
const mockRedirect = vi.fn()
|
||||
|
||||
vi.mock('next/navigation', () => ({
|
||||
redirect: (path: string) => mockRedirect(path),
|
||||
}))
|
||||
|
||||
describe('snippet detail redirect page', () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks()
|
||||
})
|
||||
|
||||
it('should redirect legacy snippet detail routes to orchestrate', async () => {
|
||||
await Page({
|
||||
params: Promise.resolve({ snippetId: 'snippet-1' }),
|
||||
})
|
||||
|
||||
expect(mockRedirect).toHaveBeenCalledWith('/snippets/snippet-1/orchestrate')
|
||||
})
|
||||
})
|
||||
@ -1,11 +1,11 @@
|
||||
import SnippetPage from '@/app/components/snippets'
|
||||
import { redirect } from 'next/navigation'
|
||||
|
||||
const Page = async (props: {
|
||||
params: Promise<{ snippetId: string }>
|
||||
}) => {
|
||||
const { params } = props
|
||||
const { snippetId } = await props.params
|
||||
|
||||
return <SnippetPage snippetId={(await params).snippetId} />
|
||||
redirect(`/snippets/${snippetId}/orchestrate`)
|
||||
}
|
||||
|
||||
export default Page
|
||||
|
||||
@ -91,7 +91,7 @@ const SnippetCard = ({
|
||||
snippet: SnippetListItem
|
||||
}) => {
|
||||
return (
|
||||
<Link href={`/snippets/${snippet.id}`} className="group col-span-1">
|
||||
<Link href={`/snippets/${snippet.id}/orchestrate`} className="group col-span-1">
|
||||
<article className="relative inline-flex h-[160px] w-full flex-col rounded-xl border border-components-card-border bg-components-card-bg shadow-sm transition-all duration-200 ease-in-out hover:-translate-y-0.5 hover:shadow-lg">
|
||||
{snippet.status && (
|
||||
<div className="absolute right-0 top-0 rounded-bl-lg rounded-tr-xl bg-background-default-dimmed px-2 py-1 text-[10px] font-medium uppercase leading-3 text-text-placeholder">
|
||||
|
||||
112
web/app/components/evaluation/__tests__/index.spec.tsx
Normal file
112
web/app/components/evaluation/__tests__/index.spec.tsx
Normal file
@ -0,0 +1,112 @@
|
||||
import { act, fireEvent, render, screen } from '@testing-library/react'
|
||||
import Evaluation from '..'
|
||||
import { getEvaluationMockConfig } from '../mock'
|
||||
import { useEvaluationStore } from '../store'
|
||||
|
||||
vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({
|
||||
useModelList: () => ({
|
||||
data: [{
|
||||
provider: 'openai',
|
||||
models: [{ model: 'gpt-4o-mini' }],
|
||||
}],
|
||||
}),
|
||||
}))
|
||||
|
||||
vi.mock('@/app/components/header/account-setting/model-provider-page/model-selector', () => ({
|
||||
default: ({ defaultModel }: { defaultModel?: { provider: string, model: string } }) => (
|
||||
<div data-testid="evaluation-model-selector">
|
||||
{defaultModel ? `${defaultModel.provider}:${defaultModel.model}` : 'empty'}
|
||||
</div>
|
||||
),
|
||||
}))
|
||||
|
||||
describe('Evaluation', () => {
|
||||
beforeEach(() => {
|
||||
useEvaluationStore.setState({ resources: {} })
|
||||
})
|
||||
|
||||
it('should search, add metrics, and create a batch history record', async () => {
|
||||
vi.useFakeTimers()
|
||||
|
||||
render(<Evaluation resourceType="workflow" resourceId="app-1" />)
|
||||
|
||||
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini')
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
|
||||
expect(screen.getByTestId('evaluation-metric-loading')).toBeInTheDocument()
|
||||
|
||||
await act(async () => {
|
||||
vi.advanceTimersByTime(200)
|
||||
})
|
||||
|
||||
fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchPlaceholder'), {
|
||||
target: { value: 'does-not-exist' },
|
||||
})
|
||||
|
||||
await act(async () => {
|
||||
vi.advanceTimersByTime(200)
|
||||
})
|
||||
|
||||
expect(screen.getByText('evaluation.metrics.noResults')).toBeInTheDocument()
|
||||
|
||||
fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchPlaceholder'), {
|
||||
target: { value: 'faith' },
|
||||
})
|
||||
|
||||
await act(async () => {
|
||||
vi.advanceTimersByTime(200)
|
||||
})
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: /Faithfulness/i }))
|
||||
expect(screen.getAllByText('Faithfulness').length).toBeGreaterThan(0)
|
||||
|
||||
fireEvent.click(screen.getByRole('button', { name: 'evaluation.batch.run' }))
|
||||
expect(screen.getByText('evaluation.batch.status.running')).toBeInTheDocument()
|
||||
|
||||
await act(async () => {
|
||||
vi.advanceTimersByTime(1300)
|
||||
})
|
||||
|
||||
expect(screen.getByText('evaluation.batch.status.success')).toBeInTheDocument()
|
||||
expect(screen.getByText('Workflow evaluation batch')).toBeInTheDocument()
|
||||
|
||||
vi.useRealTimers()
|
||||
})
|
||||
|
||||
it('should render time placeholders and hide the value row for empty operators', () => {
|
||||
const resourceType = 'workflow'
|
||||
const resourceId = 'app-2'
|
||||
const store = useEvaluationStore.getState()
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
|
||||
const timeField = config.fieldOptions.find(field => field.type === 'time')!
|
||||
let groupId = ''
|
||||
let itemId = ''
|
||||
|
||||
act(() => {
|
||||
store.ensureResource(resourceType, resourceId)
|
||||
store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini')
|
||||
|
||||
const group = useEvaluationStore.getState().resources['workflow:app-2'].conditions[0]
|
||||
groupId = group.id
|
||||
itemId = group.items[0].id
|
||||
|
||||
store.updateConditionField(resourceType, resourceId, groupId, itemId, timeField.id)
|
||||
store.updateConditionOperator(resourceType, resourceId, groupId, itemId, 'before')
|
||||
})
|
||||
|
||||
let rerender: ReturnType<typeof render>['rerender']
|
||||
act(() => {
|
||||
({ rerender } = render(<Evaluation resourceType={resourceType} resourceId={resourceId} />))
|
||||
})
|
||||
|
||||
expect(screen.getByText('evaluation.conditions.selectTime')).toBeInTheDocument()
|
||||
|
||||
act(() => {
|
||||
store.updateConditionOperator(resourceType, resourceId, groupId, itemId, 'is_empty')
|
||||
rerender(<Evaluation resourceType={resourceType} resourceId={resourceId} />)
|
||||
})
|
||||
|
||||
expect(screen.queryByText('evaluation.conditions.selectTime')).not.toBeInTheDocument()
|
||||
})
|
||||
})
|
||||
96
web/app/components/evaluation/__tests__/store.spec.ts
Normal file
96
web/app/components/evaluation/__tests__/store.spec.ts
Normal file
@ -0,0 +1,96 @@
|
||||
import { getEvaluationMockConfig } from '../mock'
|
||||
import {
|
||||
getAllowedOperators,
|
||||
isCustomMetricConfigured,
|
||||
requiresConditionValue,
|
||||
useEvaluationStore,
|
||||
} from '../store'
|
||||
|
||||
describe('evaluation store', () => {
|
||||
beforeEach(() => {
|
||||
useEvaluationStore.setState({ resources: {} })
|
||||
})
|
||||
|
||||
it('should configure a custom metric mapping to a valid state', () => {
|
||||
const resourceType = 'workflow'
|
||||
const resourceId = 'app-1'
|
||||
const store = useEvaluationStore.getState()
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
|
||||
store.ensureResource(resourceType, resourceId)
|
||||
store.addCustomMetric(resourceType, resourceId)
|
||||
|
||||
const initialMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.kind === 'custom-workflow')
|
||||
expect(initialMetric).toBeDefined()
|
||||
expect(isCustomMetricConfigured(initialMetric!)).toBe(false)
|
||||
|
||||
store.setCustomMetricWorkflow(resourceType, resourceId, initialMetric!.id, config.workflowOptions[0].id)
|
||||
store.updateCustomMetricMapping(resourceType, resourceId, initialMetric!.id, initialMetric!.customConfig!.mappings[0].id, {
|
||||
sourceFieldId: config.fieldOptions[0].id,
|
||||
targetVariableId: config.workflowOptions[0].targetVariables[0].id,
|
||||
})
|
||||
|
||||
const configuredMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.id === initialMetric!.id)
|
||||
expect(isCustomMetricConfigured(configuredMetric!)).toBe(true)
|
||||
})
|
||||
|
||||
it('should add and remove builtin metrics', () => {
|
||||
const resourceType = 'workflow'
|
||||
const resourceId = 'app-2'
|
||||
const store = useEvaluationStore.getState()
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
|
||||
store.ensureResource(resourceType, resourceId)
|
||||
store.addBuiltinMetric(resourceType, resourceId, config.builtinMetrics[1].id)
|
||||
|
||||
const addedMetric = useEvaluationStore.getState().resources['workflow:app-2'].metrics.find(metric => metric.optionId === config.builtinMetrics[1].id)
|
||||
expect(addedMetric).toBeDefined()
|
||||
|
||||
store.removeMetric(resourceType, resourceId, addedMetric!.id)
|
||||
|
||||
expect(useEvaluationStore.getState().resources['workflow:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false)
|
||||
})
|
||||
|
||||
it('should update condition groups and adapt operators to field types', () => {
|
||||
const resourceType = 'pipeline'
|
||||
const resourceId = 'dataset-1'
|
||||
const store = useEvaluationStore.getState()
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
|
||||
store.ensureResource(resourceType, resourceId)
|
||||
|
||||
const initialGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
|
||||
store.setConditionGroupOperator(resourceType, resourceId, initialGroup.id, 'or')
|
||||
store.addConditionGroup(resourceType, resourceId)
|
||||
|
||||
const booleanField = config.fieldOptions.find(field => field.type === 'boolean')!
|
||||
const currentItem = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0].items[0]
|
||||
store.updateConditionField(resourceType, resourceId, initialGroup.id, currentItem.id, booleanField.id)
|
||||
|
||||
const updatedGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
|
||||
expect(updatedGroup.logicalOperator).toBe('or')
|
||||
expect(updatedGroup.items[0].operator).toBe('is')
|
||||
expect(getAllowedOperators(resourceType, booleanField.id)).toEqual(['is', 'is_not'])
|
||||
})
|
||||
|
||||
it('should support time fields and clear values for empty operators', () => {
|
||||
const resourceType = 'workflow'
|
||||
const resourceId = 'app-3'
|
||||
const store = useEvaluationStore.getState()
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
|
||||
store.ensureResource(resourceType, resourceId)
|
||||
|
||||
const timeField = config.fieldOptions.find(field => field.type === 'time')!
|
||||
const item = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
|
||||
|
||||
store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, timeField.id)
|
||||
store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, 'is_empty')
|
||||
|
||||
const updatedItem = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
|
||||
|
||||
expect(getAllowedOperators(resourceType, timeField.id)).toEqual(['is', 'before', 'after', 'is_empty', 'is_not_empty'])
|
||||
expect(requiresConditionValue('is_empty')).toBe(false)
|
||||
expect(updatedItem.value).toBeNull()
|
||||
})
|
||||
})
|
||||
1017
web/app/components/evaluation/index.tsx
Normal file
1017
web/app/components/evaluation/index.tsx
Normal file
File diff suppressed because it is too large
Load Diff
184
web/app/components/evaluation/mock.ts
Normal file
184
web/app/components/evaluation/mock.ts
Normal file
@ -0,0 +1,184 @@
|
||||
import type {
|
||||
ComparisonOperator,
|
||||
EvaluationFieldOption,
|
||||
EvaluationMockConfig,
|
||||
EvaluationResourceType,
|
||||
MetricOption,
|
||||
} from './types'
|
||||
|
||||
const judgeModels = [
|
||||
{
|
||||
id: 'gpt-4.1-mini',
|
||||
label: 'GPT-4.1 mini',
|
||||
provider: 'OpenAI',
|
||||
},
|
||||
{
|
||||
id: 'claude-3-7-sonnet',
|
||||
label: 'Claude 3.7 Sonnet',
|
||||
provider: 'Anthropic',
|
||||
},
|
||||
{
|
||||
id: 'gemini-2.0-flash',
|
||||
label: 'Gemini 2.0 Flash',
|
||||
provider: 'Google',
|
||||
},
|
||||
]
|
||||
|
||||
const builtinMetrics: MetricOption[] = [
|
||||
{
|
||||
id: 'answer-correctness',
|
||||
label: 'Answer Correctness',
|
||||
description: 'Compares the response with the expected answer and scores factual alignment.',
|
||||
group: 'quality',
|
||||
badges: ['LLM', 'Built-in'],
|
||||
},
|
||||
{
|
||||
id: 'faithfulness',
|
||||
label: 'Faithfulness',
|
||||
description: 'Checks whether the answer stays grounded in the retrieved evidence.',
|
||||
group: 'quality',
|
||||
badges: ['LLM', 'Retrieval'],
|
||||
},
|
||||
{
|
||||
id: 'relevance',
|
||||
label: 'Relevance',
|
||||
description: 'Evaluates how directly the answer addresses the original request.',
|
||||
group: 'quality',
|
||||
badges: ['LLM'],
|
||||
},
|
||||
{
|
||||
id: 'latency',
|
||||
label: 'Latency',
|
||||
description: 'Captures runtime responsiveness for the full execution path.',
|
||||
group: 'operations',
|
||||
badges: ['System'],
|
||||
},
|
||||
{
|
||||
id: 'token-usage',
|
||||
label: 'Token Usage',
|
||||
description: 'Tracks prompt and completion token consumption for the run.',
|
||||
group: 'operations',
|
||||
badges: ['System'],
|
||||
},
|
||||
{
|
||||
id: 'tool-success-rate',
|
||||
label: 'Tool Success Rate',
|
||||
description: 'Measures whether each required tool invocation finishes without failure.',
|
||||
group: 'operations',
|
||||
badges: ['Workflow'],
|
||||
},
|
||||
]
|
||||
|
||||
const workflowOptions = [
|
||||
{
|
||||
id: 'workflow-precision-review',
|
||||
label: 'Precision Review Workflow',
|
||||
description: 'Custom evaluator for nuanced quality review.',
|
||||
targetVariables: [
|
||||
{ id: 'query', label: 'query' },
|
||||
{ id: 'answer', label: 'answer' },
|
||||
{ id: 'reference', label: 'reference' },
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'workflow-risk-review',
|
||||
label: 'Risk Review Workflow',
|
||||
description: 'Custom evaluator for policy and escalation checks.',
|
||||
targetVariables: [
|
||||
{ id: 'input', label: 'input' },
|
||||
{ id: 'output', label: 'output' },
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
const workflowFields: EvaluationFieldOption[] = [
|
||||
{ id: 'app.input.query', label: 'Query', group: 'App Input', type: 'string' },
|
||||
{ id: 'app.input.locale', label: 'Locale', group: 'App Input', type: 'enum', options: [{ value: 'en-US', label: 'en-US' }, { value: 'zh-Hans', label: 'zh-Hans' }] },
|
||||
{ id: 'app.output.answer', label: 'Answer', group: 'App Output', type: 'string' },
|
||||
{ id: 'app.output.score', label: 'Score', group: 'App Output', type: 'number' },
|
||||
{ id: 'app.output.published_at', label: 'Publication Date', group: 'App Output', type: 'time' },
|
||||
{ id: 'system.has_context', label: 'Has Context', group: 'System', type: 'boolean' },
|
||||
]
|
||||
|
||||
const pipelineFields: EvaluationFieldOption[] = [
|
||||
{ id: 'dataset.input.document_id', label: 'Document ID', group: 'Dataset', type: 'string' },
|
||||
{ id: 'dataset.input.chunk_count', label: 'Chunk Count', group: 'Dataset', type: 'number' },
|
||||
{ id: 'dataset.input.updated_at', label: 'Updated At', group: 'Dataset', type: 'time' },
|
||||
{ id: 'retrieval.output.hit_rate', label: 'Hit Rate', group: 'Retrieval', type: 'number' },
|
||||
{ id: 'retrieval.output.source', label: 'Source', group: 'Retrieval', type: 'enum', options: [{ value: 'bm25', label: 'BM25' }, { value: 'hybrid', label: 'Hybrid' }] },
|
||||
{ id: 'pipeline.output.published', label: 'Published', group: 'Output', type: 'boolean' },
|
||||
]
|
||||
|
||||
const snippetFields: EvaluationFieldOption[] = [
|
||||
{ id: 'snippet.input.blog_url', label: 'Blog URL', group: 'Snippet Input', type: 'string' },
|
||||
{ id: 'snippet.input.platforms', label: 'Platforms', group: 'Snippet Input', type: 'string' },
|
||||
{ id: 'snippet.output.content', label: 'Generated Content', group: 'Snippet Output', type: 'string' },
|
||||
{ id: 'snippet.output.length', label: 'Output Length', group: 'Snippet Output', type: 'number' },
|
||||
{ id: 'snippet.output.scheduled_at', label: 'Scheduled At', group: 'Snippet Output', type: 'time' },
|
||||
{ id: 'system.requires_review', label: 'Requires Review', group: 'System', type: 'boolean' },
|
||||
]
|
||||
|
||||
export const getComparisonOperators = (fieldType: EvaluationFieldOption['type']): ComparisonOperator[] => {
|
||||
if (fieldType === 'number')
|
||||
return ['is', 'is_not', 'greater_than', 'less_than', 'greater_or_equal', 'less_or_equal', 'is_empty', 'is_not_empty']
|
||||
|
||||
if (fieldType === 'time')
|
||||
return ['is', 'before', 'after', 'is_empty', 'is_not_empty']
|
||||
|
||||
if (fieldType === 'boolean' || fieldType === 'enum')
|
||||
return ['is', 'is_not']
|
||||
|
||||
return ['contains', 'not_contains', 'is', 'is_not', 'is_empty', 'is_not_empty']
|
||||
}
|
||||
|
||||
export const getDefaultOperator = (fieldType: EvaluationFieldOption['type']): ComparisonOperator => {
|
||||
return getComparisonOperators(fieldType)[0]
|
||||
}
|
||||
|
||||
export const getEvaluationMockConfig = (resourceType: EvaluationResourceType): EvaluationMockConfig => {
|
||||
if (resourceType === 'pipeline') {
|
||||
return {
|
||||
judgeModels,
|
||||
builtinMetrics,
|
||||
workflowOptions,
|
||||
fieldOptions: pipelineFields,
|
||||
templateFileName: 'pipeline-evaluation-template.csv',
|
||||
batchRequirements: [
|
||||
'Include one row per retrieval scenario.',
|
||||
'Provide the expected source or target chunk for each case.',
|
||||
'Keep numeric metrics in plain number format.',
|
||||
],
|
||||
historySummaryLabel: 'Pipeline evaluation batch',
|
||||
}
|
||||
}
|
||||
|
||||
if (resourceType === 'snippet') {
|
||||
return {
|
||||
judgeModels,
|
||||
builtinMetrics,
|
||||
workflowOptions,
|
||||
fieldOptions: snippetFields,
|
||||
templateFileName: 'snippet-evaluation-template.csv',
|
||||
batchRequirements: [
|
||||
'Include one row per snippet execution case.',
|
||||
'Provide the expected final content or acceptance rule.',
|
||||
'Keep optional fields empty when not used.',
|
||||
],
|
||||
historySummaryLabel: 'Snippet evaluation batch',
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
judgeModels,
|
||||
builtinMetrics,
|
||||
workflowOptions,
|
||||
fieldOptions: workflowFields,
|
||||
templateFileName: 'workflow-evaluation-template.csv',
|
||||
batchRequirements: [
|
||||
'Include one row per workflow test case.',
|
||||
'Provide both user input and expected answer when available.',
|
||||
'Keep boolean columns as true or false.',
|
||||
],
|
||||
historySummaryLabel: 'Workflow evaluation batch',
|
||||
}
|
||||
}
|
||||
635
web/app/components/evaluation/store.ts
Normal file
635
web/app/components/evaluation/store.ts
Normal file
@ -0,0 +1,635 @@
|
||||
import type {
|
||||
BatchTestRecord,
|
||||
ComparisonOperator,
|
||||
EvaluationFieldOption,
|
||||
EvaluationMetric,
|
||||
EvaluationResourceState,
|
||||
EvaluationResourceType,
|
||||
JudgmentConditionGroup,
|
||||
} from './types'
|
||||
import { create } from 'zustand'
|
||||
import { getComparisonOperators, getDefaultOperator, getEvaluationMockConfig } from './mock'
|
||||
|
||||
type EvaluationStore = {
|
||||
resources: Record<string, EvaluationResourceState>
|
||||
ensureResource: (resourceType: EvaluationResourceType, resourceId: string) => void
|
||||
setJudgeModel: (resourceType: EvaluationResourceType, resourceId: string, judgeModelId: string) => void
|
||||
addBuiltinMetric: (resourceType: EvaluationResourceType, resourceId: string, optionId: string) => void
|
||||
addCustomMetric: (resourceType: EvaluationResourceType, resourceId: string) => void
|
||||
removeMetric: (resourceType: EvaluationResourceType, resourceId: string, metricId: string) => void
|
||||
setCustomMetricWorkflow: (resourceType: EvaluationResourceType, resourceId: string, metricId: string, workflowId: string) => void
|
||||
addCustomMetricMapping: (resourceType: EvaluationResourceType, resourceId: string, metricId: string) => void
|
||||
updateCustomMetricMapping: (
|
||||
resourceType: EvaluationResourceType,
|
||||
resourceId: string,
|
||||
metricId: string,
|
||||
mappingId: string,
|
||||
patch: { sourceFieldId?: string | null, targetVariableId?: string | null },
|
||||
) => void
|
||||
removeCustomMetricMapping: (resourceType: EvaluationResourceType, resourceId: string, metricId: string, mappingId: string) => void
|
||||
addConditionGroup: (resourceType: EvaluationResourceType, resourceId: string) => void
|
||||
removeConditionGroup: (resourceType: EvaluationResourceType, resourceId: string, groupId: string) => void
|
||||
setConditionGroupOperator: (resourceType: EvaluationResourceType, resourceId: string, groupId: string, logicalOperator: 'and' | 'or') => void
|
||||
addConditionItem: (resourceType: EvaluationResourceType, resourceId: string, groupId: string) => void
|
||||
removeConditionItem: (resourceType: EvaluationResourceType, resourceId: string, groupId: string, itemId: string) => void
|
||||
updateConditionField: (resourceType: EvaluationResourceType, resourceId: string, groupId: string, itemId: string, fieldId: string) => void
|
||||
updateConditionOperator: (resourceType: EvaluationResourceType, resourceId: string, groupId: string, itemId: string, operator: ComparisonOperator) => void
|
||||
updateConditionValue: (
|
||||
resourceType: EvaluationResourceType,
|
||||
resourceId: string,
|
||||
groupId: string,
|
||||
itemId: string,
|
||||
value: string | number | boolean | null,
|
||||
) => void
|
||||
setBatchTab: (resourceType: EvaluationResourceType, resourceId: string, tab: EvaluationResourceState['activeBatchTab']) => void
|
||||
setUploadedFileName: (resourceType: EvaluationResourceType, resourceId: string, uploadedFileName: string | null) => void
|
||||
runBatchTest: (resourceType: EvaluationResourceType, resourceId: string) => void
|
||||
}
|
||||
|
||||
const buildResourceKey = (resourceType: EvaluationResourceType, resourceId: string) => `${resourceType}:${resourceId}`
|
||||
const initialResourceCache: Record<string, EvaluationResourceState> = {}
|
||||
|
||||
const createId = (prefix: string) => `${prefix}-${Math.random().toString(36).slice(2, 10)}`
|
||||
|
||||
export const conditionOperatorsWithoutValue: ComparisonOperator[] = ['is_empty', 'is_not_empty']
|
||||
|
||||
export const requiresConditionValue = (operator: ComparisonOperator) => !conditionOperatorsWithoutValue.includes(operator)
|
||||
|
||||
const getConditionValue = (
|
||||
field: EvaluationFieldOption | undefined,
|
||||
operator: ComparisonOperator,
|
||||
previousValue: string | number | boolean | null = null,
|
||||
) => {
|
||||
if (!field || !requiresConditionValue(operator))
|
||||
return null
|
||||
|
||||
if (field.type === 'boolean')
|
||||
return typeof previousValue === 'boolean' ? previousValue : null
|
||||
|
||||
if (field.type === 'enum')
|
||||
return typeof previousValue === 'string' ? previousValue : null
|
||||
|
||||
if (field.type === 'number')
|
||||
return typeof previousValue === 'number' ? previousValue : null
|
||||
|
||||
return typeof previousValue === 'string' ? previousValue : null
|
||||
}
|
||||
|
||||
const buildConditionItem = (resourceType: EvaluationResourceType) => {
|
||||
const field = getEvaluationMockConfig(resourceType).fieldOptions[0]
|
||||
const operator = field ? getDefaultOperator(field.type) : 'contains'
|
||||
|
||||
return {
|
||||
id: createId('condition'),
|
||||
fieldId: field?.id ?? null,
|
||||
operator,
|
||||
value: getConditionValue(field, operator),
|
||||
}
|
||||
}
|
||||
|
||||
const buildInitialState = (resourceType: EvaluationResourceType): EvaluationResourceState => {
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
const defaultMetric = config.builtinMetrics[0]
|
||||
|
||||
return {
|
||||
judgeModelId: null,
|
||||
metrics: defaultMetric
|
||||
? [{
|
||||
id: createId('metric'),
|
||||
optionId: defaultMetric.id,
|
||||
kind: 'builtin',
|
||||
label: defaultMetric.label,
|
||||
description: defaultMetric.description,
|
||||
badges: defaultMetric.badges,
|
||||
}]
|
||||
: [],
|
||||
conditions: [{
|
||||
id: createId('group'),
|
||||
logicalOperator: 'and',
|
||||
items: [buildConditionItem(resourceType)],
|
||||
}],
|
||||
activeBatchTab: 'input-fields',
|
||||
uploadedFileName: null,
|
||||
batchRecords: [],
|
||||
}
|
||||
}
|
||||
|
||||
const withResourceState = (
|
||||
resources: EvaluationStore['resources'],
|
||||
resourceType: EvaluationResourceType,
|
||||
resourceId: string,
|
||||
) => {
|
||||
const resourceKey = buildResourceKey(resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resourceKey,
|
||||
resource: resources[resourceKey] ?? buildInitialState(resourceType),
|
||||
}
|
||||
}
|
||||
|
||||
const updateMetric = (
|
||||
metrics: EvaluationMetric[],
|
||||
metricId: string,
|
||||
updater: (metric: EvaluationMetric) => EvaluationMetric,
|
||||
) => metrics.map(metric => metric.id === metricId ? updater(metric) : metric)
|
||||
|
||||
const updateConditionGroup = (
|
||||
groups: JudgmentConditionGroup[],
|
||||
groupId: string,
|
||||
updater: (group: JudgmentConditionGroup) => JudgmentConditionGroup,
|
||||
) => groups.map(group => group.id === groupId ? updater(group) : group)
|
||||
|
||||
export const isCustomMetricConfigured = (metric: EvaluationMetric) => {
|
||||
if (metric.kind !== 'custom-workflow')
|
||||
return true
|
||||
|
||||
if (!metric.customConfig?.workflowId)
|
||||
return false
|
||||
|
||||
return metric.customConfig.mappings.length > 0
|
||||
&& metric.customConfig.mappings.every(mapping => !!mapping.sourceFieldId && !!mapping.targetVariableId)
|
||||
}
|
||||
|
||||
export const isEvaluationRunnable = (state: EvaluationResourceState) => {
|
||||
return !!state.judgeModelId
|
||||
&& state.metrics.length > 0
|
||||
&& state.metrics.every(isCustomMetricConfigured)
|
||||
&& state.conditions.some(group => group.items.length > 0)
|
||||
}
|
||||
|
||||
export const useEvaluationStore = create<EvaluationStore>((set, get) => ({
|
||||
resources: {},
|
||||
ensureResource: (resourceType, resourceId) => {
|
||||
const resourceKey = buildResourceKey(resourceType, resourceId)
|
||||
if (get().resources[resourceKey])
|
||||
return
|
||||
|
||||
set(state => ({
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: buildInitialState(resourceType),
|
||||
},
|
||||
}))
|
||||
},
|
||||
setJudgeModel: (resourceType, resourceId, judgeModelId) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
judgeModelId,
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
addBuiltinMetric: (resourceType, resourceId, optionId) => {
|
||||
const option = getEvaluationMockConfig(resourceType).builtinMetrics.find(metric => metric.id === optionId)
|
||||
if (!option)
|
||||
return
|
||||
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
if (resource.metrics.some(metric => metric.optionId === optionId && metric.kind === 'builtin'))
|
||||
return state
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
metrics: [
|
||||
...resource.metrics,
|
||||
{
|
||||
id: createId('metric'),
|
||||
optionId: option.id,
|
||||
kind: 'builtin',
|
||||
label: option.label,
|
||||
description: option.description,
|
||||
badges: option.badges,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
addCustomMetric: (resourceType, resourceId) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
metrics: [
|
||||
...resource.metrics,
|
||||
{
|
||||
id: createId('metric'),
|
||||
optionId: createId('custom'),
|
||||
kind: 'custom-workflow',
|
||||
label: 'Custom Evaluator',
|
||||
description: 'Map workflow variables to your evaluation inputs.',
|
||||
badges: ['Workflow'],
|
||||
customConfig: {
|
||||
workflowId: null,
|
||||
mappings: [{
|
||||
id: createId('mapping'),
|
||||
sourceFieldId: null,
|
||||
targetVariableId: null,
|
||||
}],
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
removeMetric: (resourceType, resourceId, metricId) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
metrics: resource.metrics.filter(metric => metric.id !== metricId),
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
setCustomMetricWorkflow: (resourceType, resourceId, metricId, workflowId) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
metrics: updateMetric(resource.metrics, metricId, metric => ({
|
||||
...metric,
|
||||
customConfig: metric.customConfig
|
||||
? {
|
||||
...metric.customConfig,
|
||||
workflowId,
|
||||
mappings: metric.customConfig.mappings.map(mapping => ({
|
||||
...mapping,
|
||||
targetVariableId: null,
|
||||
})),
|
||||
}
|
||||
: metric.customConfig,
|
||||
})),
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
addCustomMetricMapping: (resourceType, resourceId, metricId) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
metrics: updateMetric(resource.metrics, metricId, metric => ({
|
||||
...metric,
|
||||
customConfig: metric.customConfig
|
||||
? {
|
||||
...metric.customConfig,
|
||||
mappings: [
|
||||
...metric.customConfig.mappings,
|
||||
{
|
||||
id: createId('mapping'),
|
||||
sourceFieldId: null,
|
||||
targetVariableId: null,
|
||||
},
|
||||
],
|
||||
}
|
||||
: metric.customConfig,
|
||||
})),
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
updateCustomMetricMapping: (resourceType, resourceId, metricId, mappingId, patch) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
metrics: updateMetric(resource.metrics, metricId, metric => ({
|
||||
...metric,
|
||||
customConfig: metric.customConfig
|
||||
? {
|
||||
...metric.customConfig,
|
||||
mappings: metric.customConfig.mappings.map(mapping => mapping.id === mappingId ? { ...mapping, ...patch } : mapping),
|
||||
}
|
||||
: metric.customConfig,
|
||||
})),
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
removeCustomMetricMapping: (resourceType, resourceId, metricId, mappingId) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
metrics: updateMetric(resource.metrics, metricId, metric => ({
|
||||
...metric,
|
||||
customConfig: metric.customConfig
|
||||
? {
|
||||
...metric.customConfig,
|
||||
mappings: metric.customConfig.mappings.filter(mapping => mapping.id !== mappingId),
|
||||
}
|
||||
: metric.customConfig,
|
||||
})),
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
addConditionGroup: (resourceType, resourceId) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
conditions: [
|
||||
...resource.conditions,
|
||||
{
|
||||
id: createId('group'),
|
||||
logicalOperator: 'and',
|
||||
items: [buildConditionItem(resourceType)],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
removeConditionGroup: (resourceType, resourceId, groupId) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
conditions: resource.conditions.filter(group => group.id !== groupId),
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
setConditionGroupOperator: (resourceType, resourceId, groupId, logicalOperator) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
conditions: updateConditionGroup(resource.conditions, groupId, group => ({
|
||||
...group,
|
||||
logicalOperator,
|
||||
})),
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
addConditionItem: (resourceType, resourceId, groupId) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
conditions: updateConditionGroup(resource.conditions, groupId, group => ({
|
||||
...group,
|
||||
items: [
|
||||
...group.items,
|
||||
buildConditionItem(resourceType),
|
||||
],
|
||||
})),
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
removeConditionItem: (resourceType, resourceId, groupId, itemId) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
conditions: updateConditionGroup(resource.conditions, groupId, group => ({
|
||||
...group,
|
||||
items: group.items.filter(item => item.id !== itemId),
|
||||
})),
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
updateConditionField: (resourceType, resourceId, groupId, itemId, fieldId) => {
|
||||
const field = getEvaluationMockConfig(resourceType).fieldOptions.find(option => option.id === fieldId)
|
||||
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
conditions: updateConditionGroup(resource.conditions, groupId, group => ({
|
||||
...group,
|
||||
items: group.items.map((item) => {
|
||||
if (item.id !== itemId)
|
||||
return item
|
||||
|
||||
return {
|
||||
...item,
|
||||
fieldId,
|
||||
operator: field ? getDefaultOperator(field.type) : item.operator,
|
||||
value: getConditionValue(field, field ? getDefaultOperator(field.type) : item.operator),
|
||||
}
|
||||
}),
|
||||
})),
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
updateConditionOperator: (resourceType, resourceId, groupId, itemId, operator) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
const fieldOptions = getEvaluationMockConfig(resourceType).fieldOptions
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
conditions: updateConditionGroup(resource.conditions, groupId, group => ({
|
||||
...group,
|
||||
items: group.items.map((item) => {
|
||||
if (item.id !== itemId)
|
||||
return item
|
||||
|
||||
const field = fieldOptions.find(option => option.id === item.fieldId)
|
||||
|
||||
return {
|
||||
...item,
|
||||
operator,
|
||||
value: getConditionValue(field, operator, item.value),
|
||||
}
|
||||
}),
|
||||
})),
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
updateConditionValue: (resourceType, resourceId, groupId, itemId, value) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
conditions: updateConditionGroup(resource.conditions, groupId, group => ({
|
||||
...group,
|
||||
items: group.items.map(item => item.id === itemId ? { ...item, value } : item),
|
||||
})),
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
setBatchTab: (resourceType, resourceId, tab) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
activeBatchTab: tab,
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
setUploadedFileName: (resourceType, resourceId, uploadedFileName) => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
uploadedFileName,
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
},
|
||||
runBatchTest: (resourceType, resourceId) => {
|
||||
const config = getEvaluationMockConfig(resourceType)
|
||||
const recordId = createId('batch')
|
||||
const nextRecord: BatchTestRecord = {
|
||||
id: recordId,
|
||||
fileName: get().resources[buildResourceKey(resourceType, resourceId)]?.uploadedFileName ?? config.templateFileName,
|
||||
status: 'running',
|
||||
startedAt: new Date().toLocaleTimeString(),
|
||||
summary: config.historySummaryLabel,
|
||||
}
|
||||
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
activeBatchTab: 'history',
|
||||
batchRecords: [nextRecord, ...resource.batchRecords],
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
|
||||
window.setTimeout(() => {
|
||||
set((state) => {
|
||||
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
|
||||
|
||||
return {
|
||||
resources: {
|
||||
...state.resources,
|
||||
[resourceKey]: {
|
||||
...resource,
|
||||
batchRecords: resource.batchRecords.map(record => record.id === recordId
|
||||
? {
|
||||
...record,
|
||||
status: resource.metrics.length > 1 ? 'success' : 'failed',
|
||||
}
|
||||
: record),
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
}, 1200)
|
||||
},
|
||||
}))
|
||||
|
||||
export const useEvaluationResource = (resourceType: EvaluationResourceType, resourceId: string) => {
|
||||
const resourceKey = buildResourceKey(resourceType, resourceId)
|
||||
return useEvaluationStore(state => state.resources[resourceKey] ?? (initialResourceCache[resourceKey] ??= buildInitialState(resourceType)))
|
||||
}
|
||||
|
||||
export const getAllowedOperators = (resourceType: EvaluationResourceType, fieldId: string | null) => {
|
||||
const field = getEvaluationMockConfig(resourceType).fieldOptions.find(option => option.id === fieldId)
|
||||
|
||||
if (!field)
|
||||
return ['contains'] as ComparisonOperator[]
|
||||
|
||||
return getComparisonOperators(field.type)
|
||||
}
|
||||
117
web/app/components/evaluation/types.ts
Normal file
117
web/app/components/evaluation/types.ts
Normal file
@ -0,0 +1,117 @@
|
||||
export type EvaluationResourceType = 'workflow' | 'pipeline' | 'snippet'
|
||||
|
||||
export type MetricKind = 'builtin' | 'custom-workflow'
|
||||
|
||||
export type BatchTestTab = 'input-fields' | 'history'
|
||||
|
||||
export type FieldType = 'string' | 'number' | 'boolean' | 'enum' | 'time'
|
||||
|
||||
export type ComparisonOperator
|
||||
= | 'contains'
|
||||
| 'not_contains'
|
||||
| 'is'
|
||||
| 'is_not'
|
||||
| 'is_empty'
|
||||
| 'is_not_empty'
|
||||
| 'greater_than'
|
||||
| 'less_than'
|
||||
| 'greater_or_equal'
|
||||
| 'less_or_equal'
|
||||
| 'before'
|
||||
| 'after'
|
||||
|
||||
export type JudgeModelOption = {
|
||||
id: string
|
||||
label: string
|
||||
provider: string
|
||||
}
|
||||
|
||||
export type MetricOption = {
|
||||
id: string
|
||||
label: string
|
||||
description: string
|
||||
group: string
|
||||
badges: string[]
|
||||
}
|
||||
|
||||
export type EvaluationWorkflowOption = {
|
||||
id: string
|
||||
label: string
|
||||
description: string
|
||||
targetVariables: Array<{
|
||||
id: string
|
||||
label: string
|
||||
}>
|
||||
}
|
||||
|
||||
export type EvaluationFieldOption = {
|
||||
id: string
|
||||
label: string
|
||||
group: string
|
||||
type: FieldType
|
||||
options?: Array<{
|
||||
value: string
|
||||
label: string
|
||||
}>
|
||||
}
|
||||
|
||||
export type CustomMetricMapping = {
|
||||
id: string
|
||||
sourceFieldId: string | null
|
||||
targetVariableId: string | null
|
||||
}
|
||||
|
||||
export type CustomMetricConfig = {
|
||||
workflowId: string | null
|
||||
mappings: CustomMetricMapping[]
|
||||
}
|
||||
|
||||
export type EvaluationMetric = {
|
||||
id: string
|
||||
optionId: string
|
||||
kind: MetricKind
|
||||
label: string
|
||||
description: string
|
||||
badges: string[]
|
||||
customConfig?: CustomMetricConfig
|
||||
}
|
||||
|
||||
export type JudgmentConditionItem = {
|
||||
id: string
|
||||
fieldId: string | null
|
||||
operator: ComparisonOperator
|
||||
value: string | number | boolean | null
|
||||
}
|
||||
|
||||
export type JudgmentConditionGroup = {
|
||||
id: string
|
||||
logicalOperator: 'and' | 'or'
|
||||
items: JudgmentConditionItem[]
|
||||
}
|
||||
|
||||
export type BatchTestRecord = {
|
||||
id: string
|
||||
fileName: string
|
||||
status: 'running' | 'success' | 'failed'
|
||||
startedAt: string
|
||||
summary: string
|
||||
}
|
||||
|
||||
export type EvaluationResourceState = {
|
||||
judgeModelId: string | null
|
||||
metrics: EvaluationMetric[]
|
||||
conditions: JudgmentConditionGroup[]
|
||||
activeBatchTab: BatchTestTab
|
||||
uploadedFileName: string | null
|
||||
batchRecords: BatchTestRecord[]
|
||||
}
|
||||
|
||||
export type EvaluationMockConfig = {
|
||||
judgeModels: JudgeModelOption[]
|
||||
builtinMetrics: MetricOption[]
|
||||
workflowOptions: EvaluationWorkflowOption[]
|
||||
fieldOptions: EvaluationFieldOption[]
|
||||
templateFileName: string
|
||||
batchRequirements: string[]
|
||||
historySummaryLabel: string
|
||||
}
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
import type { NavIcon } from '@/app/components/app-sidebar/nav-link'
|
||||
import type { WorkflowProps } from '@/app/components/workflow'
|
||||
import type { SnippetDetailPayload, SnippetInputField } from '@/models/snippet'
|
||||
import type { SnippetDetailPayload, SnippetInputField, SnippetSection } from '@/models/snippet'
|
||||
import {
|
||||
RiFlaskFill,
|
||||
RiFlaskLine,
|
||||
@ -17,6 +17,7 @@ import NavLink from '@/app/components/app-sidebar/nav-link'
|
||||
import SnippetInfo from '@/app/components/app-sidebar/snippet-info'
|
||||
import { useStore as useAppStore } from '@/app/components/app/store'
|
||||
import Toast from '@/app/components/base/toast'
|
||||
import Evaluation from '@/app/components/evaluation'
|
||||
import { WorkflowWithInnerContext } from '@/app/components/workflow'
|
||||
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
|
||||
import { useSnippetDetailStore } from '../store'
|
||||
@ -25,6 +26,7 @@ import SnippetChildren from './snippet-children'
|
||||
type SnippetMainProps = {
|
||||
payload: SnippetDetailPayload
|
||||
snippetId: string
|
||||
section: SnippetSection
|
||||
} & Pick<WorkflowProps, 'nodes' | 'edges' | 'viewport'>
|
||||
|
||||
const ORCHESTRATE_ICONS: { normal: NavIcon, selected: NavIcon } = {
|
||||
@ -40,6 +42,7 @@ const EVALUATION_ICONS: { normal: NavIcon, selected: NavIcon } = {
|
||||
const SnippetMain = ({
|
||||
payload,
|
||||
snippetId,
|
||||
section,
|
||||
nodes,
|
||||
edges,
|
||||
viewport,
|
||||
@ -51,7 +54,6 @@ const SnippetMain = ({
|
||||
const [fields, setFields] = useState<SnippetInputField[]>(payload.inputFields)
|
||||
const setAppSidebarExpand = useAppStore(state => state.setAppSidebarExpand)
|
||||
const {
|
||||
activeSection,
|
||||
editingField,
|
||||
isEditorOpen,
|
||||
isInputPanelOpen,
|
||||
@ -59,12 +61,10 @@ const SnippetMain = ({
|
||||
closeEditor,
|
||||
openEditor,
|
||||
reset,
|
||||
setActiveSection,
|
||||
setInputPanelOpen,
|
||||
toggleInputPanel,
|
||||
togglePublishMenu,
|
||||
} = useSnippetDetailStore(useShallow(state => ({
|
||||
activeSection: state.activeSection,
|
||||
editingField: state.editingField,
|
||||
isEditorOpen: state.isEditorOpen,
|
||||
isInputPanelOpen: state.isInputPanelOpen,
|
||||
@ -72,7 +72,6 @@ const SnippetMain = ({
|
||||
closeEditor: state.closeEditor,
|
||||
openEditor: state.openEditor,
|
||||
reset: state.reset,
|
||||
setActiveSection: state.setActiveSection,
|
||||
setInputPanelOpen: state.setInputPanelOpen,
|
||||
toggleInputPanel: state.toggleInputPanel,
|
||||
togglePublishMenu: state.togglePublishMenu,
|
||||
@ -145,15 +144,15 @@ const SnippetMain = ({
|
||||
mode={mode}
|
||||
name={t('sectionOrchestrate')}
|
||||
iconMap={ORCHESTRATE_ICONS}
|
||||
active={activeSection === 'orchestrate'}
|
||||
onClick={() => setActiveSection('orchestrate')}
|
||||
href={`/snippets/${snippetId}/orchestrate`}
|
||||
active={section === 'orchestrate'}
|
||||
/>
|
||||
<NavLink
|
||||
mode={mode}
|
||||
name={t('sectionEvaluation')}
|
||||
iconMap={EVALUATION_ICONS}
|
||||
active={activeSection === 'evaluation'}
|
||||
onClick={() => setActiveSection('evaluation')}
|
||||
href={`/snippets/${snippetId}/evaluation`}
|
||||
active={section === 'evaluation'}
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
@ -161,29 +160,35 @@ const SnippetMain = ({
|
||||
|
||||
<div className="relative min-h-0 min-w-0 grow overflow-hidden">
|
||||
<div className="absolute inset-0 min-h-0 min-w-0 overflow-hidden">
|
||||
<WorkflowWithInnerContext
|
||||
nodes={nodes}
|
||||
edges={edges}
|
||||
viewport={viewport ?? graph.viewport}
|
||||
>
|
||||
<SnippetChildren
|
||||
fields={fields}
|
||||
uiMeta={uiMeta}
|
||||
editingField={editingField}
|
||||
isEditorOpen={isEditorOpen}
|
||||
isInputPanelOpen={isInputPanelOpen}
|
||||
isPublishMenuOpen={isPublishMenuOpen}
|
||||
onToggleInputPanel={handleToggleInputPanel}
|
||||
onTogglePublishMenu={togglePublishMenu}
|
||||
onCloseInputPanel={handleCloseInputPanel}
|
||||
onOpenEditor={openEditor}
|
||||
onCloseEditor={closeEditor}
|
||||
onSubmitField={handleSubmitField}
|
||||
onRemoveField={handleRemoveField}
|
||||
onPrimarySortChange={handlePrimarySortChange}
|
||||
onSecondarySortChange={handleSecondarySortChange}
|
||||
/>
|
||||
</WorkflowWithInnerContext>
|
||||
{section === 'evaluation'
|
||||
? (
|
||||
<Evaluation resourceType="snippet" resourceId={snippetId} />
|
||||
)
|
||||
: (
|
||||
<WorkflowWithInnerContext
|
||||
nodes={nodes}
|
||||
edges={edges}
|
||||
viewport={viewport ?? graph.viewport}
|
||||
>
|
||||
<SnippetChildren
|
||||
fields={fields}
|
||||
uiMeta={uiMeta}
|
||||
editingField={editingField}
|
||||
isEditorOpen={isEditorOpen}
|
||||
isInputPanelOpen={isInputPanelOpen}
|
||||
isPublishMenuOpen={isPublishMenuOpen}
|
||||
onToggleInputPanel={handleToggleInputPanel}
|
||||
onTogglePublishMenu={togglePublishMenu}
|
||||
onCloseInputPanel={handleCloseInputPanel}
|
||||
onOpenEditor={openEditor}
|
||||
onCloseEditor={closeEditor}
|
||||
onSubmitField={handleSubmitField}
|
||||
onRemoveField={handleRemoveField}
|
||||
onPrimarySortChange={handlePrimarySortChange}
|
||||
onSecondarySortChange={handleSecondarySortChange}
|
||||
/>
|
||||
</WorkflowWithInnerContext>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
'use client'
|
||||
|
||||
import type { SnippetSection } from '@/models/snippet'
|
||||
import { useMemo } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import Loading from '@/app/components/base/loading'
|
||||
@ -14,10 +15,12 @@ import { useSnippetInit } from './hooks/use-snippet-init'
|
||||
|
||||
type SnippetPageProps = {
|
||||
snippetId: string
|
||||
section?: SnippetSection
|
||||
}
|
||||
|
||||
const SnippetPage = ({
|
||||
snippetId,
|
||||
section = 'orchestrate',
|
||||
}: SnippetPageProps) => {
|
||||
const { t } = useTranslation('snippet')
|
||||
const { data, isLoading } = useSnippetInit(snippetId)
|
||||
@ -62,6 +65,7 @@ const SnippetPage = ({
|
||||
<SnippetMain
|
||||
key={snippetId}
|
||||
snippetId={snippetId}
|
||||
section={section}
|
||||
payload={data}
|
||||
nodes={nodesData}
|
||||
edges={edgesData}
|
||||
|
||||
@ -14,6 +14,7 @@ import type datasetPipeline from '../i18n/en-US/dataset-pipeline.json'
|
||||
import type datasetSettings from '../i18n/en-US/dataset-settings.json'
|
||||
import type dataset from '../i18n/en-US/dataset.json'
|
||||
import type education from '../i18n/en-US/education.json'
|
||||
import type evaluation from '../i18n/en-US/evaluation.json'
|
||||
import type explore from '../i18n/en-US/explore.json'
|
||||
import type layout from '../i18n/en-US/layout.json'
|
||||
import type login from '../i18n/en-US/login.json'
|
||||
@ -48,6 +49,7 @@ export type Resources = {
|
||||
datasetPipeline: typeof datasetPipeline
|
||||
datasetSettings: typeof datasetSettings
|
||||
education: typeof education
|
||||
evaluation: typeof evaluation
|
||||
explore: typeof explore
|
||||
layout: typeof layout
|
||||
login: typeof login
|
||||
@ -82,6 +84,7 @@ export const namespaces = [
|
||||
'datasetPipeline',
|
||||
'datasetSettings',
|
||||
'education',
|
||||
'evaluation',
|
||||
'explore',
|
||||
'layout',
|
||||
'login',
|
||||
|
||||
@ -93,6 +93,7 @@
|
||||
"apiBasedExtension.type": "Type",
|
||||
"appMenus.apiAccess": "API Access",
|
||||
"appMenus.apiAccessTip": "This knowledge base is accessible via the Service API",
|
||||
"appMenus.evaluation": "Evaluation",
|
||||
"appMenus.logAndAnn": "Logs & Annotations",
|
||||
"appMenus.logs": "Logs",
|
||||
"appMenus.overview": "Monitoring",
|
||||
@ -149,6 +150,7 @@
|
||||
"dataSource.website.with": "With",
|
||||
"datasetMenus.documents": "Documents",
|
||||
"datasetMenus.emptyTip": "This Knowledge has not been integrated within any application. Please refer to the document for guidance.",
|
||||
"datasetMenus.evaluation": "Evaluation",
|
||||
"datasetMenus.hitTesting": "Retrieval Testing",
|
||||
"datasetMenus.noRelatedApp": "No linked apps",
|
||||
"datasetMenus.pipeline": "Pipeline",
|
||||
|
||||
73
web/i18n/en-US/evaluation.json
Normal file
73
web/i18n/en-US/evaluation.json
Normal file
@ -0,0 +1,73 @@
|
||||
{
|
||||
"batch.downloadTemplate": "Download Excel Template",
|
||||
"batch.emptyHistory": "No test history yet.",
|
||||
"batch.noticeDescription": "Download the template, upload your cases, then run a local mock batch test.",
|
||||
"batch.noticeTitle": "Quick start",
|
||||
"batch.requirementsTitle": "Data requirements",
|
||||
"batch.run": "Run Test",
|
||||
"batch.status.failed": "Failed",
|
||||
"batch.status.running": "Running",
|
||||
"batch.status.success": "Success",
|
||||
"batch.tabs.history": "Test History",
|
||||
"batch.tabs.input-fields": "Input Fields",
|
||||
"batch.title": "Batch Test",
|
||||
"batch.uploadHint": "Select a .csv or .xlsx file",
|
||||
"batch.uploadTitle": "Upload test file",
|
||||
"batch.validation": "Complete the judge model, metrics, and custom mappings before running a batch test.",
|
||||
"conditions.addCondition": "Add Condition",
|
||||
"conditions.addGroup": "Add Condition Group",
|
||||
"conditions.boolean.false": "False",
|
||||
"conditions.boolean.true": "True",
|
||||
"conditions.description": "Define additional rules for when results should pass or fail.",
|
||||
"conditions.emptyDescription": "Start with a condition group to define evaluation gates.",
|
||||
"conditions.emptyTitle": "No conditions yet",
|
||||
"conditions.fieldPlaceholder": "Select field",
|
||||
"conditions.groupLabel": "Group {{index}}",
|
||||
"conditions.logical.and": "AND",
|
||||
"conditions.logical.or": "OR",
|
||||
"conditions.operators.after": "After",
|
||||
"conditions.operators.before": "Before",
|
||||
"conditions.operators.contains": "Contains",
|
||||
"conditions.operators.greater_or_equal": "Greater than or equal",
|
||||
"conditions.operators.greater_than": "Greater than",
|
||||
"conditions.operators.is": "Is",
|
||||
"conditions.operators.is_empty": "Is empty",
|
||||
"conditions.operators.is_not": "Is not",
|
||||
"conditions.operators.is_not_empty": "Is not empty",
|
||||
"conditions.operators.less_or_equal": "Less than or equal",
|
||||
"conditions.operators.less_than": "Less than",
|
||||
"conditions.operators.not_contains": "Does not contain",
|
||||
"conditions.removeCondition": "Remove condition",
|
||||
"conditions.removeGroup": "Remove condition group",
|
||||
"conditions.selectFieldFirst": "Select a field first",
|
||||
"conditions.selectTime": "Choose a time...",
|
||||
"conditions.selectValue": "Choose a value",
|
||||
"conditions.title": "Judgment Conditions",
|
||||
"conditions.valuePlaceholder": "Enter a value",
|
||||
"description": "Configure judge models, metrics, and batch tests for this resource.",
|
||||
"judgeModel.description": "Choose the model used to score your evaluation results.",
|
||||
"judgeModel.title": "Judge Model",
|
||||
"metrics.add": "Add Metric",
|
||||
"metrics.addCustom": "Add Custom Metrics",
|
||||
"metrics.added": "Added",
|
||||
"metrics.custom.addMapping": "Add Mapping",
|
||||
"metrics.custom.description": "Select an evaluation workflow and map your variables before running tests.",
|
||||
"metrics.custom.mappingTitle": "Variable Mapping",
|
||||
"metrics.custom.mappingWarning": "Complete the workflow selection and each variable mapping to enable batch tests.",
|
||||
"metrics.custom.sourcePlaceholder": "Source variable",
|
||||
"metrics.custom.targetPlaceholder": "Target variable",
|
||||
"metrics.custom.title": "Custom Evaluator",
|
||||
"metrics.custom.warningBadge": "Needs setup",
|
||||
"metrics.custom.workflowLabel": "Evaluation Workflow",
|
||||
"metrics.custom.workflowPlaceholder": "Select a workflow",
|
||||
"metrics.description": "Combine built-in metrics with custom evaluator workflows.",
|
||||
"metrics.groups.operations": "Operations",
|
||||
"metrics.groups.quality": "Quality",
|
||||
"metrics.noResults": "No metrics match your search.",
|
||||
"metrics.remove": "Remove metric",
|
||||
"metrics.searchPlaceholder": "Search metrics",
|
||||
"metrics.showLess": "Show less",
|
||||
"metrics.showMore": "Show more",
|
||||
"metrics.title": "Metrics",
|
||||
"title": "Evaluation"
|
||||
}
|
||||
@ -93,6 +93,7 @@
|
||||
"apiBasedExtension.type": "类型",
|
||||
"appMenus.apiAccess": "访问 API",
|
||||
"appMenus.apiAccessTip": "此知识库可通过服务 API 访问",
|
||||
"appMenus.evaluation": "评测",
|
||||
"appMenus.logAndAnn": "日志与标注",
|
||||
"appMenus.logs": "日志",
|
||||
"appMenus.overview": "监测",
|
||||
@ -149,6 +150,7 @@
|
||||
"dataSource.website.with": "使用",
|
||||
"datasetMenus.documents": "文档",
|
||||
"datasetMenus.emptyTip": "此知识尚未集成到任何应用程序中。请参阅文档以获取指导。",
|
||||
"datasetMenus.evaluation": "评测",
|
||||
"datasetMenus.hitTesting": "召回测试",
|
||||
"datasetMenus.noRelatedApp": "无关联应用",
|
||||
"datasetMenus.pipeline": "流水线",
|
||||
|
||||
73
web/i18n/zh-Hans/evaluation.json
Normal file
73
web/i18n/zh-Hans/evaluation.json
Normal file
@ -0,0 +1,73 @@
|
||||
{
|
||||
"batch.downloadTemplate": "下载 Excel 模板",
|
||||
"batch.emptyHistory": "还没有测试历史。",
|
||||
"batch.noticeDescription": "先下载模板,再上传测试集,然后运行本地模拟批量测试。",
|
||||
"batch.noticeTitle": "快速开始",
|
||||
"batch.requirementsTitle": "数据要求",
|
||||
"batch.run": "运行测试",
|
||||
"batch.status.failed": "失败",
|
||||
"batch.status.running": "运行中",
|
||||
"batch.status.success": "成功",
|
||||
"batch.tabs.history": "测试历史",
|
||||
"batch.tabs.input-fields": "输入字段",
|
||||
"batch.title": "批量测试",
|
||||
"batch.uploadHint": "选择 .csv 或 .xlsx 文件",
|
||||
"batch.uploadTitle": "上传测试文件",
|
||||
"batch.validation": "运行批量测试前,请先完成判定模型、指标和自定义映射配置。",
|
||||
"conditions.addCondition": "添加条件",
|
||||
"conditions.addGroup": "添加条件组",
|
||||
"conditions.boolean.false": "否",
|
||||
"conditions.boolean.true": "是",
|
||||
"conditions.description": "定义额外规则,决定结果何时通过或失败。",
|
||||
"conditions.emptyDescription": "从一个条件组开始,定义评测门槛。",
|
||||
"conditions.emptyTitle": "还没有条件",
|
||||
"conditions.fieldPlaceholder": "选择字段",
|
||||
"conditions.groupLabel": "条件组 {{index}}",
|
||||
"conditions.logical.and": "且",
|
||||
"conditions.logical.or": "或",
|
||||
"conditions.operators.after": "晚于",
|
||||
"conditions.operators.before": "早于",
|
||||
"conditions.operators.contains": "包含",
|
||||
"conditions.operators.greater_or_equal": "大于等于",
|
||||
"conditions.operators.greater_than": "大于",
|
||||
"conditions.operators.is": "等于",
|
||||
"conditions.operators.is_empty": "为空",
|
||||
"conditions.operators.is_not": "不等于",
|
||||
"conditions.operators.is_not_empty": "不为空",
|
||||
"conditions.operators.less_or_equal": "小于等于",
|
||||
"conditions.operators.less_than": "小于",
|
||||
"conditions.operators.not_contains": "不包含",
|
||||
"conditions.removeCondition": "删除条件",
|
||||
"conditions.removeGroup": "删除条件组",
|
||||
"conditions.selectFieldFirst": "请先选择字段",
|
||||
"conditions.selectTime": "选择时间...",
|
||||
"conditions.selectValue": "选择值",
|
||||
"conditions.title": "判定条件",
|
||||
"conditions.valuePlaceholder": "输入值",
|
||||
"description": "为当前资源配置判定模型、评测指标和批量测试。",
|
||||
"judgeModel.description": "选择用于打分和判定评测结果的模型。",
|
||||
"judgeModel.title": "判定模型",
|
||||
"metrics.add": "添加指标",
|
||||
"metrics.addCustom": "添加自定义指标",
|
||||
"metrics.added": "已添加",
|
||||
"metrics.custom.addMapping": "添加映射",
|
||||
"metrics.custom.description": "选择评测工作流并完成变量映射后即可运行测试。",
|
||||
"metrics.custom.mappingTitle": "变量映射",
|
||||
"metrics.custom.mappingWarning": "请先完成工作流选择和所有变量映射,再运行批量测试。",
|
||||
"metrics.custom.sourcePlaceholder": "源变量",
|
||||
"metrics.custom.targetPlaceholder": "目标变量",
|
||||
"metrics.custom.title": "自定义评测器",
|
||||
"metrics.custom.warningBadge": "待配置",
|
||||
"metrics.custom.workflowLabel": "评测工作流",
|
||||
"metrics.custom.workflowPlaceholder": "选择工作流",
|
||||
"metrics.description": "组合内置指标和自定义评测工作流。",
|
||||
"metrics.groups.operations": "运行",
|
||||
"metrics.groups.quality": "质量",
|
||||
"metrics.noResults": "没有匹配的指标。",
|
||||
"metrics.remove": "删除指标",
|
||||
"metrics.searchPlaceholder": "搜索指标",
|
||||
"metrics.showLess": "收起",
|
||||
"metrics.showMore": "展开更多",
|
||||
"metrics.title": "指标",
|
||||
"title": "评测"
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user