feat: evaluation

This commit is contained in:
zxhlyh 2026-03-16 10:46:33 +08:00
parent 7a722773c7
commit 887c7710e9
22 changed files with 2493 additions and 84 deletions

View File

@ -0,0 +1,11 @@
import Evaluation from '@/app/components/evaluation'
const Page = async (props: {
params: Promise<{ appId: string }>
}) => {
const { appId } = await props.params
return <Evaluation resourceType="workflow" resourceId={appId} />
}
export default Page

View File

@ -7,6 +7,8 @@ import {
RiDashboard2Line,
RiFileList3Fill,
RiFileList3Line,
RiFlaskFill,
RiFlaskLine,
RiTerminalBoxFill,
RiTerminalBoxLine,
RiTerminalWindowFill,
@ -67,40 +69,47 @@ const AppDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
}>>([])
const getNavigationConfig = useCallback((appId: string, isCurrentWorkspaceEditor: boolean, mode: AppModeEnum) => {
const navConfig = [
...(isCurrentWorkspaceEditor
? [{
name: t('appMenus.promptEng', { ns: 'common' }),
href: `/app/${appId}/${(mode === AppModeEnum.WORKFLOW || mode === AppModeEnum.ADVANCED_CHAT) ? 'workflow' : 'configuration'}`,
icon: RiTerminalWindowLine,
selectedIcon: RiTerminalWindowFill,
}]
: []
),
{
name: t('appMenus.apiAccess', { ns: 'common' }),
href: `/app/${appId}/develop`,
icon: RiTerminalBoxLine,
selectedIcon: RiTerminalBoxFill,
},
...(isCurrentWorkspaceEditor
? [{
name: mode !== AppModeEnum.WORKFLOW
? t('appMenus.logAndAnn', { ns: 'common' })
: t('appMenus.logs', { ns: 'common' }),
href: `/app/${appId}/logs`,
icon: RiFileList3Line,
selectedIcon: RiFileList3Fill,
}]
: []
),
{
name: t('appMenus.overview', { ns: 'common' }),
href: `/app/${appId}/overview`,
icon: RiDashboard2Line,
selectedIcon: RiDashboard2Fill,
},
]
const navConfig = []
if (isCurrentWorkspaceEditor) {
navConfig.push({
name: t('appMenus.promptEng', { ns: 'common' }),
href: `/app/${appId}/${(mode === AppModeEnum.WORKFLOW || mode === AppModeEnum.ADVANCED_CHAT) ? 'workflow' : 'configuration'}`,
icon: RiTerminalWindowLine,
selectedIcon: RiTerminalWindowFill,
})
navConfig.push({
name: t('appMenus.evaluation', { ns: 'common' }),
href: `/app/${appId}/evaluation`,
icon: RiFlaskLine,
selectedIcon: RiFlaskFill,
})
}
navConfig.push({
name: t('appMenus.apiAccess', { ns: 'common' }),
href: `/app/${appId}/develop`,
icon: RiTerminalBoxLine,
selectedIcon: RiTerminalBoxFill,
})
if (isCurrentWorkspaceEditor) {
navConfig.push({
name: mode !== AppModeEnum.WORKFLOW
? t('appMenus.logAndAnn', { ns: 'common' })
: t('appMenus.logs', { ns: 'common' }),
href: `/app/${appId}/logs`,
icon: RiFileList3Line,
selectedIcon: RiFileList3Fill,
})
}
navConfig.push({
name: t('appMenus.overview', { ns: 'common' }),
href: `/app/${appId}/overview`,
icon: RiDashboard2Line,
selectedIcon: RiDashboard2Fill,
})
return navConfig
}, [t])

View File

@ -0,0 +1,11 @@
import Evaluation from '@/app/components/evaluation'
const Page = async (props: {
params: Promise<{ datasetId: string }>
}) => {
const { datasetId } = await props.params
return <Evaluation resourceType="pipeline" resourceId={datasetId} />
}
export default Page

View File

@ -6,6 +6,8 @@ import {
RiEqualizer2Line,
RiFileTextFill,
RiFileTextLine,
RiFlaskFill,
RiFlaskLine,
RiFocus2Fill,
RiFocus2Line,
} from '@remixicon/react'
@ -86,20 +88,30 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
]
if (datasetRes?.provider !== 'external') {
baseNavigation.unshift({
name: t('datasetMenus.pipeline', { ns: 'common' }),
href: `/datasets/${datasetId}/pipeline`,
icon: PipelineLine as RemixiconComponentType,
selectedIcon: PipelineFill as RemixiconComponentType,
disabled: false,
})
baseNavigation.unshift({
name: t('datasetMenus.documents', { ns: 'common' }),
href: `/datasets/${datasetId}/documents`,
icon: RiFileTextLine,
selectedIcon: RiFileTextFill,
disabled: isButtonDisabledWithPipeline,
})
return [
{
name: t('datasetMenus.documents', { ns: 'common' }),
href: `/datasets/${datasetId}/documents`,
icon: RiFileTextLine,
selectedIcon: RiFileTextFill,
disabled: isButtonDisabledWithPipeline,
},
{
name: t('datasetMenus.pipeline', { ns: 'common' }),
href: `/datasets/${datasetId}/pipeline`,
icon: PipelineLine as RemixiconComponentType,
selectedIcon: PipelineFill as RemixiconComponentType,
disabled: false,
},
{
name: t('datasetMenus.evaluation', { ns: 'common' }),
href: `/datasets/${datasetId}/evaluation`,
icon: RiFlaskLine,
selectedIcon: RiFlaskFill,
disabled: false,
},
...baseNavigation,
]
}
return baseNavigation

View File

@ -0,0 +1,11 @@
import SnippetPage from '@/app/components/snippets'
const Page = async (props: {
params: Promise<{ snippetId: string }>
}) => {
const { snippetId } = await props.params
return <SnippetPage snippetId={snippetId} section="evaluation" />
}
export default Page

View File

@ -0,0 +1,11 @@
import SnippetPage from '@/app/components/snippets'
const Page = async (props: {
params: Promise<{ snippetId: string }>
}) => {
const { snippetId } = await props.params
return <SnippetPage snippetId={snippetId} section="orchestrate" />
}
export default Page

View File

@ -0,0 +1,21 @@
import Page from './page'
const mockRedirect = vi.fn()
vi.mock('next/navigation', () => ({
redirect: (path: string) => mockRedirect(path),
}))
describe('snippet detail redirect page', () => {
beforeEach(() => {
vi.clearAllMocks()
})
it('should redirect legacy snippet detail routes to orchestrate', async () => {
await Page({
params: Promise.resolve({ snippetId: 'snippet-1' }),
})
expect(mockRedirect).toHaveBeenCalledWith('/snippets/snippet-1/orchestrate')
})
})

View File

@ -1,11 +1,11 @@
import SnippetPage from '@/app/components/snippets'
import { redirect } from 'next/navigation'
const Page = async (props: {
params: Promise<{ snippetId: string }>
}) => {
const { params } = props
const { snippetId } = await props.params
return <SnippetPage snippetId={(await params).snippetId} />
redirect(`/snippets/${snippetId}/orchestrate`)
}
export default Page

View File

@ -91,7 +91,7 @@ const SnippetCard = ({
snippet: SnippetListItem
}) => {
return (
<Link href={`/snippets/${snippet.id}`} className="group col-span-1">
<Link href={`/snippets/${snippet.id}/orchestrate`} className="group col-span-1">
<article className="relative inline-flex h-[160px] w-full flex-col rounded-xl border border-components-card-border bg-components-card-bg shadow-sm transition-all duration-200 ease-in-out hover:-translate-y-0.5 hover:shadow-lg">
{snippet.status && (
<div className="absolute right-0 top-0 rounded-bl-lg rounded-tr-xl bg-background-default-dimmed px-2 py-1 text-[10px] font-medium uppercase leading-3 text-text-placeholder">

View File

@ -0,0 +1,112 @@
import { act, fireEvent, render, screen } from '@testing-library/react'
import Evaluation from '..'
import { getEvaluationMockConfig } from '../mock'
import { useEvaluationStore } from '../store'
vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({
useModelList: () => ({
data: [{
provider: 'openai',
models: [{ model: 'gpt-4o-mini' }],
}],
}),
}))
vi.mock('@/app/components/header/account-setting/model-provider-page/model-selector', () => ({
default: ({ defaultModel }: { defaultModel?: { provider: string, model: string } }) => (
<div data-testid="evaluation-model-selector">
{defaultModel ? `${defaultModel.provider}:${defaultModel.model}` : 'empty'}
</div>
),
}))
describe('Evaluation', () => {
beforeEach(() => {
useEvaluationStore.setState({ resources: {} })
})
it('should search, add metrics, and create a batch history record', async () => {
vi.useFakeTimers()
render(<Evaluation resourceType="workflow" resourceId="app-1" />)
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini')
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
expect(screen.getByTestId('evaluation-metric-loading')).toBeInTheDocument()
await act(async () => {
vi.advanceTimersByTime(200)
})
fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchPlaceholder'), {
target: { value: 'does-not-exist' },
})
await act(async () => {
vi.advanceTimersByTime(200)
})
expect(screen.getByText('evaluation.metrics.noResults')).toBeInTheDocument()
fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchPlaceholder'), {
target: { value: 'faith' },
})
await act(async () => {
vi.advanceTimersByTime(200)
})
fireEvent.click(screen.getByRole('button', { name: /Faithfulness/i }))
expect(screen.getAllByText('Faithfulness').length).toBeGreaterThan(0)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.batch.run' }))
expect(screen.getByText('evaluation.batch.status.running')).toBeInTheDocument()
await act(async () => {
vi.advanceTimersByTime(1300)
})
expect(screen.getByText('evaluation.batch.status.success')).toBeInTheDocument()
expect(screen.getByText('Workflow evaluation batch')).toBeInTheDocument()
vi.useRealTimers()
})
it('should render time placeholders and hide the value row for empty operators', () => {
const resourceType = 'workflow'
const resourceId = 'app-2'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
const timeField = config.fieldOptions.find(field => field.type === 'time')!
let groupId = ''
let itemId = ''
act(() => {
store.ensureResource(resourceType, resourceId)
store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini')
const group = useEvaluationStore.getState().resources['workflow:app-2'].conditions[0]
groupId = group.id
itemId = group.items[0].id
store.updateConditionField(resourceType, resourceId, groupId, itemId, timeField.id)
store.updateConditionOperator(resourceType, resourceId, groupId, itemId, 'before')
})
let rerender: ReturnType<typeof render>['rerender']
act(() => {
({ rerender } = render(<Evaluation resourceType={resourceType} resourceId={resourceId} />))
})
expect(screen.getByText('evaluation.conditions.selectTime')).toBeInTheDocument()
act(() => {
store.updateConditionOperator(resourceType, resourceId, groupId, itemId, 'is_empty')
rerender(<Evaluation resourceType={resourceType} resourceId={resourceId} />)
})
expect(screen.queryByText('evaluation.conditions.selectTime')).not.toBeInTheDocument()
})
})

View File

@ -0,0 +1,96 @@
import { getEvaluationMockConfig } from '../mock'
import {
getAllowedOperators,
isCustomMetricConfigured,
requiresConditionValue,
useEvaluationStore,
} from '../store'
describe('evaluation store', () => {
beforeEach(() => {
useEvaluationStore.setState({ resources: {} })
})
it('should configure a custom metric mapping to a valid state', () => {
const resourceType = 'workflow'
const resourceId = 'app-1'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
store.ensureResource(resourceType, resourceId)
store.addCustomMetric(resourceType, resourceId)
const initialMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.kind === 'custom-workflow')
expect(initialMetric).toBeDefined()
expect(isCustomMetricConfigured(initialMetric!)).toBe(false)
store.setCustomMetricWorkflow(resourceType, resourceId, initialMetric!.id, config.workflowOptions[0].id)
store.updateCustomMetricMapping(resourceType, resourceId, initialMetric!.id, initialMetric!.customConfig!.mappings[0].id, {
sourceFieldId: config.fieldOptions[0].id,
targetVariableId: config.workflowOptions[0].targetVariables[0].id,
})
const configuredMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.id === initialMetric!.id)
expect(isCustomMetricConfigured(configuredMetric!)).toBe(true)
})
it('should add and remove builtin metrics', () => {
const resourceType = 'workflow'
const resourceId = 'app-2'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
store.ensureResource(resourceType, resourceId)
store.addBuiltinMetric(resourceType, resourceId, config.builtinMetrics[1].id)
const addedMetric = useEvaluationStore.getState().resources['workflow:app-2'].metrics.find(metric => metric.optionId === config.builtinMetrics[1].id)
expect(addedMetric).toBeDefined()
store.removeMetric(resourceType, resourceId, addedMetric!.id)
expect(useEvaluationStore.getState().resources['workflow:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false)
})
it('should update condition groups and adapt operators to field types', () => {
const resourceType = 'pipeline'
const resourceId = 'dataset-1'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
store.ensureResource(resourceType, resourceId)
const initialGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
store.setConditionGroupOperator(resourceType, resourceId, initialGroup.id, 'or')
store.addConditionGroup(resourceType, resourceId)
const booleanField = config.fieldOptions.find(field => field.type === 'boolean')!
const currentItem = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0].items[0]
store.updateConditionField(resourceType, resourceId, initialGroup.id, currentItem.id, booleanField.id)
const updatedGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
expect(updatedGroup.logicalOperator).toBe('or')
expect(updatedGroup.items[0].operator).toBe('is')
expect(getAllowedOperators(resourceType, booleanField.id)).toEqual(['is', 'is_not'])
})
it('should support time fields and clear values for empty operators', () => {
const resourceType = 'workflow'
const resourceId = 'app-3'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
store.ensureResource(resourceType, resourceId)
const timeField = config.fieldOptions.find(field => field.type === 'time')!
const item = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, timeField.id)
store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, 'is_empty')
const updatedItem = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
expect(getAllowedOperators(resourceType, timeField.id)).toEqual(['is', 'before', 'after', 'is_empty', 'is_not_empty'])
expect(requiresConditionValue('is_empty')).toBe(false)
expect(updatedItem.value).toBeNull()
})
})

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,184 @@
import type {
ComparisonOperator,
EvaluationFieldOption,
EvaluationMockConfig,
EvaluationResourceType,
MetricOption,
} from './types'
const judgeModels = [
{
id: 'gpt-4.1-mini',
label: 'GPT-4.1 mini',
provider: 'OpenAI',
},
{
id: 'claude-3-7-sonnet',
label: 'Claude 3.7 Sonnet',
provider: 'Anthropic',
},
{
id: 'gemini-2.0-flash',
label: 'Gemini 2.0 Flash',
provider: 'Google',
},
]
const builtinMetrics: MetricOption[] = [
{
id: 'answer-correctness',
label: 'Answer Correctness',
description: 'Compares the response with the expected answer and scores factual alignment.',
group: 'quality',
badges: ['LLM', 'Built-in'],
},
{
id: 'faithfulness',
label: 'Faithfulness',
description: 'Checks whether the answer stays grounded in the retrieved evidence.',
group: 'quality',
badges: ['LLM', 'Retrieval'],
},
{
id: 'relevance',
label: 'Relevance',
description: 'Evaluates how directly the answer addresses the original request.',
group: 'quality',
badges: ['LLM'],
},
{
id: 'latency',
label: 'Latency',
description: 'Captures runtime responsiveness for the full execution path.',
group: 'operations',
badges: ['System'],
},
{
id: 'token-usage',
label: 'Token Usage',
description: 'Tracks prompt and completion token consumption for the run.',
group: 'operations',
badges: ['System'],
},
{
id: 'tool-success-rate',
label: 'Tool Success Rate',
description: 'Measures whether each required tool invocation finishes without failure.',
group: 'operations',
badges: ['Workflow'],
},
]
const workflowOptions = [
{
id: 'workflow-precision-review',
label: 'Precision Review Workflow',
description: 'Custom evaluator for nuanced quality review.',
targetVariables: [
{ id: 'query', label: 'query' },
{ id: 'answer', label: 'answer' },
{ id: 'reference', label: 'reference' },
],
},
{
id: 'workflow-risk-review',
label: 'Risk Review Workflow',
description: 'Custom evaluator for policy and escalation checks.',
targetVariables: [
{ id: 'input', label: 'input' },
{ id: 'output', label: 'output' },
],
},
]
const workflowFields: EvaluationFieldOption[] = [
{ id: 'app.input.query', label: 'Query', group: 'App Input', type: 'string' },
{ id: 'app.input.locale', label: 'Locale', group: 'App Input', type: 'enum', options: [{ value: 'en-US', label: 'en-US' }, { value: 'zh-Hans', label: 'zh-Hans' }] },
{ id: 'app.output.answer', label: 'Answer', group: 'App Output', type: 'string' },
{ id: 'app.output.score', label: 'Score', group: 'App Output', type: 'number' },
{ id: 'app.output.published_at', label: 'Publication Date', group: 'App Output', type: 'time' },
{ id: 'system.has_context', label: 'Has Context', group: 'System', type: 'boolean' },
]
const pipelineFields: EvaluationFieldOption[] = [
{ id: 'dataset.input.document_id', label: 'Document ID', group: 'Dataset', type: 'string' },
{ id: 'dataset.input.chunk_count', label: 'Chunk Count', group: 'Dataset', type: 'number' },
{ id: 'dataset.input.updated_at', label: 'Updated At', group: 'Dataset', type: 'time' },
{ id: 'retrieval.output.hit_rate', label: 'Hit Rate', group: 'Retrieval', type: 'number' },
{ id: 'retrieval.output.source', label: 'Source', group: 'Retrieval', type: 'enum', options: [{ value: 'bm25', label: 'BM25' }, { value: 'hybrid', label: 'Hybrid' }] },
{ id: 'pipeline.output.published', label: 'Published', group: 'Output', type: 'boolean' },
]
const snippetFields: EvaluationFieldOption[] = [
{ id: 'snippet.input.blog_url', label: 'Blog URL', group: 'Snippet Input', type: 'string' },
{ id: 'snippet.input.platforms', label: 'Platforms', group: 'Snippet Input', type: 'string' },
{ id: 'snippet.output.content', label: 'Generated Content', group: 'Snippet Output', type: 'string' },
{ id: 'snippet.output.length', label: 'Output Length', group: 'Snippet Output', type: 'number' },
{ id: 'snippet.output.scheduled_at', label: 'Scheduled At', group: 'Snippet Output', type: 'time' },
{ id: 'system.requires_review', label: 'Requires Review', group: 'System', type: 'boolean' },
]
export const getComparisonOperators = (fieldType: EvaluationFieldOption['type']): ComparisonOperator[] => {
if (fieldType === 'number')
return ['is', 'is_not', 'greater_than', 'less_than', 'greater_or_equal', 'less_or_equal', 'is_empty', 'is_not_empty']
if (fieldType === 'time')
return ['is', 'before', 'after', 'is_empty', 'is_not_empty']
if (fieldType === 'boolean' || fieldType === 'enum')
return ['is', 'is_not']
return ['contains', 'not_contains', 'is', 'is_not', 'is_empty', 'is_not_empty']
}
export const getDefaultOperator = (fieldType: EvaluationFieldOption['type']): ComparisonOperator => {
return getComparisonOperators(fieldType)[0]
}
export const getEvaluationMockConfig = (resourceType: EvaluationResourceType): EvaluationMockConfig => {
if (resourceType === 'pipeline') {
return {
judgeModels,
builtinMetrics,
workflowOptions,
fieldOptions: pipelineFields,
templateFileName: 'pipeline-evaluation-template.csv',
batchRequirements: [
'Include one row per retrieval scenario.',
'Provide the expected source or target chunk for each case.',
'Keep numeric metrics in plain number format.',
],
historySummaryLabel: 'Pipeline evaluation batch',
}
}
if (resourceType === 'snippet') {
return {
judgeModels,
builtinMetrics,
workflowOptions,
fieldOptions: snippetFields,
templateFileName: 'snippet-evaluation-template.csv',
batchRequirements: [
'Include one row per snippet execution case.',
'Provide the expected final content or acceptance rule.',
'Keep optional fields empty when not used.',
],
historySummaryLabel: 'Snippet evaluation batch',
}
}
return {
judgeModels,
builtinMetrics,
workflowOptions,
fieldOptions: workflowFields,
templateFileName: 'workflow-evaluation-template.csv',
batchRequirements: [
'Include one row per workflow test case.',
'Provide both user input and expected answer when available.',
'Keep boolean columns as true or false.',
],
historySummaryLabel: 'Workflow evaluation batch',
}
}

View File

@ -0,0 +1,635 @@
import type {
BatchTestRecord,
ComparisonOperator,
EvaluationFieldOption,
EvaluationMetric,
EvaluationResourceState,
EvaluationResourceType,
JudgmentConditionGroup,
} from './types'
import { create } from 'zustand'
import { getComparisonOperators, getDefaultOperator, getEvaluationMockConfig } from './mock'
type EvaluationStore = {
resources: Record<string, EvaluationResourceState>
ensureResource: (resourceType: EvaluationResourceType, resourceId: string) => void
setJudgeModel: (resourceType: EvaluationResourceType, resourceId: string, judgeModelId: string) => void
addBuiltinMetric: (resourceType: EvaluationResourceType, resourceId: string, optionId: string) => void
addCustomMetric: (resourceType: EvaluationResourceType, resourceId: string) => void
removeMetric: (resourceType: EvaluationResourceType, resourceId: string, metricId: string) => void
setCustomMetricWorkflow: (resourceType: EvaluationResourceType, resourceId: string, metricId: string, workflowId: string) => void
addCustomMetricMapping: (resourceType: EvaluationResourceType, resourceId: string, metricId: string) => void
updateCustomMetricMapping: (
resourceType: EvaluationResourceType,
resourceId: string,
metricId: string,
mappingId: string,
patch: { sourceFieldId?: string | null, targetVariableId?: string | null },
) => void
removeCustomMetricMapping: (resourceType: EvaluationResourceType, resourceId: string, metricId: string, mappingId: string) => void
addConditionGroup: (resourceType: EvaluationResourceType, resourceId: string) => void
removeConditionGroup: (resourceType: EvaluationResourceType, resourceId: string, groupId: string) => void
setConditionGroupOperator: (resourceType: EvaluationResourceType, resourceId: string, groupId: string, logicalOperator: 'and' | 'or') => void
addConditionItem: (resourceType: EvaluationResourceType, resourceId: string, groupId: string) => void
removeConditionItem: (resourceType: EvaluationResourceType, resourceId: string, groupId: string, itemId: string) => void
updateConditionField: (resourceType: EvaluationResourceType, resourceId: string, groupId: string, itemId: string, fieldId: string) => void
updateConditionOperator: (resourceType: EvaluationResourceType, resourceId: string, groupId: string, itemId: string, operator: ComparisonOperator) => void
updateConditionValue: (
resourceType: EvaluationResourceType,
resourceId: string,
groupId: string,
itemId: string,
value: string | number | boolean | null,
) => void
setBatchTab: (resourceType: EvaluationResourceType, resourceId: string, tab: EvaluationResourceState['activeBatchTab']) => void
setUploadedFileName: (resourceType: EvaluationResourceType, resourceId: string, uploadedFileName: string | null) => void
runBatchTest: (resourceType: EvaluationResourceType, resourceId: string) => void
}
const buildResourceKey = (resourceType: EvaluationResourceType, resourceId: string) => `${resourceType}:${resourceId}`
const initialResourceCache: Record<string, EvaluationResourceState> = {}
const createId = (prefix: string) => `${prefix}-${Math.random().toString(36).slice(2, 10)}`
export const conditionOperatorsWithoutValue: ComparisonOperator[] = ['is_empty', 'is_not_empty']
export const requiresConditionValue = (operator: ComparisonOperator) => !conditionOperatorsWithoutValue.includes(operator)
const getConditionValue = (
field: EvaluationFieldOption | undefined,
operator: ComparisonOperator,
previousValue: string | number | boolean | null = null,
) => {
if (!field || !requiresConditionValue(operator))
return null
if (field.type === 'boolean')
return typeof previousValue === 'boolean' ? previousValue : null
if (field.type === 'enum')
return typeof previousValue === 'string' ? previousValue : null
if (field.type === 'number')
return typeof previousValue === 'number' ? previousValue : null
return typeof previousValue === 'string' ? previousValue : null
}
const buildConditionItem = (resourceType: EvaluationResourceType) => {
const field = getEvaluationMockConfig(resourceType).fieldOptions[0]
const operator = field ? getDefaultOperator(field.type) : 'contains'
return {
id: createId('condition'),
fieldId: field?.id ?? null,
operator,
value: getConditionValue(field, operator),
}
}
const buildInitialState = (resourceType: EvaluationResourceType): EvaluationResourceState => {
const config = getEvaluationMockConfig(resourceType)
const defaultMetric = config.builtinMetrics[0]
return {
judgeModelId: null,
metrics: defaultMetric
? [{
id: createId('metric'),
optionId: defaultMetric.id,
kind: 'builtin',
label: defaultMetric.label,
description: defaultMetric.description,
badges: defaultMetric.badges,
}]
: [],
conditions: [{
id: createId('group'),
logicalOperator: 'and',
items: [buildConditionItem(resourceType)],
}],
activeBatchTab: 'input-fields',
uploadedFileName: null,
batchRecords: [],
}
}
const withResourceState = (
resources: EvaluationStore['resources'],
resourceType: EvaluationResourceType,
resourceId: string,
) => {
const resourceKey = buildResourceKey(resourceType, resourceId)
return {
resourceKey,
resource: resources[resourceKey] ?? buildInitialState(resourceType),
}
}
const updateMetric = (
metrics: EvaluationMetric[],
metricId: string,
updater: (metric: EvaluationMetric) => EvaluationMetric,
) => metrics.map(metric => metric.id === metricId ? updater(metric) : metric)
const updateConditionGroup = (
groups: JudgmentConditionGroup[],
groupId: string,
updater: (group: JudgmentConditionGroup) => JudgmentConditionGroup,
) => groups.map(group => group.id === groupId ? updater(group) : group)
export const isCustomMetricConfigured = (metric: EvaluationMetric) => {
if (metric.kind !== 'custom-workflow')
return true
if (!metric.customConfig?.workflowId)
return false
return metric.customConfig.mappings.length > 0
&& metric.customConfig.mappings.every(mapping => !!mapping.sourceFieldId && !!mapping.targetVariableId)
}
export const isEvaluationRunnable = (state: EvaluationResourceState) => {
return !!state.judgeModelId
&& state.metrics.length > 0
&& state.metrics.every(isCustomMetricConfigured)
&& state.conditions.some(group => group.items.length > 0)
}
export const useEvaluationStore = create<EvaluationStore>((set, get) => ({
resources: {},
ensureResource: (resourceType, resourceId) => {
const resourceKey = buildResourceKey(resourceType, resourceId)
if (get().resources[resourceKey])
return
set(state => ({
resources: {
...state.resources,
[resourceKey]: buildInitialState(resourceType),
},
}))
},
setJudgeModel: (resourceType, resourceId, judgeModelId) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
judgeModelId,
},
},
}
})
},
addBuiltinMetric: (resourceType, resourceId, optionId) => {
const option = getEvaluationMockConfig(resourceType).builtinMetrics.find(metric => metric.id === optionId)
if (!option)
return
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
if (resource.metrics.some(metric => metric.optionId === optionId && metric.kind === 'builtin'))
return state
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
metrics: [
...resource.metrics,
{
id: createId('metric'),
optionId: option.id,
kind: 'builtin',
label: option.label,
description: option.description,
badges: option.badges,
},
],
},
},
}
})
},
addCustomMetric: (resourceType, resourceId) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
metrics: [
...resource.metrics,
{
id: createId('metric'),
optionId: createId('custom'),
kind: 'custom-workflow',
label: 'Custom Evaluator',
description: 'Map workflow variables to your evaluation inputs.',
badges: ['Workflow'],
customConfig: {
workflowId: null,
mappings: [{
id: createId('mapping'),
sourceFieldId: null,
targetVariableId: null,
}],
},
},
],
},
},
}
})
},
removeMetric: (resourceType, resourceId, metricId) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
metrics: resource.metrics.filter(metric => metric.id !== metricId),
},
},
}
})
},
setCustomMetricWorkflow: (resourceType, resourceId, metricId, workflowId) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
metrics: updateMetric(resource.metrics, metricId, metric => ({
...metric,
customConfig: metric.customConfig
? {
...metric.customConfig,
workflowId,
mappings: metric.customConfig.mappings.map(mapping => ({
...mapping,
targetVariableId: null,
})),
}
: metric.customConfig,
})),
},
},
}
})
},
addCustomMetricMapping: (resourceType, resourceId, metricId) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
metrics: updateMetric(resource.metrics, metricId, metric => ({
...metric,
customConfig: metric.customConfig
? {
...metric.customConfig,
mappings: [
...metric.customConfig.mappings,
{
id: createId('mapping'),
sourceFieldId: null,
targetVariableId: null,
},
],
}
: metric.customConfig,
})),
},
},
}
})
},
updateCustomMetricMapping: (resourceType, resourceId, metricId, mappingId, patch) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
metrics: updateMetric(resource.metrics, metricId, metric => ({
...metric,
customConfig: metric.customConfig
? {
...metric.customConfig,
mappings: metric.customConfig.mappings.map(mapping => mapping.id === mappingId ? { ...mapping, ...patch } : mapping),
}
: metric.customConfig,
})),
},
},
}
})
},
removeCustomMetricMapping: (resourceType, resourceId, metricId, mappingId) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
metrics: updateMetric(resource.metrics, metricId, metric => ({
...metric,
customConfig: metric.customConfig
? {
...metric.customConfig,
mappings: metric.customConfig.mappings.filter(mapping => mapping.id !== mappingId),
}
: metric.customConfig,
})),
},
},
}
})
},
addConditionGroup: (resourceType, resourceId) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
conditions: [
...resource.conditions,
{
id: createId('group'),
logicalOperator: 'and',
items: [buildConditionItem(resourceType)],
},
],
},
},
}
})
},
removeConditionGroup: (resourceType, resourceId, groupId) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
conditions: resource.conditions.filter(group => group.id !== groupId),
},
},
}
})
},
setConditionGroupOperator: (resourceType, resourceId, groupId, logicalOperator) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
conditions: updateConditionGroup(resource.conditions, groupId, group => ({
...group,
logicalOperator,
})),
},
},
}
})
},
addConditionItem: (resourceType, resourceId, groupId) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
conditions: updateConditionGroup(resource.conditions, groupId, group => ({
...group,
items: [
...group.items,
buildConditionItem(resourceType),
],
})),
},
},
}
})
},
removeConditionItem: (resourceType, resourceId, groupId, itemId) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
conditions: updateConditionGroup(resource.conditions, groupId, group => ({
...group,
items: group.items.filter(item => item.id !== itemId),
})),
},
},
}
})
},
updateConditionField: (resourceType, resourceId, groupId, itemId, fieldId) => {
const field = getEvaluationMockConfig(resourceType).fieldOptions.find(option => option.id === fieldId)
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
conditions: updateConditionGroup(resource.conditions, groupId, group => ({
...group,
items: group.items.map((item) => {
if (item.id !== itemId)
return item
return {
...item,
fieldId,
operator: field ? getDefaultOperator(field.type) : item.operator,
value: getConditionValue(field, field ? getDefaultOperator(field.type) : item.operator),
}
}),
})),
},
},
}
})
},
updateConditionOperator: (resourceType, resourceId, groupId, itemId, operator) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
const fieldOptions = getEvaluationMockConfig(resourceType).fieldOptions
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
conditions: updateConditionGroup(resource.conditions, groupId, group => ({
...group,
items: group.items.map((item) => {
if (item.id !== itemId)
return item
const field = fieldOptions.find(option => option.id === item.fieldId)
return {
...item,
operator,
value: getConditionValue(field, operator, item.value),
}
}),
})),
},
},
}
})
},
updateConditionValue: (resourceType, resourceId, groupId, itemId, value) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
conditions: updateConditionGroup(resource.conditions, groupId, group => ({
...group,
items: group.items.map(item => item.id === itemId ? { ...item, value } : item),
})),
},
},
}
})
},
setBatchTab: (resourceType, resourceId, tab) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
activeBatchTab: tab,
},
},
}
})
},
setUploadedFileName: (resourceType, resourceId, uploadedFileName) => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
uploadedFileName,
},
},
}
})
},
runBatchTest: (resourceType, resourceId) => {
const config = getEvaluationMockConfig(resourceType)
const recordId = createId('batch')
const nextRecord: BatchTestRecord = {
id: recordId,
fileName: get().resources[buildResourceKey(resourceType, resourceId)]?.uploadedFileName ?? config.templateFileName,
status: 'running',
startedAt: new Date().toLocaleTimeString(),
summary: config.historySummaryLabel,
}
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
activeBatchTab: 'history',
batchRecords: [nextRecord, ...resource.batchRecords],
},
},
}
})
window.setTimeout(() => {
set((state) => {
const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId)
return {
resources: {
...state.resources,
[resourceKey]: {
...resource,
batchRecords: resource.batchRecords.map(record => record.id === recordId
? {
...record,
status: resource.metrics.length > 1 ? 'success' : 'failed',
}
: record),
},
},
}
})
}, 1200)
},
}))
export const useEvaluationResource = (resourceType: EvaluationResourceType, resourceId: string) => {
const resourceKey = buildResourceKey(resourceType, resourceId)
return useEvaluationStore(state => state.resources[resourceKey] ?? (initialResourceCache[resourceKey] ??= buildInitialState(resourceType)))
}
export const getAllowedOperators = (resourceType: EvaluationResourceType, fieldId: string | null) => {
const field = getEvaluationMockConfig(resourceType).fieldOptions.find(option => option.id === fieldId)
if (!field)
return ['contains'] as ComparisonOperator[]
return getComparisonOperators(field.type)
}

View File

@ -0,0 +1,117 @@
export type EvaluationResourceType = 'workflow' | 'pipeline' | 'snippet'
export type MetricKind = 'builtin' | 'custom-workflow'
export type BatchTestTab = 'input-fields' | 'history'
export type FieldType = 'string' | 'number' | 'boolean' | 'enum' | 'time'
export type ComparisonOperator
= | 'contains'
| 'not_contains'
| 'is'
| 'is_not'
| 'is_empty'
| 'is_not_empty'
| 'greater_than'
| 'less_than'
| 'greater_or_equal'
| 'less_or_equal'
| 'before'
| 'after'
export type JudgeModelOption = {
id: string
label: string
provider: string
}
export type MetricOption = {
id: string
label: string
description: string
group: string
badges: string[]
}
export type EvaluationWorkflowOption = {
id: string
label: string
description: string
targetVariables: Array<{
id: string
label: string
}>
}
export type EvaluationFieldOption = {
id: string
label: string
group: string
type: FieldType
options?: Array<{
value: string
label: string
}>
}
export type CustomMetricMapping = {
id: string
sourceFieldId: string | null
targetVariableId: string | null
}
export type CustomMetricConfig = {
workflowId: string | null
mappings: CustomMetricMapping[]
}
export type EvaluationMetric = {
id: string
optionId: string
kind: MetricKind
label: string
description: string
badges: string[]
customConfig?: CustomMetricConfig
}
export type JudgmentConditionItem = {
id: string
fieldId: string | null
operator: ComparisonOperator
value: string | number | boolean | null
}
export type JudgmentConditionGroup = {
id: string
logicalOperator: 'and' | 'or'
items: JudgmentConditionItem[]
}
export type BatchTestRecord = {
id: string
fileName: string
status: 'running' | 'success' | 'failed'
startedAt: string
summary: string
}
export type EvaluationResourceState = {
judgeModelId: string | null
metrics: EvaluationMetric[]
conditions: JudgmentConditionGroup[]
activeBatchTab: BatchTestTab
uploadedFileName: string | null
batchRecords: BatchTestRecord[]
}
export type EvaluationMockConfig = {
judgeModels: JudgeModelOption[]
builtinMetrics: MetricOption[]
workflowOptions: EvaluationWorkflowOption[]
fieldOptions: EvaluationFieldOption[]
templateFileName: string
batchRequirements: string[]
historySummaryLabel: string
}

View File

@ -2,7 +2,7 @@
import type { NavIcon } from '@/app/components/app-sidebar/nav-link'
import type { WorkflowProps } from '@/app/components/workflow'
import type { SnippetDetailPayload, SnippetInputField } from '@/models/snippet'
import type { SnippetDetailPayload, SnippetInputField, SnippetSection } from '@/models/snippet'
import {
RiFlaskFill,
RiFlaskLine,
@ -17,6 +17,7 @@ import NavLink from '@/app/components/app-sidebar/nav-link'
import SnippetInfo from '@/app/components/app-sidebar/snippet-info'
import { useStore as useAppStore } from '@/app/components/app/store'
import Toast from '@/app/components/base/toast'
import Evaluation from '@/app/components/evaluation'
import { WorkflowWithInnerContext } from '@/app/components/workflow'
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
import { useSnippetDetailStore } from '../store'
@ -25,6 +26,7 @@ import SnippetChildren from './snippet-children'
type SnippetMainProps = {
payload: SnippetDetailPayload
snippetId: string
section: SnippetSection
} & Pick<WorkflowProps, 'nodes' | 'edges' | 'viewport'>
const ORCHESTRATE_ICONS: { normal: NavIcon, selected: NavIcon } = {
@ -40,6 +42,7 @@ const EVALUATION_ICONS: { normal: NavIcon, selected: NavIcon } = {
const SnippetMain = ({
payload,
snippetId,
section,
nodes,
edges,
viewport,
@ -51,7 +54,6 @@ const SnippetMain = ({
const [fields, setFields] = useState<SnippetInputField[]>(payload.inputFields)
const setAppSidebarExpand = useAppStore(state => state.setAppSidebarExpand)
const {
activeSection,
editingField,
isEditorOpen,
isInputPanelOpen,
@ -59,12 +61,10 @@ const SnippetMain = ({
closeEditor,
openEditor,
reset,
setActiveSection,
setInputPanelOpen,
toggleInputPanel,
togglePublishMenu,
} = useSnippetDetailStore(useShallow(state => ({
activeSection: state.activeSection,
editingField: state.editingField,
isEditorOpen: state.isEditorOpen,
isInputPanelOpen: state.isInputPanelOpen,
@ -72,7 +72,6 @@ const SnippetMain = ({
closeEditor: state.closeEditor,
openEditor: state.openEditor,
reset: state.reset,
setActiveSection: state.setActiveSection,
setInputPanelOpen: state.setInputPanelOpen,
toggleInputPanel: state.toggleInputPanel,
togglePublishMenu: state.togglePublishMenu,
@ -145,15 +144,15 @@ const SnippetMain = ({
mode={mode}
name={t('sectionOrchestrate')}
iconMap={ORCHESTRATE_ICONS}
active={activeSection === 'orchestrate'}
onClick={() => setActiveSection('orchestrate')}
href={`/snippets/${snippetId}/orchestrate`}
active={section === 'orchestrate'}
/>
<NavLink
mode={mode}
name={t('sectionEvaluation')}
iconMap={EVALUATION_ICONS}
active={activeSection === 'evaluation'}
onClick={() => setActiveSection('evaluation')}
href={`/snippets/${snippetId}/evaluation`}
active={section === 'evaluation'}
/>
</>
)}
@ -161,29 +160,35 @@ const SnippetMain = ({
<div className="relative min-h-0 min-w-0 grow overflow-hidden">
<div className="absolute inset-0 min-h-0 min-w-0 overflow-hidden">
<WorkflowWithInnerContext
nodes={nodes}
edges={edges}
viewport={viewport ?? graph.viewport}
>
<SnippetChildren
fields={fields}
uiMeta={uiMeta}
editingField={editingField}
isEditorOpen={isEditorOpen}
isInputPanelOpen={isInputPanelOpen}
isPublishMenuOpen={isPublishMenuOpen}
onToggleInputPanel={handleToggleInputPanel}
onTogglePublishMenu={togglePublishMenu}
onCloseInputPanel={handleCloseInputPanel}
onOpenEditor={openEditor}
onCloseEditor={closeEditor}
onSubmitField={handleSubmitField}
onRemoveField={handleRemoveField}
onPrimarySortChange={handlePrimarySortChange}
onSecondarySortChange={handleSecondarySortChange}
/>
</WorkflowWithInnerContext>
{section === 'evaluation'
? (
<Evaluation resourceType="snippet" resourceId={snippetId} />
)
: (
<WorkflowWithInnerContext
nodes={nodes}
edges={edges}
viewport={viewport ?? graph.viewport}
>
<SnippetChildren
fields={fields}
uiMeta={uiMeta}
editingField={editingField}
isEditorOpen={isEditorOpen}
isInputPanelOpen={isInputPanelOpen}
isPublishMenuOpen={isPublishMenuOpen}
onToggleInputPanel={handleToggleInputPanel}
onTogglePublishMenu={togglePublishMenu}
onCloseInputPanel={handleCloseInputPanel}
onOpenEditor={openEditor}
onCloseEditor={closeEditor}
onSubmitField={handleSubmitField}
onRemoveField={handleRemoveField}
onPrimarySortChange={handlePrimarySortChange}
onSecondarySortChange={handleSecondarySortChange}
/>
</WorkflowWithInnerContext>
)}
</div>
</div>
</div>

View File

@ -1,5 +1,6 @@
'use client'
import type { SnippetSection } from '@/models/snippet'
import { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import Loading from '@/app/components/base/loading'
@ -14,10 +15,12 @@ import { useSnippetInit } from './hooks/use-snippet-init'
type SnippetPageProps = {
snippetId: string
section?: SnippetSection
}
const SnippetPage = ({
snippetId,
section = 'orchestrate',
}: SnippetPageProps) => {
const { t } = useTranslation('snippet')
const { data, isLoading } = useSnippetInit(snippetId)
@ -62,6 +65,7 @@ const SnippetPage = ({
<SnippetMain
key={snippetId}
snippetId={snippetId}
section={section}
payload={data}
nodes={nodesData}
edges={edgesData}

View File

@ -14,6 +14,7 @@ import type datasetPipeline from '../i18n/en-US/dataset-pipeline.json'
import type datasetSettings from '../i18n/en-US/dataset-settings.json'
import type dataset from '../i18n/en-US/dataset.json'
import type education from '../i18n/en-US/education.json'
import type evaluation from '../i18n/en-US/evaluation.json'
import type explore from '../i18n/en-US/explore.json'
import type layout from '../i18n/en-US/layout.json'
import type login from '../i18n/en-US/login.json'
@ -48,6 +49,7 @@ export type Resources = {
datasetPipeline: typeof datasetPipeline
datasetSettings: typeof datasetSettings
education: typeof education
evaluation: typeof evaluation
explore: typeof explore
layout: typeof layout
login: typeof login
@ -82,6 +84,7 @@ export const namespaces = [
'datasetPipeline',
'datasetSettings',
'education',
'evaluation',
'explore',
'layout',
'login',

View File

@ -93,6 +93,7 @@
"apiBasedExtension.type": "Type",
"appMenus.apiAccess": "API Access",
"appMenus.apiAccessTip": "This knowledge base is accessible via the Service API",
"appMenus.evaluation": "Evaluation",
"appMenus.logAndAnn": "Logs & Annotations",
"appMenus.logs": "Logs",
"appMenus.overview": "Monitoring",
@ -149,6 +150,7 @@
"dataSource.website.with": "With",
"datasetMenus.documents": "Documents",
"datasetMenus.emptyTip": "This Knowledge has not been integrated within any application. Please refer to the document for guidance.",
"datasetMenus.evaluation": "Evaluation",
"datasetMenus.hitTesting": "Retrieval Testing",
"datasetMenus.noRelatedApp": "No linked apps",
"datasetMenus.pipeline": "Pipeline",

View File

@ -0,0 +1,73 @@
{
"batch.downloadTemplate": "Download Excel Template",
"batch.emptyHistory": "No test history yet.",
"batch.noticeDescription": "Download the template, upload your cases, then run a local mock batch test.",
"batch.noticeTitle": "Quick start",
"batch.requirementsTitle": "Data requirements",
"batch.run": "Run Test",
"batch.status.failed": "Failed",
"batch.status.running": "Running",
"batch.status.success": "Success",
"batch.tabs.history": "Test History",
"batch.tabs.input-fields": "Input Fields",
"batch.title": "Batch Test",
"batch.uploadHint": "Select a .csv or .xlsx file",
"batch.uploadTitle": "Upload test file",
"batch.validation": "Complete the judge model, metrics, and custom mappings before running a batch test.",
"conditions.addCondition": "Add Condition",
"conditions.addGroup": "Add Condition Group",
"conditions.boolean.false": "False",
"conditions.boolean.true": "True",
"conditions.description": "Define additional rules for when results should pass or fail.",
"conditions.emptyDescription": "Start with a condition group to define evaluation gates.",
"conditions.emptyTitle": "No conditions yet",
"conditions.fieldPlaceholder": "Select field",
"conditions.groupLabel": "Group {{index}}",
"conditions.logical.and": "AND",
"conditions.logical.or": "OR",
"conditions.operators.after": "After",
"conditions.operators.before": "Before",
"conditions.operators.contains": "Contains",
"conditions.operators.greater_or_equal": "Greater than or equal",
"conditions.operators.greater_than": "Greater than",
"conditions.operators.is": "Is",
"conditions.operators.is_empty": "Is empty",
"conditions.operators.is_not": "Is not",
"conditions.operators.is_not_empty": "Is not empty",
"conditions.operators.less_or_equal": "Less than or equal",
"conditions.operators.less_than": "Less than",
"conditions.operators.not_contains": "Does not contain",
"conditions.removeCondition": "Remove condition",
"conditions.removeGroup": "Remove condition group",
"conditions.selectFieldFirst": "Select a field first",
"conditions.selectTime": "Choose a time...",
"conditions.selectValue": "Choose a value",
"conditions.title": "Judgment Conditions",
"conditions.valuePlaceholder": "Enter a value",
"description": "Configure judge models, metrics, and batch tests for this resource.",
"judgeModel.description": "Choose the model used to score your evaluation results.",
"judgeModel.title": "Judge Model",
"metrics.add": "Add Metric",
"metrics.addCustom": "Add Custom Metrics",
"metrics.added": "Added",
"metrics.custom.addMapping": "Add Mapping",
"metrics.custom.description": "Select an evaluation workflow and map your variables before running tests.",
"metrics.custom.mappingTitle": "Variable Mapping",
"metrics.custom.mappingWarning": "Complete the workflow selection and each variable mapping to enable batch tests.",
"metrics.custom.sourcePlaceholder": "Source variable",
"metrics.custom.targetPlaceholder": "Target variable",
"metrics.custom.title": "Custom Evaluator",
"metrics.custom.warningBadge": "Needs setup",
"metrics.custom.workflowLabel": "Evaluation Workflow",
"metrics.custom.workflowPlaceholder": "Select a workflow",
"metrics.description": "Combine built-in metrics with custom evaluator workflows.",
"metrics.groups.operations": "Operations",
"metrics.groups.quality": "Quality",
"metrics.noResults": "No metrics match your search.",
"metrics.remove": "Remove metric",
"metrics.searchPlaceholder": "Search metrics",
"metrics.showLess": "Show less",
"metrics.showMore": "Show more",
"metrics.title": "Metrics",
"title": "Evaluation"
}

View File

@ -93,6 +93,7 @@
"apiBasedExtension.type": "类型",
"appMenus.apiAccess": "访问 API",
"appMenus.apiAccessTip": "此知识库可通过服务 API 访问",
"appMenus.evaluation": "评测",
"appMenus.logAndAnn": "日志与标注",
"appMenus.logs": "日志",
"appMenus.overview": "监测",
@ -149,6 +150,7 @@
"dataSource.website.with": "使用",
"datasetMenus.documents": "文档",
"datasetMenus.emptyTip": "此知识尚未集成到任何应用程序中。请参阅文档以获取指导。",
"datasetMenus.evaluation": "评测",
"datasetMenus.hitTesting": "召回测试",
"datasetMenus.noRelatedApp": "无关联应用",
"datasetMenus.pipeline": "流水线",

View File

@ -0,0 +1,73 @@
{
"batch.downloadTemplate": "下载 Excel 模板",
"batch.emptyHistory": "还没有测试历史。",
"batch.noticeDescription": "先下载模板,再上传测试集,然后运行本地模拟批量测试。",
"batch.noticeTitle": "快速开始",
"batch.requirementsTitle": "数据要求",
"batch.run": "运行测试",
"batch.status.failed": "失败",
"batch.status.running": "运行中",
"batch.status.success": "成功",
"batch.tabs.history": "测试历史",
"batch.tabs.input-fields": "输入字段",
"batch.title": "批量测试",
"batch.uploadHint": "选择 .csv 或 .xlsx 文件",
"batch.uploadTitle": "上传测试文件",
"batch.validation": "运行批量测试前,请先完成判定模型、指标和自定义映射配置。",
"conditions.addCondition": "添加条件",
"conditions.addGroup": "添加条件组",
"conditions.boolean.false": "否",
"conditions.boolean.true": "是",
"conditions.description": "定义额外规则,决定结果何时通过或失败。",
"conditions.emptyDescription": "从一个条件组开始,定义评测门槛。",
"conditions.emptyTitle": "还没有条件",
"conditions.fieldPlaceholder": "选择字段",
"conditions.groupLabel": "条件组 {{index}}",
"conditions.logical.and": "且",
"conditions.logical.or": "或",
"conditions.operators.after": "晚于",
"conditions.operators.before": "早于",
"conditions.operators.contains": "包含",
"conditions.operators.greater_or_equal": "大于等于",
"conditions.operators.greater_than": "大于",
"conditions.operators.is": "等于",
"conditions.operators.is_empty": "为空",
"conditions.operators.is_not": "不等于",
"conditions.operators.is_not_empty": "不为空",
"conditions.operators.less_or_equal": "小于等于",
"conditions.operators.less_than": "小于",
"conditions.operators.not_contains": "不包含",
"conditions.removeCondition": "删除条件",
"conditions.removeGroup": "删除条件组",
"conditions.selectFieldFirst": "请先选择字段",
"conditions.selectTime": "选择时间...",
"conditions.selectValue": "选择值",
"conditions.title": "判定条件",
"conditions.valuePlaceholder": "输入值",
"description": "为当前资源配置判定模型、评测指标和批量测试。",
"judgeModel.description": "选择用于打分和判定评测结果的模型。",
"judgeModel.title": "判定模型",
"metrics.add": "添加指标",
"metrics.addCustom": "添加自定义指标",
"metrics.added": "已添加",
"metrics.custom.addMapping": "添加映射",
"metrics.custom.description": "选择评测工作流并完成变量映射后即可运行测试。",
"metrics.custom.mappingTitle": "变量映射",
"metrics.custom.mappingWarning": "请先完成工作流选择和所有变量映射,再运行批量测试。",
"metrics.custom.sourcePlaceholder": "源变量",
"metrics.custom.targetPlaceholder": "目标变量",
"metrics.custom.title": "自定义评测器",
"metrics.custom.warningBadge": "待配置",
"metrics.custom.workflowLabel": "评测工作流",
"metrics.custom.workflowPlaceholder": "选择工作流",
"metrics.description": "组合内置指标和自定义评测工作流。",
"metrics.groups.operations": "运行",
"metrics.groups.quality": "质量",
"metrics.noResults": "没有匹配的指标。",
"metrics.remove": "删除指标",
"metrics.searchPlaceholder": "搜索指标",
"metrics.showLess": "收起",
"metrics.showMore": "展开更多",
"metrics.title": "指标",
"title": "评测"
}