diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/evaluation/page.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/evaluation/page.tsx new file mode 100644 index 0000000000..161f37108d --- /dev/null +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/evaluation/page.tsx @@ -0,0 +1,11 @@ +import Evaluation from '@/app/components/evaluation' + +const Page = async (props: { + params: Promise<{ appId: string }> +}) => { + const { appId } = await props.params + + return +} + +export default Page diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout-main.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout-main.tsx index fd0bf2c8bd..0160553092 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout-main.tsx +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout-main.tsx @@ -7,6 +7,8 @@ import { RiDashboard2Line, RiFileList3Fill, RiFileList3Line, + RiFlaskFill, + RiFlaskLine, RiTerminalBoxFill, RiTerminalBoxLine, RiTerminalWindowFill, @@ -67,40 +69,47 @@ const AppDetailLayout: FC = (props) => { }>>([]) const getNavigationConfig = useCallback((appId: string, isCurrentWorkspaceEditor: boolean, mode: AppModeEnum) => { - const navConfig = [ - ...(isCurrentWorkspaceEditor - ? [{ - name: t('appMenus.promptEng', { ns: 'common' }), - href: `/app/${appId}/${(mode === AppModeEnum.WORKFLOW || mode === AppModeEnum.ADVANCED_CHAT) ? 'workflow' : 'configuration'}`, - icon: RiTerminalWindowLine, - selectedIcon: RiTerminalWindowFill, - }] - : [] - ), - { - name: t('appMenus.apiAccess', { ns: 'common' }), - href: `/app/${appId}/develop`, - icon: RiTerminalBoxLine, - selectedIcon: RiTerminalBoxFill, - }, - ...(isCurrentWorkspaceEditor - ? [{ - name: mode !== AppModeEnum.WORKFLOW - ? t('appMenus.logAndAnn', { ns: 'common' }) - : t('appMenus.logs', { ns: 'common' }), - href: `/app/${appId}/logs`, - icon: RiFileList3Line, - selectedIcon: RiFileList3Fill, - }] - : [] - ), - { - name: t('appMenus.overview', { ns: 'common' }), - href: `/app/${appId}/overview`, - icon: RiDashboard2Line, - selectedIcon: RiDashboard2Fill, - }, - ] + const navConfig = [] + + if (isCurrentWorkspaceEditor) { + navConfig.push({ + name: t('appMenus.promptEng', { ns: 'common' }), + href: `/app/${appId}/${(mode === AppModeEnum.WORKFLOW || mode === AppModeEnum.ADVANCED_CHAT) ? 'workflow' : 'configuration'}`, + icon: RiTerminalWindowLine, + selectedIcon: RiTerminalWindowFill, + }) + navConfig.push({ + name: t('appMenus.evaluation', { ns: 'common' }), + href: `/app/${appId}/evaluation`, + icon: RiFlaskLine, + selectedIcon: RiFlaskFill, + }) + } + + navConfig.push({ + name: t('appMenus.apiAccess', { ns: 'common' }), + href: `/app/${appId}/develop`, + icon: RiTerminalBoxLine, + selectedIcon: RiTerminalBoxFill, + }) + + if (isCurrentWorkspaceEditor) { + navConfig.push({ + name: mode !== AppModeEnum.WORKFLOW + ? t('appMenus.logAndAnn', { ns: 'common' }) + : t('appMenus.logs', { ns: 'common' }), + href: `/app/${appId}/logs`, + icon: RiFileList3Line, + selectedIcon: RiFileList3Fill, + }) + } + + navConfig.push({ + name: t('appMenus.overview', { ns: 'common' }), + href: `/app/${appId}/overview`, + icon: RiDashboard2Line, + selectedIcon: RiDashboard2Fill, + }) return navConfig }, [t]) diff --git a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/evaluation/page.tsx b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/evaluation/page.tsx new file mode 100644 index 0000000000..d502266d16 --- /dev/null +++ b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/evaluation/page.tsx @@ -0,0 +1,11 @@ +import Evaluation from '@/app/components/evaluation' + +const Page = async (props: { + params: Promise<{ datasetId: string }> +}) => { + const { datasetId } = await props.params + + return +} + +export default Page diff --git a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/layout-main.tsx b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/layout-main.tsx index 4f3f724e62..495c57a4ce 100644 --- a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/layout-main.tsx +++ b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/layout-main.tsx @@ -6,6 +6,8 @@ import { RiEqualizer2Line, RiFileTextFill, RiFileTextLine, + RiFlaskFill, + RiFlaskLine, RiFocus2Fill, RiFocus2Line, } from '@remixicon/react' @@ -86,20 +88,30 @@ const DatasetDetailLayout: FC = (props) => { ] if (datasetRes?.provider !== 'external') { - baseNavigation.unshift({ - name: t('datasetMenus.pipeline', { ns: 'common' }), - href: `/datasets/${datasetId}/pipeline`, - icon: PipelineLine as RemixiconComponentType, - selectedIcon: PipelineFill as RemixiconComponentType, - disabled: false, - }) - baseNavigation.unshift({ - name: t('datasetMenus.documents', { ns: 'common' }), - href: `/datasets/${datasetId}/documents`, - icon: RiFileTextLine, - selectedIcon: RiFileTextFill, - disabled: isButtonDisabledWithPipeline, - }) + return [ + { + name: t('datasetMenus.documents', { ns: 'common' }), + href: `/datasets/${datasetId}/documents`, + icon: RiFileTextLine, + selectedIcon: RiFileTextFill, + disabled: isButtonDisabledWithPipeline, + }, + { + name: t('datasetMenus.pipeline', { ns: 'common' }), + href: `/datasets/${datasetId}/pipeline`, + icon: PipelineLine as RemixiconComponentType, + selectedIcon: PipelineFill as RemixiconComponentType, + disabled: false, + }, + { + name: t('datasetMenus.evaluation', { ns: 'common' }), + href: `/datasets/${datasetId}/evaluation`, + icon: RiFlaskLine, + selectedIcon: RiFlaskFill, + disabled: false, + }, + ...baseNavigation, + ] } return baseNavigation diff --git a/web/app/(commonLayout)/snippets/[snippetId]/evaluation/page.tsx b/web/app/(commonLayout)/snippets/[snippetId]/evaluation/page.tsx new file mode 100644 index 0000000000..293945ad20 --- /dev/null +++ b/web/app/(commonLayout)/snippets/[snippetId]/evaluation/page.tsx @@ -0,0 +1,11 @@ +import SnippetPage from '@/app/components/snippets' + +const Page = async (props: { + params: Promise<{ snippetId: string }> +}) => { + const { snippetId } = await props.params + + return +} + +export default Page diff --git a/web/app/(commonLayout)/snippets/[snippetId]/orchestrate/page.tsx b/web/app/(commonLayout)/snippets/[snippetId]/orchestrate/page.tsx new file mode 100644 index 0000000000..702dfcab54 --- /dev/null +++ b/web/app/(commonLayout)/snippets/[snippetId]/orchestrate/page.tsx @@ -0,0 +1,11 @@ +import SnippetPage from '@/app/components/snippets' + +const Page = async (props: { + params: Promise<{ snippetId: string }> +}) => { + const { snippetId } = await props.params + + return +} + +export default Page diff --git a/web/app/(commonLayout)/snippets/[snippetId]/page.spec.ts b/web/app/(commonLayout)/snippets/[snippetId]/page.spec.ts new file mode 100644 index 0000000000..578c562848 --- /dev/null +++ b/web/app/(commonLayout)/snippets/[snippetId]/page.spec.ts @@ -0,0 +1,21 @@ +import Page from './page' + +const mockRedirect = vi.fn() + +vi.mock('next/navigation', () => ({ + redirect: (path: string) => mockRedirect(path), +})) + +describe('snippet detail redirect page', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('should redirect legacy snippet detail routes to orchestrate', async () => { + await Page({ + params: Promise.resolve({ snippetId: 'snippet-1' }), + }) + + expect(mockRedirect).toHaveBeenCalledWith('/snippets/snippet-1/orchestrate') + }) +}) diff --git a/web/app/(commonLayout)/snippets/[snippetId]/page.tsx b/web/app/(commonLayout)/snippets/[snippetId]/page.tsx index 0d37e83e2b..3b35e29360 100644 --- a/web/app/(commonLayout)/snippets/[snippetId]/page.tsx +++ b/web/app/(commonLayout)/snippets/[snippetId]/page.tsx @@ -1,11 +1,11 @@ -import SnippetPage from '@/app/components/snippets' +import { redirect } from 'next/navigation' const Page = async (props: { params: Promise<{ snippetId: string }> }) => { - const { params } = props + const { snippetId } = await props.params - return + redirect(`/snippets/${snippetId}/orchestrate`) } export default Page diff --git a/web/app/components/apps/list.tsx b/web/app/components/apps/list.tsx index 90dd84cb9e..465ad230ff 100644 --- a/web/app/components/apps/list.tsx +++ b/web/app/components/apps/list.tsx @@ -91,7 +91,7 @@ const SnippetCard = ({ snippet: SnippetListItem }) => { return ( - +
{snippet.status && (
diff --git a/web/app/components/evaluation/__tests__/index.spec.tsx b/web/app/components/evaluation/__tests__/index.spec.tsx new file mode 100644 index 0000000000..55edd6ceb2 --- /dev/null +++ b/web/app/components/evaluation/__tests__/index.spec.tsx @@ -0,0 +1,112 @@ +import { act, fireEvent, render, screen } from '@testing-library/react' +import Evaluation from '..' +import { getEvaluationMockConfig } from '../mock' +import { useEvaluationStore } from '../store' + +vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({ + useModelList: () => ({ + data: [{ + provider: 'openai', + models: [{ model: 'gpt-4o-mini' }], + }], + }), +})) + +vi.mock('@/app/components/header/account-setting/model-provider-page/model-selector', () => ({ + default: ({ defaultModel }: { defaultModel?: { provider: string, model: string } }) => ( +
+ {defaultModel ? `${defaultModel.provider}:${defaultModel.model}` : 'empty'} +
+ ), +})) + +describe('Evaluation', () => { + beforeEach(() => { + useEvaluationStore.setState({ resources: {} }) + }) + + it('should search, add metrics, and create a batch history record', async () => { + vi.useFakeTimers() + + render() + + expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini') + + fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' })) + expect(screen.getByTestId('evaluation-metric-loading')).toBeInTheDocument() + + await act(async () => { + vi.advanceTimersByTime(200) + }) + + fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchPlaceholder'), { + target: { value: 'does-not-exist' }, + }) + + await act(async () => { + vi.advanceTimersByTime(200) + }) + + expect(screen.getByText('evaluation.metrics.noResults')).toBeInTheDocument() + + fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchPlaceholder'), { + target: { value: 'faith' }, + }) + + await act(async () => { + vi.advanceTimersByTime(200) + }) + + fireEvent.click(screen.getByRole('button', { name: /Faithfulness/i })) + expect(screen.getAllByText('Faithfulness').length).toBeGreaterThan(0) + + fireEvent.click(screen.getByRole('button', { name: 'evaluation.batch.run' })) + expect(screen.getByText('evaluation.batch.status.running')).toBeInTheDocument() + + await act(async () => { + vi.advanceTimersByTime(1300) + }) + + expect(screen.getByText('evaluation.batch.status.success')).toBeInTheDocument() + expect(screen.getByText('Workflow evaluation batch')).toBeInTheDocument() + + vi.useRealTimers() + }) + + it('should render time placeholders and hide the value row for empty operators', () => { + const resourceType = 'workflow' + const resourceId = 'app-2' + const store = useEvaluationStore.getState() + const config = getEvaluationMockConfig(resourceType) + + const timeField = config.fieldOptions.find(field => field.type === 'time')! + let groupId = '' + let itemId = '' + + act(() => { + store.ensureResource(resourceType, resourceId) + store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini') + + const group = useEvaluationStore.getState().resources['workflow:app-2'].conditions[0] + groupId = group.id + itemId = group.items[0].id + + store.updateConditionField(resourceType, resourceId, groupId, itemId, timeField.id) + store.updateConditionOperator(resourceType, resourceId, groupId, itemId, 'before') + }) + + let rerender: ReturnType['rerender'] + act(() => { + ({ rerender } = render()) + }) + + expect(screen.getByText('evaluation.conditions.selectTime')).toBeInTheDocument() + + act(() => { + store.updateConditionOperator(resourceType, resourceId, groupId, itemId, 'is_empty') + rerender() + }) + + expect(screen.queryByText('evaluation.conditions.selectTime')).not.toBeInTheDocument() + }) +}) diff --git a/web/app/components/evaluation/__tests__/store.spec.ts b/web/app/components/evaluation/__tests__/store.spec.ts new file mode 100644 index 0000000000..d37952be61 --- /dev/null +++ b/web/app/components/evaluation/__tests__/store.spec.ts @@ -0,0 +1,96 @@ +import { getEvaluationMockConfig } from '../mock' +import { + getAllowedOperators, + isCustomMetricConfigured, + requiresConditionValue, + useEvaluationStore, +} from '../store' + +describe('evaluation store', () => { + beforeEach(() => { + useEvaluationStore.setState({ resources: {} }) + }) + + it('should configure a custom metric mapping to a valid state', () => { + const resourceType = 'workflow' + const resourceId = 'app-1' + const store = useEvaluationStore.getState() + const config = getEvaluationMockConfig(resourceType) + + store.ensureResource(resourceType, resourceId) + store.addCustomMetric(resourceType, resourceId) + + const initialMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.kind === 'custom-workflow') + expect(initialMetric).toBeDefined() + expect(isCustomMetricConfigured(initialMetric!)).toBe(false) + + store.setCustomMetricWorkflow(resourceType, resourceId, initialMetric!.id, config.workflowOptions[0].id) + store.updateCustomMetricMapping(resourceType, resourceId, initialMetric!.id, initialMetric!.customConfig!.mappings[0].id, { + sourceFieldId: config.fieldOptions[0].id, + targetVariableId: config.workflowOptions[0].targetVariables[0].id, + }) + + const configuredMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.id === initialMetric!.id) + expect(isCustomMetricConfigured(configuredMetric!)).toBe(true) + }) + + it('should add and remove builtin metrics', () => { + const resourceType = 'workflow' + const resourceId = 'app-2' + const store = useEvaluationStore.getState() + const config = getEvaluationMockConfig(resourceType) + + store.ensureResource(resourceType, resourceId) + store.addBuiltinMetric(resourceType, resourceId, config.builtinMetrics[1].id) + + const addedMetric = useEvaluationStore.getState().resources['workflow:app-2'].metrics.find(metric => metric.optionId === config.builtinMetrics[1].id) + expect(addedMetric).toBeDefined() + + store.removeMetric(resourceType, resourceId, addedMetric!.id) + + expect(useEvaluationStore.getState().resources['workflow:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false) + }) + + it('should update condition groups and adapt operators to field types', () => { + const resourceType = 'pipeline' + const resourceId = 'dataset-1' + const store = useEvaluationStore.getState() + const config = getEvaluationMockConfig(resourceType) + + store.ensureResource(resourceType, resourceId) + + const initialGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0] + store.setConditionGroupOperator(resourceType, resourceId, initialGroup.id, 'or') + store.addConditionGroup(resourceType, resourceId) + + const booleanField = config.fieldOptions.find(field => field.type === 'boolean')! + const currentItem = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0].items[0] + store.updateConditionField(resourceType, resourceId, initialGroup.id, currentItem.id, booleanField.id) + + const updatedGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0] + expect(updatedGroup.logicalOperator).toBe('or') + expect(updatedGroup.items[0].operator).toBe('is') + expect(getAllowedOperators(resourceType, booleanField.id)).toEqual(['is', 'is_not']) + }) + + it('should support time fields and clear values for empty operators', () => { + const resourceType = 'workflow' + const resourceId = 'app-3' + const store = useEvaluationStore.getState() + const config = getEvaluationMockConfig(resourceType) + + store.ensureResource(resourceType, resourceId) + + const timeField = config.fieldOptions.find(field => field.type === 'time')! + const item = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0] + + store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, timeField.id) + store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, 'is_empty') + + const updatedItem = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0] + + expect(getAllowedOperators(resourceType, timeField.id)).toEqual(['is', 'before', 'after', 'is_empty', 'is_not_empty']) + expect(requiresConditionValue('is_empty')).toBe(false) + expect(updatedItem.value).toBeNull() + }) +}) diff --git a/web/app/components/evaluation/index.tsx b/web/app/components/evaluation/index.tsx new file mode 100644 index 0000000000..798e092eed --- /dev/null +++ b/web/app/components/evaluation/index.tsx @@ -0,0 +1,1017 @@ +'use client' + +import type { TFunction } from 'i18next' +import type { ChangeEvent, ReactNode } from 'react' +import type { + ComparisonOperator, + CustomMetricMapping, + EvaluationFieldOption, + EvaluationMetric, + EvaluationResourceType, + JudgmentConditionGroup, +} from './types' +import { + RiAddLine, + RiArrowDownSLine, + RiCloseLine, + RiDeleteBinLine, + RiDownloadLine, + RiFileUploadLine, + RiFlaskLine, + RiLoader4Line, +} from '@remixicon/react' +import { useEffect, useMemo, useRef, useState } from 'react' +import { useTranslation } from 'react-i18next' +import Badge from '@/app/components/base/badge' +import Button from '@/app/components/base/button' +import DatePicker from '@/app/components/base/date-and-time-picker/date-picker' +import dayjs from '@/app/components/base/date-and-time-picker/utils/dayjs' +import Input from '@/app/components/base/input' +import Toast from '@/app/components/base/toast' +import { + Popover, + PopoverContent, + PopoverTrigger, +} from '@/app/components/base/ui/popover' +import { + Select, + SelectContent, + SelectGroup, + SelectGroupLabel, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/app/components/base/ui/select' +import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' +import { useModelList } from '@/app/components/header/account-setting/model-provider-page/hooks' +import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector' +import { cn } from '@/utils/classnames' +import { getEvaluationMockConfig } from './mock' +import { + getAllowedOperators, + isCustomMetricConfigured, + isEvaluationRunnable, + requiresConditionValue, + useEvaluationResource, + useEvaluationStore, +} from './store' + +type EvaluationProps = { + resourceType: EvaluationResourceType + resourceId: string +} + +const TAB_CLASS_NAME = 'flex-1 rounded-lg px-3 py-2 text-left system-sm-medium' + +const encodeModelSelection = (provider: string, model: string) => `${provider}::${model}` + +const decodeModelSelection = (judgeModelId: string | null) => { + if (!judgeModelId) + return undefined + + const [provider, model] = judgeModelId.split('::') + if (!provider || !model) + return undefined + + return { provider, model } +} + +const compactOperatorLabels: Partial> = { + is: '=', + is_not: '!=', + greater_than: '>', + less_than: '<', + greater_or_equal: '>=', + less_or_equal: '<=', +} + +const groupFieldOptions = (fieldOptions: EvaluationFieldOption[]) => { + return Object.entries(fieldOptions.reduce>((acc, field) => { + acc[field.group] = [...(acc[field.group] ?? []), field] + return acc + }, {})) +} + +const getOperatorLabel = ( + operator: ComparisonOperator, + fieldType: EvaluationFieldOption['type'] | undefined, + t: TFunction<'evaluation'>, +) => { + if (fieldType === 'number' && compactOperatorLabels[operator]) + return compactOperatorLabels[operator] as string + + return t(`conditions.operators.${operator}` as const) +} + +const getFieldTypeIconClassName = (fieldType: EvaluationFieldOption['type']) => { + if (fieldType === 'number') + return 'i-ri-hashtag' + + if (fieldType === 'boolean') + return 'i-ri-checkbox-circle-line' + + if (fieldType === 'enum') + return 'i-ri-list-check-2' + + if (fieldType === 'time') + return 'i-ri-time-line' + + return 'i-ri-text' +} + +const ConditionFieldLabel = ({ + field, + placeholder, +}: { + field?: EvaluationFieldOption + placeholder: string +}) => { + if (!field) + return {placeholder} + + return ( +
+
+ + {field.label} +
+ {field.type} +
+ ) +} + +const SectionHeader = ({ + title, + description, + action, +}: { + title: string + description: string + action?: ReactNode +}) => { + return ( +
+
+
{title}
+
{description}
+
+ {action} +
+ ) +} + +const FieldValueInput = ({ + field, + operator, + value, + onChange, +}: { + field?: EvaluationFieldOption + operator: ComparisonOperator + value: string | number | boolean | null + onChange: (value: string | number | boolean | null) => void +}) => { + const { t } = useTranslation('evaluation') + + if (!field || !requiresConditionValue(operator)) + return null + + if (field.type === 'time') { + const selectedTime = typeof value === 'string' && value ? dayjs(value) : undefined + + return ( +
+ onChange(date ? date.toISOString() : null)} + onClear={() => onChange(null)} + placeholder={t('conditions.selectTime')} + triggerWrapClassName="w-full" + popupZIndexClassname="z-[1002]" + renderTrigger={({ handleClickTrigger }) => ( + + )} + /> +
+ ) + } + + if (field.type === 'boolean') { + return ( +
+ +
+ ) + } + + if (field.type === 'enum') { + return ( +
+ +
+ ) + } + + return ( +
+ { + if (field.type === 'number') { + const nextValue = e.target.value + onChange(nextValue === '' ? null : Number(nextValue)) + return + } + + onChange(e.target.value) + }} + /> +
+ ) +} + +const ConditionFieldSelect = ({ + field, + fieldOptions, + placeholder, + onChange, +}: { + field?: EvaluationFieldOption + fieldOptions: EvaluationFieldOption[] + placeholder: string + onChange: (fieldId: string) => void +}) => { + return ( + + ) +} + +const ConditionOperatorSelect = ({ + field, + operator, + operators, + onChange, +}: { + field?: EvaluationFieldOption + operator: ComparisonOperator + operators: ComparisonOperator[] + onChange: (operator: ComparisonOperator) => void +}) => { + const { t } = useTranslation('evaluation') + + return ( + + ) +} + +const JudgeModelSelector = ({ + resourceId, + resourceType, +}: EvaluationProps) => { + const { data: modelList } = useModelList(ModelTypeEnum.textGeneration) + const resource = useEvaluationResource(resourceType, resourceId) + const setJudgeModel = useEvaluationStore(state => state.setJudgeModel) + const selectedModel = decodeModelSelection(resource.judgeModelId) + + useEffect(() => { + if (resource.judgeModelId || !modelList.length) + return + + const firstProvider = modelList[0] + const firstModel = firstProvider.models[0] + if (!firstProvider || !firstModel) + return + + setJudgeModel(resourceType, resourceId, encodeModelSelection(firstProvider.provider, firstModel.model)) + }, [modelList, resource.judgeModelId, resourceId, resourceType, setJudgeModel]) + + return ( + setJudgeModel(resourceType, resourceId, encodeModelSelection(model.provider, model.model))} + showDeprecatedWarnIcon + triggerClassName="h-11" + /> + ) +} + +const MetricSelector = ({ + resourceType, + resourceId, +}: EvaluationProps) => { + const { t } = useTranslation('evaluation') + const config = getEvaluationMockConfig(resourceType) + const metricGroupLabels = { + quality: t('metrics.groups.quality'), + operations: t('metrics.groups.operations'), + } + const metrics = useEvaluationResource(resourceType, resourceId).metrics + const addBuiltinMetric = useEvaluationStore(state => state.addBuiltinMetric) + const addCustomMetric = useEvaluationStore(state => state.addCustomMetric) + const [open, setOpen] = useState(false) + const [query, setQuery] = useState('') + const [showAll, setShowAll] = useState(false) + const [isLoading, setIsLoading] = useState(false) + const loadingTimerRef = useRef(null) + + const triggerLoading = () => { + if (loadingTimerRef.current) + window.clearTimeout(loadingTimerRef.current) + + setIsLoading(true) + loadingTimerRef.current = window.setTimeout(() => { + setIsLoading(false) + }, 180) + } + + const handleOpenChange = (nextOpen: boolean) => { + setOpen(nextOpen) + + if (nextOpen) { + triggerLoading() + return + } + + if (loadingTimerRef.current) + window.clearTimeout(loadingTimerRef.current) + setIsLoading(false) + } + + const handleQueryChange = (event: ChangeEvent) => { + setQuery(event.target.value) + if (open) + triggerLoading() + } + + useEffect(() => { + return () => { + if (loadingTimerRef.current) + window.clearTimeout(loadingTimerRef.current) + } + }, []) + + const filteredGroups = useMemo(() => { + const filteredMetrics = config.builtinMetrics.filter((metric) => { + const keyword = query.trim().toLowerCase() + if (!keyword) + return true + + return metric.label.toLowerCase().includes(keyword) || metric.description.toLowerCase().includes(keyword) + }) + + const grouped = filteredMetrics.reduce>((acc, metric) => { + acc[metric.group] = [...(acc[metric.group] ?? []), metric] + return acc + }, {}) + + return Object.entries(grouped) + }, [config.builtinMetrics, query]) + + return ( + + + + {t('metrics.add')} + + +
+ +
+ {isLoading && ( +
+ {['metric-skeleton-1', 'metric-skeleton-2', 'metric-skeleton-3'].map(key => ( +
+ ))} +
+ )} + {!isLoading && filteredGroups.length === 0 && ( +
+ {t('metrics.noResults')} +
+ )} + {!isLoading && filteredGroups.map(([groupName, options]) => { + const shownOptions = showAll ? options : options.slice(0, 2) + return ( +
+
{metricGroupLabels[groupName as keyof typeof metricGroupLabels] ?? groupName}
+
+ {shownOptions.map(option => ( + + ))} +
+
+ ) + })} +
+ {filteredGroups.some(([, options]) => options.length > 2) && ( + + )} +
+ +
+
+ + + ) +} + +const CustomMetricEditor = ({ + resourceType, + resourceId, + metric, +}: EvaluationProps & { metric: EvaluationMetric }) => { + const { t } = useTranslation('evaluation') + const config = getEvaluationMockConfig(resourceType) + const setCustomMetricWorkflow = useEvaluationStore(state => state.setCustomMetricWorkflow) + const addCustomMetricMapping = useEvaluationStore(state => state.addCustomMetricMapping) + const updateCustomMetricMapping = useEvaluationStore(state => state.updateCustomMetricMapping) + const removeCustomMetricMapping = useEvaluationStore(state => state.removeCustomMetricMapping) + const selectedWorkflow = config.workflowOptions.find(option => option.id === metric.customConfig?.workflowId) + const isConfigured = isCustomMetricConfigured(metric) + + if (!metric.customConfig) + return null + + return ( +
+
+
+
{t('metrics.custom.title')}
+
{t('metrics.custom.description')}
+
+ {!isConfigured && {t('metrics.custom.warningBadge')}} +
+
+
+
{t('metrics.custom.workflowLabel')}
+ + {selectedWorkflow &&
{selectedWorkflow.description}
} +
+
+
+
{t('metrics.custom.mappingTitle')}
+ +
+
+ {metric.customConfig.mappings.map(mapping => ( + updateCustomMetricMapping(resourceType, resourceId, metric.id, mapping.id, patch)} + onRemove={() => removeCustomMetricMapping(resourceType, resourceId, metric.id, mapping.id)} + /> + ))} +
+ {!isConfigured && ( +
+ {t('metrics.custom.mappingWarning')} +
+ )} +
+
+
+ ) +} + +function MappingRow({ + resourceType, + mapping, + targetOptions, + onUpdate, + onRemove, +}: { + resourceType: EvaluationResourceType + mapping: CustomMetricMapping + targetOptions: Array<{ id: string, label: string }> + onUpdate: (patch: { sourceFieldId?: string | null, targetVariableId?: string | null }) => void + onRemove: () => void +}) { + const { t } = useTranslation('evaluation') + const config = getEvaluationMockConfig(resourceType) + + return ( +
+ +
+ +
+ + +
+ ) +} + +const ConditionGroup = ({ + resourceType, + resourceId, + group, + index, +}: EvaluationProps & { group: JudgmentConditionGroup, index: number }) => { + const { t } = useTranslation('evaluation') + const config = getEvaluationMockConfig(resourceType) + const logicalLabels = { + and: t('conditions.logical.and'), + or: t('conditions.logical.or'), + } + const removeConditionGroup = useEvaluationStore(state => state.removeConditionGroup) + const setConditionGroupOperator = useEvaluationStore(state => state.setConditionGroupOperator) + const addConditionItem = useEvaluationStore(state => state.addConditionItem) + const removeConditionItem = useEvaluationStore(state => state.removeConditionItem) + const updateConditionField = useEvaluationStore(state => state.updateConditionField) + const updateConditionOperator = useEvaluationStore(state => state.updateConditionOperator) + const updateConditionValue = useEvaluationStore(state => state.updateConditionValue) + + return ( +
+
+
+ {t('conditions.groupLabel', { index: index + 1 })} +
+ {(['and', 'or'] as const).map(operator => ( + + ))} +
+
+
+ + +
+
+
+ {group.items.map((item) => { + const field = config.fieldOptions.find(option => option.id === item.fieldId) + const allowedOperators = getAllowedOperators(resourceType, item.fieldId) + const showValue = !!field && requiresConditionValue(item.operator) + + return ( +
+
+
+
+ updateConditionField(resourceType, resourceId, group.id, item.id, value)} + /> +
+
+ updateConditionOperator(resourceType, resourceId, group.id, item.id, value)} + /> +
+ {showValue && ( +
+ updateConditionValue(resourceType, resourceId, group.id, item.id, value)} + /> +
+ )} +
+
+ +
+
+ ) + })} +
+
+ ) +} + +const BatchTestPanel = ({ + resourceType, + resourceId, +}: EvaluationProps) => { + const { t } = useTranslation('evaluation') + const config = getEvaluationMockConfig(resourceType) + const tabLabels = { + 'input-fields': t('batch.tabs.input-fields'), + 'history': t('batch.tabs.history'), + } + const statusLabels = { + running: t('batch.status.running'), + success: t('batch.status.success'), + failed: t('batch.status.failed'), + } + const resource = useEvaluationResource(resourceType, resourceId) + const setBatchTab = useEvaluationStore(state => state.setBatchTab) + const setUploadedFileName = useEvaluationStore(state => state.setUploadedFileName) + const runBatchTest = useEvaluationStore(state => state.runBatchTest) + const fileInputRef = useRef(null) + const isRunnable = isEvaluationRunnable(resource) + + const handleDownloadTemplate = () => { + const content = ['case_id,input,expected', '1,Example input,Example output'].join('\n') + const link = document.createElement('a') + link.href = `data:text/csv;charset=utf-8,${encodeURIComponent(content)}` + link.download = config.templateFileName + link.click() + } + + const handleRun = () => { + if (!isRunnable) { + Toast.notify({ + type: 'warning', + message: t('batch.validation'), + }) + return + } + + runBatchTest(resourceType, resourceId) + } + + return ( +
+
+
+ + {t('batch.title')} +
+
+
{t('batch.noticeTitle')}
+
{t('batch.noticeDescription')}
+
+
+ {(['input-fields', 'history'] as const).map(tab => ( + + ))} +
+
+
+ {resource.activeBatchTab === 'input-fields' && ( +
+
+
{t('batch.requirementsTitle')}
+
+ {config.batchRequirements.map(requirement => ( +
+ + {requirement} +
+ ))} +
+
+
+ + { + const file = event.target.files?.[0] + setUploadedFileName(resourceType, resourceId, file?.name ?? null) + }} + /> + +
+ {!isRunnable && ( +
+ {t('batch.validation')} +
+ )} + +
+ )} + {resource.activeBatchTab === 'history' && ( +
+ {resource.batchRecords.length === 0 && ( +
+ {t('batch.emptyHistory')} +
+ )} + {resource.batchRecords.map(record => ( +
+
+
+
{record.summary}
+
{record.fileName}
+
+ + {record.status === 'running' + ? ( + + + {statusLabels.running} + + ) + : statusLabels[record.status]} + +
+
{record.startedAt}
+
+ ))} +
+ )} +
+
+ ) +} + +const Evaluation = ({ + resourceType, + resourceId, +}: EvaluationProps) => { + const { t } = useTranslation('evaluation') + const resource = useEvaluationResource(resourceType, resourceId) + const ensureResource = useEvaluationStore(state => state.ensureResource) + const removeMetric = useEvaluationStore(state => state.removeMetric) + const addConditionGroup = useEvaluationStore(state => state.addConditionGroup) + + useEffect(() => { + ensureResource(resourceType, resourceId) + }, [ensureResource, resourceId, resourceType]) + + return ( +
+
+
+ +
+ +
+ +
+
+ +
+ } + /> +
+ {resource.metrics.map(metric => ( +
+
+
+
{metric.label}
+
{metric.description}
+
+ {metric.badges.map(badge => ( + {badge} + ))} +
+
+ +
+ {metric.kind === 'custom-workflow' && ( + + )} +
+ ))} +
+
+ +
+ addConditionGroup(resourceType, resourceId)}> + + {t('conditions.addGroup')} + + )} + /> +
+ {resource.conditions.length === 0 && ( +
+
{t('conditions.emptyTitle')}
+
{t('conditions.emptyDescription')}
+
+ )} + {resource.conditions.map((group, index) => ( + + ))} +
+
+
+
+ +
+ +
+
+ ) +} + +export default Evaluation diff --git a/web/app/components/evaluation/mock.ts b/web/app/components/evaluation/mock.ts new file mode 100644 index 0000000000..598e5ee675 --- /dev/null +++ b/web/app/components/evaluation/mock.ts @@ -0,0 +1,184 @@ +import type { + ComparisonOperator, + EvaluationFieldOption, + EvaluationMockConfig, + EvaluationResourceType, + MetricOption, +} from './types' + +const judgeModels = [ + { + id: 'gpt-4.1-mini', + label: 'GPT-4.1 mini', + provider: 'OpenAI', + }, + { + id: 'claude-3-7-sonnet', + label: 'Claude 3.7 Sonnet', + provider: 'Anthropic', + }, + { + id: 'gemini-2.0-flash', + label: 'Gemini 2.0 Flash', + provider: 'Google', + }, +] + +const builtinMetrics: MetricOption[] = [ + { + id: 'answer-correctness', + label: 'Answer Correctness', + description: 'Compares the response with the expected answer and scores factual alignment.', + group: 'quality', + badges: ['LLM', 'Built-in'], + }, + { + id: 'faithfulness', + label: 'Faithfulness', + description: 'Checks whether the answer stays grounded in the retrieved evidence.', + group: 'quality', + badges: ['LLM', 'Retrieval'], + }, + { + id: 'relevance', + label: 'Relevance', + description: 'Evaluates how directly the answer addresses the original request.', + group: 'quality', + badges: ['LLM'], + }, + { + id: 'latency', + label: 'Latency', + description: 'Captures runtime responsiveness for the full execution path.', + group: 'operations', + badges: ['System'], + }, + { + id: 'token-usage', + label: 'Token Usage', + description: 'Tracks prompt and completion token consumption for the run.', + group: 'operations', + badges: ['System'], + }, + { + id: 'tool-success-rate', + label: 'Tool Success Rate', + description: 'Measures whether each required tool invocation finishes without failure.', + group: 'operations', + badges: ['Workflow'], + }, +] + +const workflowOptions = [ + { + id: 'workflow-precision-review', + label: 'Precision Review Workflow', + description: 'Custom evaluator for nuanced quality review.', + targetVariables: [ + { id: 'query', label: 'query' }, + { id: 'answer', label: 'answer' }, + { id: 'reference', label: 'reference' }, + ], + }, + { + id: 'workflow-risk-review', + label: 'Risk Review Workflow', + description: 'Custom evaluator for policy and escalation checks.', + targetVariables: [ + { id: 'input', label: 'input' }, + { id: 'output', label: 'output' }, + ], + }, +] + +const workflowFields: EvaluationFieldOption[] = [ + { id: 'app.input.query', label: 'Query', group: 'App Input', type: 'string' }, + { id: 'app.input.locale', label: 'Locale', group: 'App Input', type: 'enum', options: [{ value: 'en-US', label: 'en-US' }, { value: 'zh-Hans', label: 'zh-Hans' }] }, + { id: 'app.output.answer', label: 'Answer', group: 'App Output', type: 'string' }, + { id: 'app.output.score', label: 'Score', group: 'App Output', type: 'number' }, + { id: 'app.output.published_at', label: 'Publication Date', group: 'App Output', type: 'time' }, + { id: 'system.has_context', label: 'Has Context', group: 'System', type: 'boolean' }, +] + +const pipelineFields: EvaluationFieldOption[] = [ + { id: 'dataset.input.document_id', label: 'Document ID', group: 'Dataset', type: 'string' }, + { id: 'dataset.input.chunk_count', label: 'Chunk Count', group: 'Dataset', type: 'number' }, + { id: 'dataset.input.updated_at', label: 'Updated At', group: 'Dataset', type: 'time' }, + { id: 'retrieval.output.hit_rate', label: 'Hit Rate', group: 'Retrieval', type: 'number' }, + { id: 'retrieval.output.source', label: 'Source', group: 'Retrieval', type: 'enum', options: [{ value: 'bm25', label: 'BM25' }, { value: 'hybrid', label: 'Hybrid' }] }, + { id: 'pipeline.output.published', label: 'Published', group: 'Output', type: 'boolean' }, +] + +const snippetFields: EvaluationFieldOption[] = [ + { id: 'snippet.input.blog_url', label: 'Blog URL', group: 'Snippet Input', type: 'string' }, + { id: 'snippet.input.platforms', label: 'Platforms', group: 'Snippet Input', type: 'string' }, + { id: 'snippet.output.content', label: 'Generated Content', group: 'Snippet Output', type: 'string' }, + { id: 'snippet.output.length', label: 'Output Length', group: 'Snippet Output', type: 'number' }, + { id: 'snippet.output.scheduled_at', label: 'Scheduled At', group: 'Snippet Output', type: 'time' }, + { id: 'system.requires_review', label: 'Requires Review', group: 'System', type: 'boolean' }, +] + +export const getComparisonOperators = (fieldType: EvaluationFieldOption['type']): ComparisonOperator[] => { + if (fieldType === 'number') + return ['is', 'is_not', 'greater_than', 'less_than', 'greater_or_equal', 'less_or_equal', 'is_empty', 'is_not_empty'] + + if (fieldType === 'time') + return ['is', 'before', 'after', 'is_empty', 'is_not_empty'] + + if (fieldType === 'boolean' || fieldType === 'enum') + return ['is', 'is_not'] + + return ['contains', 'not_contains', 'is', 'is_not', 'is_empty', 'is_not_empty'] +} + +export const getDefaultOperator = (fieldType: EvaluationFieldOption['type']): ComparisonOperator => { + return getComparisonOperators(fieldType)[0] +} + +export const getEvaluationMockConfig = (resourceType: EvaluationResourceType): EvaluationMockConfig => { + if (resourceType === 'pipeline') { + return { + judgeModels, + builtinMetrics, + workflowOptions, + fieldOptions: pipelineFields, + templateFileName: 'pipeline-evaluation-template.csv', + batchRequirements: [ + 'Include one row per retrieval scenario.', + 'Provide the expected source or target chunk for each case.', + 'Keep numeric metrics in plain number format.', + ], + historySummaryLabel: 'Pipeline evaluation batch', + } + } + + if (resourceType === 'snippet') { + return { + judgeModels, + builtinMetrics, + workflowOptions, + fieldOptions: snippetFields, + templateFileName: 'snippet-evaluation-template.csv', + batchRequirements: [ + 'Include one row per snippet execution case.', + 'Provide the expected final content or acceptance rule.', + 'Keep optional fields empty when not used.', + ], + historySummaryLabel: 'Snippet evaluation batch', + } + } + + return { + judgeModels, + builtinMetrics, + workflowOptions, + fieldOptions: workflowFields, + templateFileName: 'workflow-evaluation-template.csv', + batchRequirements: [ + 'Include one row per workflow test case.', + 'Provide both user input and expected answer when available.', + 'Keep boolean columns as true or false.', + ], + historySummaryLabel: 'Workflow evaluation batch', + } +} diff --git a/web/app/components/evaluation/store.ts b/web/app/components/evaluation/store.ts new file mode 100644 index 0000000000..59b2b6d907 --- /dev/null +++ b/web/app/components/evaluation/store.ts @@ -0,0 +1,635 @@ +import type { + BatchTestRecord, + ComparisonOperator, + EvaluationFieldOption, + EvaluationMetric, + EvaluationResourceState, + EvaluationResourceType, + JudgmentConditionGroup, +} from './types' +import { create } from 'zustand' +import { getComparisonOperators, getDefaultOperator, getEvaluationMockConfig } from './mock' + +type EvaluationStore = { + resources: Record + ensureResource: (resourceType: EvaluationResourceType, resourceId: string) => void + setJudgeModel: (resourceType: EvaluationResourceType, resourceId: string, judgeModelId: string) => void + addBuiltinMetric: (resourceType: EvaluationResourceType, resourceId: string, optionId: string) => void + addCustomMetric: (resourceType: EvaluationResourceType, resourceId: string) => void + removeMetric: (resourceType: EvaluationResourceType, resourceId: string, metricId: string) => void + setCustomMetricWorkflow: (resourceType: EvaluationResourceType, resourceId: string, metricId: string, workflowId: string) => void + addCustomMetricMapping: (resourceType: EvaluationResourceType, resourceId: string, metricId: string) => void + updateCustomMetricMapping: ( + resourceType: EvaluationResourceType, + resourceId: string, + metricId: string, + mappingId: string, + patch: { sourceFieldId?: string | null, targetVariableId?: string | null }, + ) => void + removeCustomMetricMapping: (resourceType: EvaluationResourceType, resourceId: string, metricId: string, mappingId: string) => void + addConditionGroup: (resourceType: EvaluationResourceType, resourceId: string) => void + removeConditionGroup: (resourceType: EvaluationResourceType, resourceId: string, groupId: string) => void + setConditionGroupOperator: (resourceType: EvaluationResourceType, resourceId: string, groupId: string, logicalOperator: 'and' | 'or') => void + addConditionItem: (resourceType: EvaluationResourceType, resourceId: string, groupId: string) => void + removeConditionItem: (resourceType: EvaluationResourceType, resourceId: string, groupId: string, itemId: string) => void + updateConditionField: (resourceType: EvaluationResourceType, resourceId: string, groupId: string, itemId: string, fieldId: string) => void + updateConditionOperator: (resourceType: EvaluationResourceType, resourceId: string, groupId: string, itemId: string, operator: ComparisonOperator) => void + updateConditionValue: ( + resourceType: EvaluationResourceType, + resourceId: string, + groupId: string, + itemId: string, + value: string | number | boolean | null, + ) => void + setBatchTab: (resourceType: EvaluationResourceType, resourceId: string, tab: EvaluationResourceState['activeBatchTab']) => void + setUploadedFileName: (resourceType: EvaluationResourceType, resourceId: string, uploadedFileName: string | null) => void + runBatchTest: (resourceType: EvaluationResourceType, resourceId: string) => void +} + +const buildResourceKey = (resourceType: EvaluationResourceType, resourceId: string) => `${resourceType}:${resourceId}` +const initialResourceCache: Record = {} + +const createId = (prefix: string) => `${prefix}-${Math.random().toString(36).slice(2, 10)}` + +export const conditionOperatorsWithoutValue: ComparisonOperator[] = ['is_empty', 'is_not_empty'] + +export const requiresConditionValue = (operator: ComparisonOperator) => !conditionOperatorsWithoutValue.includes(operator) + +const getConditionValue = ( + field: EvaluationFieldOption | undefined, + operator: ComparisonOperator, + previousValue: string | number | boolean | null = null, +) => { + if (!field || !requiresConditionValue(operator)) + return null + + if (field.type === 'boolean') + return typeof previousValue === 'boolean' ? previousValue : null + + if (field.type === 'enum') + return typeof previousValue === 'string' ? previousValue : null + + if (field.type === 'number') + return typeof previousValue === 'number' ? previousValue : null + + return typeof previousValue === 'string' ? previousValue : null +} + +const buildConditionItem = (resourceType: EvaluationResourceType) => { + const field = getEvaluationMockConfig(resourceType).fieldOptions[0] + const operator = field ? getDefaultOperator(field.type) : 'contains' + + return { + id: createId('condition'), + fieldId: field?.id ?? null, + operator, + value: getConditionValue(field, operator), + } +} + +const buildInitialState = (resourceType: EvaluationResourceType): EvaluationResourceState => { + const config = getEvaluationMockConfig(resourceType) + const defaultMetric = config.builtinMetrics[0] + + return { + judgeModelId: null, + metrics: defaultMetric + ? [{ + id: createId('metric'), + optionId: defaultMetric.id, + kind: 'builtin', + label: defaultMetric.label, + description: defaultMetric.description, + badges: defaultMetric.badges, + }] + : [], + conditions: [{ + id: createId('group'), + logicalOperator: 'and', + items: [buildConditionItem(resourceType)], + }], + activeBatchTab: 'input-fields', + uploadedFileName: null, + batchRecords: [], + } +} + +const withResourceState = ( + resources: EvaluationStore['resources'], + resourceType: EvaluationResourceType, + resourceId: string, +) => { + const resourceKey = buildResourceKey(resourceType, resourceId) + + return { + resourceKey, + resource: resources[resourceKey] ?? buildInitialState(resourceType), + } +} + +const updateMetric = ( + metrics: EvaluationMetric[], + metricId: string, + updater: (metric: EvaluationMetric) => EvaluationMetric, +) => metrics.map(metric => metric.id === metricId ? updater(metric) : metric) + +const updateConditionGroup = ( + groups: JudgmentConditionGroup[], + groupId: string, + updater: (group: JudgmentConditionGroup) => JudgmentConditionGroup, +) => groups.map(group => group.id === groupId ? updater(group) : group) + +export const isCustomMetricConfigured = (metric: EvaluationMetric) => { + if (metric.kind !== 'custom-workflow') + return true + + if (!metric.customConfig?.workflowId) + return false + + return metric.customConfig.mappings.length > 0 + && metric.customConfig.mappings.every(mapping => !!mapping.sourceFieldId && !!mapping.targetVariableId) +} + +export const isEvaluationRunnable = (state: EvaluationResourceState) => { + return !!state.judgeModelId + && state.metrics.length > 0 + && state.metrics.every(isCustomMetricConfigured) + && state.conditions.some(group => group.items.length > 0) +} + +export const useEvaluationStore = create((set, get) => ({ + resources: {}, + ensureResource: (resourceType, resourceId) => { + const resourceKey = buildResourceKey(resourceType, resourceId) + if (get().resources[resourceKey]) + return + + set(state => ({ + resources: { + ...state.resources, + [resourceKey]: buildInitialState(resourceType), + }, + })) + }, + setJudgeModel: (resourceType, resourceId, judgeModelId) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + judgeModelId, + }, + }, + } + }) + }, + addBuiltinMetric: (resourceType, resourceId, optionId) => { + const option = getEvaluationMockConfig(resourceType).builtinMetrics.find(metric => metric.id === optionId) + if (!option) + return + + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + if (resource.metrics.some(metric => metric.optionId === optionId && metric.kind === 'builtin')) + return state + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + metrics: [ + ...resource.metrics, + { + id: createId('metric'), + optionId: option.id, + kind: 'builtin', + label: option.label, + description: option.description, + badges: option.badges, + }, + ], + }, + }, + } + }) + }, + addCustomMetric: (resourceType, resourceId) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + metrics: [ + ...resource.metrics, + { + id: createId('metric'), + optionId: createId('custom'), + kind: 'custom-workflow', + label: 'Custom Evaluator', + description: 'Map workflow variables to your evaluation inputs.', + badges: ['Workflow'], + customConfig: { + workflowId: null, + mappings: [{ + id: createId('mapping'), + sourceFieldId: null, + targetVariableId: null, + }], + }, + }, + ], + }, + }, + } + }) + }, + removeMetric: (resourceType, resourceId, metricId) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + metrics: resource.metrics.filter(metric => metric.id !== metricId), + }, + }, + } + }) + }, + setCustomMetricWorkflow: (resourceType, resourceId, metricId, workflowId) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + metrics: updateMetric(resource.metrics, metricId, metric => ({ + ...metric, + customConfig: metric.customConfig + ? { + ...metric.customConfig, + workflowId, + mappings: metric.customConfig.mappings.map(mapping => ({ + ...mapping, + targetVariableId: null, + })), + } + : metric.customConfig, + })), + }, + }, + } + }) + }, + addCustomMetricMapping: (resourceType, resourceId, metricId) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + metrics: updateMetric(resource.metrics, metricId, metric => ({ + ...metric, + customConfig: metric.customConfig + ? { + ...metric.customConfig, + mappings: [ + ...metric.customConfig.mappings, + { + id: createId('mapping'), + sourceFieldId: null, + targetVariableId: null, + }, + ], + } + : metric.customConfig, + })), + }, + }, + } + }) + }, + updateCustomMetricMapping: (resourceType, resourceId, metricId, mappingId, patch) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + metrics: updateMetric(resource.metrics, metricId, metric => ({ + ...metric, + customConfig: metric.customConfig + ? { + ...metric.customConfig, + mappings: metric.customConfig.mappings.map(mapping => mapping.id === mappingId ? { ...mapping, ...patch } : mapping), + } + : metric.customConfig, + })), + }, + }, + } + }) + }, + removeCustomMetricMapping: (resourceType, resourceId, metricId, mappingId) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + metrics: updateMetric(resource.metrics, metricId, metric => ({ + ...metric, + customConfig: metric.customConfig + ? { + ...metric.customConfig, + mappings: metric.customConfig.mappings.filter(mapping => mapping.id !== mappingId), + } + : metric.customConfig, + })), + }, + }, + } + }) + }, + addConditionGroup: (resourceType, resourceId) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + conditions: [ + ...resource.conditions, + { + id: createId('group'), + logicalOperator: 'and', + items: [buildConditionItem(resourceType)], + }, + ], + }, + }, + } + }) + }, + removeConditionGroup: (resourceType, resourceId, groupId) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + conditions: resource.conditions.filter(group => group.id !== groupId), + }, + }, + } + }) + }, + setConditionGroupOperator: (resourceType, resourceId, groupId, logicalOperator) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + conditions: updateConditionGroup(resource.conditions, groupId, group => ({ + ...group, + logicalOperator, + })), + }, + }, + } + }) + }, + addConditionItem: (resourceType, resourceId, groupId) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + conditions: updateConditionGroup(resource.conditions, groupId, group => ({ + ...group, + items: [ + ...group.items, + buildConditionItem(resourceType), + ], + })), + }, + }, + } + }) + }, + removeConditionItem: (resourceType, resourceId, groupId, itemId) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + conditions: updateConditionGroup(resource.conditions, groupId, group => ({ + ...group, + items: group.items.filter(item => item.id !== itemId), + })), + }, + }, + } + }) + }, + updateConditionField: (resourceType, resourceId, groupId, itemId, fieldId) => { + const field = getEvaluationMockConfig(resourceType).fieldOptions.find(option => option.id === fieldId) + + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + conditions: updateConditionGroup(resource.conditions, groupId, group => ({ + ...group, + items: group.items.map((item) => { + if (item.id !== itemId) + return item + + return { + ...item, + fieldId, + operator: field ? getDefaultOperator(field.type) : item.operator, + value: getConditionValue(field, field ? getDefaultOperator(field.type) : item.operator), + } + }), + })), + }, + }, + } + }) + }, + updateConditionOperator: (resourceType, resourceId, groupId, itemId, operator) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + const fieldOptions = getEvaluationMockConfig(resourceType).fieldOptions + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + conditions: updateConditionGroup(resource.conditions, groupId, group => ({ + ...group, + items: group.items.map((item) => { + if (item.id !== itemId) + return item + + const field = fieldOptions.find(option => option.id === item.fieldId) + + return { + ...item, + operator, + value: getConditionValue(field, operator, item.value), + } + }), + })), + }, + }, + } + }) + }, + updateConditionValue: (resourceType, resourceId, groupId, itemId, value) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + conditions: updateConditionGroup(resource.conditions, groupId, group => ({ + ...group, + items: group.items.map(item => item.id === itemId ? { ...item, value } : item), + })), + }, + }, + } + }) + }, + setBatchTab: (resourceType, resourceId, tab) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + activeBatchTab: tab, + }, + }, + } + }) + }, + setUploadedFileName: (resourceType, resourceId, uploadedFileName) => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + uploadedFileName, + }, + }, + } + }) + }, + runBatchTest: (resourceType, resourceId) => { + const config = getEvaluationMockConfig(resourceType) + const recordId = createId('batch') + const nextRecord: BatchTestRecord = { + id: recordId, + fileName: get().resources[buildResourceKey(resourceType, resourceId)]?.uploadedFileName ?? config.templateFileName, + status: 'running', + startedAt: new Date().toLocaleTimeString(), + summary: config.historySummaryLabel, + } + + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + activeBatchTab: 'history', + batchRecords: [nextRecord, ...resource.batchRecords], + }, + }, + } + }) + + window.setTimeout(() => { + set((state) => { + const { resource, resourceKey } = withResourceState(state.resources, resourceType, resourceId) + + return { + resources: { + ...state.resources, + [resourceKey]: { + ...resource, + batchRecords: resource.batchRecords.map(record => record.id === recordId + ? { + ...record, + status: resource.metrics.length > 1 ? 'success' : 'failed', + } + : record), + }, + }, + } + }) + }, 1200) + }, +})) + +export const useEvaluationResource = (resourceType: EvaluationResourceType, resourceId: string) => { + const resourceKey = buildResourceKey(resourceType, resourceId) + return useEvaluationStore(state => state.resources[resourceKey] ?? (initialResourceCache[resourceKey] ??= buildInitialState(resourceType))) +} + +export const getAllowedOperators = (resourceType: EvaluationResourceType, fieldId: string | null) => { + const field = getEvaluationMockConfig(resourceType).fieldOptions.find(option => option.id === fieldId) + + if (!field) + return ['contains'] as ComparisonOperator[] + + return getComparisonOperators(field.type) +} diff --git a/web/app/components/evaluation/types.ts b/web/app/components/evaluation/types.ts new file mode 100644 index 0000000000..9b9cfda31c --- /dev/null +++ b/web/app/components/evaluation/types.ts @@ -0,0 +1,117 @@ +export type EvaluationResourceType = 'workflow' | 'pipeline' | 'snippet' + +export type MetricKind = 'builtin' | 'custom-workflow' + +export type BatchTestTab = 'input-fields' | 'history' + +export type FieldType = 'string' | 'number' | 'boolean' | 'enum' | 'time' + +export type ComparisonOperator + = | 'contains' + | 'not_contains' + | 'is' + | 'is_not' + | 'is_empty' + | 'is_not_empty' + | 'greater_than' + | 'less_than' + | 'greater_or_equal' + | 'less_or_equal' + | 'before' + | 'after' + +export type JudgeModelOption = { + id: string + label: string + provider: string +} + +export type MetricOption = { + id: string + label: string + description: string + group: string + badges: string[] +} + +export type EvaluationWorkflowOption = { + id: string + label: string + description: string + targetVariables: Array<{ + id: string + label: string + }> +} + +export type EvaluationFieldOption = { + id: string + label: string + group: string + type: FieldType + options?: Array<{ + value: string + label: string + }> +} + +export type CustomMetricMapping = { + id: string + sourceFieldId: string | null + targetVariableId: string | null +} + +export type CustomMetricConfig = { + workflowId: string | null + mappings: CustomMetricMapping[] +} + +export type EvaluationMetric = { + id: string + optionId: string + kind: MetricKind + label: string + description: string + badges: string[] + customConfig?: CustomMetricConfig +} + +export type JudgmentConditionItem = { + id: string + fieldId: string | null + operator: ComparisonOperator + value: string | number | boolean | null +} + +export type JudgmentConditionGroup = { + id: string + logicalOperator: 'and' | 'or' + items: JudgmentConditionItem[] +} + +export type BatchTestRecord = { + id: string + fileName: string + status: 'running' | 'success' | 'failed' + startedAt: string + summary: string +} + +export type EvaluationResourceState = { + judgeModelId: string | null + metrics: EvaluationMetric[] + conditions: JudgmentConditionGroup[] + activeBatchTab: BatchTestTab + uploadedFileName: string | null + batchRecords: BatchTestRecord[] +} + +export type EvaluationMockConfig = { + judgeModels: JudgeModelOption[] + builtinMetrics: MetricOption[] + workflowOptions: EvaluationWorkflowOption[] + fieldOptions: EvaluationFieldOption[] + templateFileName: string + batchRequirements: string[] + historySummaryLabel: string +} diff --git a/web/app/components/snippets/components/snippet-main.tsx b/web/app/components/snippets/components/snippet-main.tsx index fd9bd14373..d1f936a0c5 100644 --- a/web/app/components/snippets/components/snippet-main.tsx +++ b/web/app/components/snippets/components/snippet-main.tsx @@ -2,7 +2,7 @@ import type { NavIcon } from '@/app/components/app-sidebar/nav-link' import type { WorkflowProps } from '@/app/components/workflow' -import type { SnippetDetailPayload, SnippetInputField } from '@/models/snippet' +import type { SnippetDetailPayload, SnippetInputField, SnippetSection } from '@/models/snippet' import { RiFlaskFill, RiFlaskLine, @@ -17,6 +17,7 @@ import NavLink from '@/app/components/app-sidebar/nav-link' import SnippetInfo from '@/app/components/app-sidebar/snippet-info' import { useStore as useAppStore } from '@/app/components/app/store' import Toast from '@/app/components/base/toast' +import Evaluation from '@/app/components/evaluation' import { WorkflowWithInnerContext } from '@/app/components/workflow' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' import { useSnippetDetailStore } from '../store' @@ -25,6 +26,7 @@ import SnippetChildren from './snippet-children' type SnippetMainProps = { payload: SnippetDetailPayload snippetId: string + section: SnippetSection } & Pick const ORCHESTRATE_ICONS: { normal: NavIcon, selected: NavIcon } = { @@ -40,6 +42,7 @@ const EVALUATION_ICONS: { normal: NavIcon, selected: NavIcon } = { const SnippetMain = ({ payload, snippetId, + section, nodes, edges, viewport, @@ -51,7 +54,6 @@ const SnippetMain = ({ const [fields, setFields] = useState(payload.inputFields) const setAppSidebarExpand = useAppStore(state => state.setAppSidebarExpand) const { - activeSection, editingField, isEditorOpen, isInputPanelOpen, @@ -59,12 +61,10 @@ const SnippetMain = ({ closeEditor, openEditor, reset, - setActiveSection, setInputPanelOpen, toggleInputPanel, togglePublishMenu, } = useSnippetDetailStore(useShallow(state => ({ - activeSection: state.activeSection, editingField: state.editingField, isEditorOpen: state.isEditorOpen, isInputPanelOpen: state.isInputPanelOpen, @@ -72,7 +72,6 @@ const SnippetMain = ({ closeEditor: state.closeEditor, openEditor: state.openEditor, reset: state.reset, - setActiveSection: state.setActiveSection, setInputPanelOpen: state.setInputPanelOpen, toggleInputPanel: state.toggleInputPanel, togglePublishMenu: state.togglePublishMenu, @@ -145,15 +144,15 @@ const SnippetMain = ({ mode={mode} name={t('sectionOrchestrate')} iconMap={ORCHESTRATE_ICONS} - active={activeSection === 'orchestrate'} - onClick={() => setActiveSection('orchestrate')} + href={`/snippets/${snippetId}/orchestrate`} + active={section === 'orchestrate'} /> setActiveSection('evaluation')} + href={`/snippets/${snippetId}/evaluation`} + active={section === 'evaluation'} /> )} @@ -161,29 +160,35 @@ const SnippetMain = ({
- - - + {section === 'evaluation' + ? ( + + ) + : ( + + + + )}
diff --git a/web/app/components/snippets/index.tsx b/web/app/components/snippets/index.tsx index 690a8d6376..63392a24b5 100644 --- a/web/app/components/snippets/index.tsx +++ b/web/app/components/snippets/index.tsx @@ -1,5 +1,6 @@ 'use client' +import type { SnippetSection } from '@/models/snippet' import { useMemo } from 'react' import { useTranslation } from 'react-i18next' import Loading from '@/app/components/base/loading' @@ -14,10 +15,12 @@ import { useSnippetInit } from './hooks/use-snippet-init' type SnippetPageProps = { snippetId: string + section?: SnippetSection } const SnippetPage = ({ snippetId, + section = 'orchestrate', }: SnippetPageProps) => { const { t } = useTranslation('snippet') const { data, isLoading } = useSnippetInit(snippetId) @@ -62,6 +65,7 @@ const SnippetPage = ({