feat(web): fetch dataset evaluation config

JzoNg 2026-04-09 14:21:01 +08:00
parent 73d95245f8
commit cfb5b9dfea
18 changed files with 467 additions and 97 deletions

View File

@@ -5,7 +5,7 @@ const Page = async (props: {
}) => {
const { appId } = await props.params
return <Evaluation resourceType="workflow" resourceId={appId} />
return <Evaluation resourceType="apps" resourceId={appId} />
}
export default Page

View File

@@ -5,7 +5,7 @@ const Page = async (props: {
}) => {
const { datasetId } = await props.params
return <Evaluation resourceType="pipeline" resourceId={datasetId} />
return <Evaluation resourceType="datasets" resourceId={datasetId} />
}
export default Page

View File

@@ -4,6 +4,7 @@ import { getEvaluationMockConfig } from '../mock'
import { useEvaluationStore } from '../store'
const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn())
const mockUseEvaluationConfig = vi.hoisted(() => vi.fn())
const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn())
vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({
@@ -38,6 +39,7 @@ vi.mock('@/app/components/header/account-setting/model-provider-page/model-selec
}))
vi.mock('@/service/use-evaluation', () => ({
useEvaluationConfig: (...args: unknown[]) => mockUseEvaluationConfig(...args),
useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args),
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
}))
@@ -46,6 +48,9 @@ describe('Evaluation', () => {
beforeEach(() => {
useEvaluationStore.setState({ resources: {} })
vi.clearAllMocks()
mockUseEvaluationConfig.mockReturnValue({
data: null,
})
mockUseAvailableEvaluationMetrics.mockReturnValue({
data: {
@@ -72,7 +77,7 @@ describe('Evaluation', () => {
it('should search, select metric nodes, and create a batch history record', async () => {
vi.useFakeTimers()
render(<Evaluation resourceType="workflow" resourceId="app-1" />)
render(<Evaluation resourceType="apps" resourceId="app-1" />)
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini')
@@ -113,7 +118,7 @@
})
it('should render time placeholders and hide the value row for empty operators', () => {
const resourceType = 'workflow'
const resourceType = 'apps'
const resourceId = 'app-2'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -126,7 +131,7 @@
store.ensureResource(resourceType, resourceId)
store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini')
const group = useEvaluationStore.getState().resources['workflow:app-2'].conditions[0]
const group = useEvaluationStore.getState().resources['apps:app-2'].conditions[0]
groupId = group.id
itemId = group.items[0].id
@@ -166,7 +171,7 @@
},
})
render(<Evaluation resourceType="workflow" resourceId="app-3" />)
render(<Evaluation resourceType="apps" resourceId="app-3" />)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -181,7 +186,7 @@
isLoading: false,
})
render(<Evaluation resourceType="workflow" resourceId="app-4" />)
render(<Evaluation resourceType="apps" resourceId="app-4" />)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -210,7 +215,7 @@
},
})
render(<Evaluation resourceType="workflow" resourceId="app-5" />)
render(<Evaluation resourceType="apps" resourceId="app-5" />)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
@@ -224,7 +229,7 @@
})
it('should render the pipeline-specific layout without auto-selecting a judge model', () => {
render(<Evaluation resourceType="pipeline" resourceId="dataset-1" />)
render(<Evaluation resourceType="datasets" resourceId="dataset-1" />)
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('empty')
expect(screen.getByText('evaluation.history.title')).toBeInTheDocument()
@@ -236,7 +241,7 @@
})
it('should enable pipeline batch actions after selecting a judge model and metric', () => {
render(<Evaluation resourceType="pipeline" resourceId="dataset-2" />)
render(<Evaluation resourceType="datasets" resourceId="dataset-2" />)
fireEvent.click(screen.getByRole('button', { name: 'select-model' }))
fireEvent.click(screen.getByRole('button', { name: /Context Precision/i }))

View File

@@ -1,3 +1,4 @@
import type { EvaluationConfig } from '@/types/evaluation'
import { getEvaluationMockConfig } from '../mock'
import {
getAllowedOperators,
@@ -12,7 +13,7 @@ describe('evaluation store', () => {
})
it('should configure a custom metric mapping to a valid state', () => {
const resourceType = 'workflow'
const resourceType = 'apps'
const resourceId = 'app-1'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -20,7 +21,7 @@
store.ensureResource(resourceType, resourceId)
store.addCustomMetric(resourceType, resourceId)
const initialMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.kind === 'custom-workflow')
const initialMetric = useEvaluationStore.getState().resources['apps:app-1'].metrics.find(metric => metric.kind === 'custom-workflow')
expect(initialMetric).toBeDefined()
expect(isCustomMetricConfigured(initialMetric!)).toBe(false)
@@ -34,14 +35,14 @@
targetVariableId: config.workflowOptions[0].targetVariables[0].id,
})
const configuredMetric = useEvaluationStore.getState().resources['workflow:app-1'].metrics.find(metric => metric.id === initialMetric!.id)
const configuredMetric = useEvaluationStore.getState().resources['apps:app-1'].metrics.find(metric => metric.id === initialMetric!.id)
expect(isCustomMetricConfigured(configuredMetric!)).toBe(true)
expect(configuredMetric!.customConfig!.workflowAppId).toBe('custom-workflow-app-id')
expect(configuredMetric!.customConfig!.workflowName).toBe(config.workflowOptions[0].label)
})
it('should add and remove builtin metrics', () => {
const resourceType = 'workflow'
const resourceType = 'apps'
const resourceId = 'app-2'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -49,16 +50,16 @@
store.ensureResource(resourceType, resourceId)
store.addBuiltinMetric(resourceType, resourceId, config.builtinMetrics[1].id)
const addedMetric = useEvaluationStore.getState().resources['workflow:app-2'].metrics.find(metric => metric.optionId === config.builtinMetrics[1].id)
const addedMetric = useEvaluationStore.getState().resources['apps:app-2'].metrics.find(metric => metric.optionId === config.builtinMetrics[1].id)
expect(addedMetric).toBeDefined()
store.removeMetric(resourceType, resourceId, addedMetric!.id)
expect(useEvaluationStore.getState().resources['workflow:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false)
expect(useEvaluationStore.getState().resources['apps:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false)
})
it('should upsert builtin metric node selections', () => {
const resourceType = 'workflow'
const resourceType = 'apps'
const resourceId = 'app-4'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -73,38 +74,38 @@
{ node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
])
const metric = useEvaluationStore.getState().resources['workflow:app-4'].metrics.find(item => item.optionId === metricId)
const metric = useEvaluationStore.getState().resources['apps:app-4'].metrics.find(item => item.optionId === metricId)
expect(metric?.nodeInfoList).toEqual([
{ node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
])
expect(useEvaluationStore.getState().resources['workflow:app-4'].metrics.filter(item => item.optionId === metricId)).toHaveLength(1)
expect(useEvaluationStore.getState().resources['apps:app-4'].metrics.filter(item => item.optionId === metricId)).toHaveLength(1)
})
it('should update condition groups and adapt operators to field types', () => {
const resourceType = 'pipeline'
const resourceType = 'datasets'
const resourceId = 'dataset-1'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
store.ensureResource(resourceType, resourceId)
const initialGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
const initialGroup = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0]
store.setConditionGroupOperator(resourceType, resourceId, initialGroup.id, 'or')
store.addConditionGroup(resourceType, resourceId)
const booleanField = config.fieldOptions.find(field => field.type === 'boolean')!
const currentItem = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0].items[0]
const currentItem = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0].items[0]
store.updateConditionField(resourceType, resourceId, initialGroup.id, currentItem.id, booleanField.id)
const updatedGroup = useEvaluationStore.getState().resources['pipeline:dataset-1'].conditions[0]
const updatedGroup = useEvaluationStore.getState().resources['datasets:dataset-1'].conditions[0]
expect(updatedGroup.logicalOperator).toBe('or')
expect(updatedGroup.items[0].operator).toBe('is')
expect(getAllowedOperators(resourceType, booleanField.id)).toEqual(['is', 'is_not'])
})
it('should support time fields and clear values for empty operators', () => {
const resourceType = 'workflow'
const resourceType = 'apps'
const resourceId = 'app-3'
const store = useEvaluationStore.getState()
const config = getEvaluationMockConfig(resourceType)
@@ -112,15 +113,89 @@
store.ensureResource(resourceType, resourceId)
const timeField = config.fieldOptions.find(field => field.type === 'time')!
const item = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
const item = useEvaluationStore.getState().resources['apps:app-3'].conditions[0].items[0]
store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, timeField.id)
store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].id, item.id, 'is_empty')
store.updateConditionField(resourceType, resourceId, useEvaluationStore.getState().resources['apps:app-3'].conditions[0].id, item.id, timeField.id)
store.updateConditionOperator(resourceType, resourceId, useEvaluationStore.getState().resources['apps:app-3'].conditions[0].id, item.id, 'is_empty')
const updatedItem = useEvaluationStore.getState().resources['workflow:app-3'].conditions[0].items[0]
const updatedItem = useEvaluationStore.getState().resources['apps:app-3'].conditions[0].items[0]
expect(getAllowedOperators(resourceType, timeField.id)).toEqual(['is', 'before', 'after', 'is_empty', 'is_not_empty'])
expect(requiresConditionValue('is_empty')).toBe(false)
expect(updatedItem.value).toBeNull()
})
it('should hydrate resource state from evaluation config', () => {
const resourceType = 'apps'
const resourceId = 'app-5'
const store = useEvaluationStore.getState()
const config: EvaluationConfig = {
evaluation_model: 'gpt-4o-mini',
evaluation_model_provider: 'openai',
metrics_config: {
default_metrics: [{
metric: 'faithfulness',
node_info_list: [
{ node_id: 'node-1', title: 'Retriever', type: 'retriever' },
],
}],
customized_metrics: {
evaluation_workflow_id: 'workflow-precision-review',
input_fields: {
'app.input.query': 'query',
},
},
},
judgement_conditions: [{
logical_operator: 'or',
items: [{
field_id: 'system.has_context',
operator: 'is',
value: true,
}],
}],
}
store.ensureResource(resourceType, resourceId)
store.setBatchTab(resourceType, resourceId, 'history')
store.setUploadedFileName(resourceType, resourceId, 'batch.csv')
useEvaluationStore.setState(state => ({
resources: {
...state.resources,
'apps:app-5': {
...state.resources['apps:app-5'],
batchRecords: [{
id: 'batch-1',
fileName: 'batch.csv',
status: 'success',
startedAt: '10:00:00',
summary: 'App evaluation batch',
}],
},
},
}))
store.hydrateResource(resourceType, resourceId, config)
const hydratedState = useEvaluationStore.getState().resources['apps:app-5']
expect(hydratedState.judgeModelId).toBe('openai::gpt-4o-mini')
expect(hydratedState.metrics).toHaveLength(2)
expect(hydratedState.metrics[0].optionId).toBe('faithfulness')
expect(hydratedState.metrics[0].nodeInfoList).toEqual([
{ node_id: 'node-1', title: 'Retriever', type: 'retriever' },
])
expect(hydratedState.metrics[1].kind).toBe('custom-workflow')
expect(hydratedState.metrics[1].customConfig?.workflowId).toBe('workflow-precision-review')
expect(hydratedState.metrics[1].customConfig?.mappings[0].sourceFieldId).toBe('app.input.query')
expect(hydratedState.metrics[1].customConfig?.mappings[0].targetVariableId).toBe('query')
expect(hydratedState.conditions[0].logicalOperator).toBe('or')
expect(hydratedState.conditions[0].items[0]).toMatchObject({
fieldId: 'system.has_context',
operator: 'is',
value: true,
})
expect(hydratedState.activeBatchTab).toBe('history')
expect(hydratedState.uploadedFileName).toBe('batch.csv')
expect(hydratedState.batchRecords).toHaveLength(1)
})
})

View File

@@ -1,13 +1,13 @@
'use client'
import type { EvaluationResourceProps } from '../types'
import type { EvaluationResourceProps } from '../../types'
import { useTranslation } from 'react-i18next'
import { useDocLink } from '@/context/i18n'
import BatchTestPanel from './batch-test-panel'
import ConditionsSection from './conditions-section'
import JudgeModelSelector from './judge-model-selector'
import MetricSection from './metric-section'
import SectionHeader, { InlineSectionHeader } from './section-header'
import BatchTestPanel from '../batch-test-panel'
import ConditionsSection from '../conditions-section'
import JudgeModelSelector from '../judge-model-selector'
import MetricSection from '../metric-section'
import SectionHeader, { InlineSectionHeader } from '../section-header'
const NonPipelineEvaluation = ({
resourceType,

View File

@@ -1,6 +1,6 @@
'use client'
import type { EvaluationResourceProps, MetricOption } from '../types'
import type { EvaluationResourceProps, MetricOption } from '../../types'
import { useEffect, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import Badge from '@/app/components/base/badge'
@@ -11,10 +11,10 @@ import { toast } from '@/app/components/base/ui/toast'
import { Tooltip, TooltipContent, TooltipTrigger } from '@/app/components/base/ui/tooltip'
import { useDocLink } from '@/context/i18n'
import { cn } from '@/utils/classnames'
import { getEvaluationMockConfig } from '../mock'
import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../store'
import JudgeModelSelector from './judge-model-selector'
import SectionHeader, { InlineSectionHeader } from './section-header'
import { getEvaluationMockConfig } from '../../mock'
import { isEvaluationRunnable, useEvaluationResource, useEvaluationStore } from '../../store'
import JudgeModelSelector from '../judge-model-selector'
import SectionHeader, { InlineSectionHeader } from '../section-header'
type PipelineMetricItemProps = {
metric: MetricOption

View File

@@ -1,5 +1,5 @@
import { act, fireEvent, render, screen } from '@testing-library/react'
import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
import { act, fireEvent, render, screen } from '@testing-library/react'
import MetricSection from '..'
import { useEvaluationStore } from '../../../store'
@@ -13,7 +13,7 @@ vi.mock('@/service/use-evaluation', () => ({
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
}))
const resourceType = 'workflow' as const
const resourceType = 'apps' as const
const resourceId = 'metric-section-resource'
const renderMetricSection = () => {

View File

@@ -21,7 +21,7 @@ const MetricSection = ({
const [nodeInfoMap, setNodeInfoMap] = useState<Record<string, NodeInfo[]>>({})
const hasMetrics = resource.metrics.length > 0
const hasBuiltinMetrics = resource.metrics.some(metric => metric.kind === 'builtin')
const shouldLoadNodeInfo = resourceType !== 'pipeline' && !!resourceId && hasBuiltinMetrics
const shouldLoadNodeInfo = resourceType !== 'datasets' && !!resourceId && hasBuiltinMetrics
const { data: availableMetricsData } = useAvailableEvaluationMetrics(shouldLoadNodeInfo)
const { mutate: loadNodeInfo } = useEvaluationNodeInfoMutation()
const availableMetricIds = useMemo(() => availableMetricsData?.metrics ?? [], [availableMetricsData?.metrics])

View File

@@ -13,7 +13,7 @@ import {
type UseMetricSelectorDataOptions = {
open: boolean
query: string
resourceType: 'workflow' | 'pipeline' | 'snippet'
resourceType: 'apps' | 'datasets' | 'snippets'
resourceId: string
nodeInfoMap: Record<string, NodeInfo[]>
setNodeInfoMap: (value: Record<string, NodeInfo[]>) => void
@@ -63,7 +63,7 @@ export const useMetricSelectorData = ({
if (!open)
return
if (resourceType === 'pipeline' || !resourceId || availableMetricIds.length === 0)
if (resourceType === 'datasets' || !resourceId || availableMetricIds.length === 0)
return
let isActive = true
@@ -107,7 +107,7 @@
|| metric.label.toLowerCase().includes(keyword)
|| metric.description.toLowerCase().includes(keyword)
const metricNodes = nodeInfoMap[metric.id] ?? []
const supportsNodeSelection = resourceType !== 'pipeline'
const supportsNodeSelection = resourceType !== 'datasets'
const hasNoNodeInfo = supportsNodeSelection && metricNodes.length === 0
if (hasNoNodeInfo) {

View File

@@ -2,8 +2,8 @@ import type { MetricOption } from '../../types'
import type { MetricVisualTone } from './types'
import type { EvaluationTargetType, NodeInfo } from '@/types/evaluation'
export const toEvaluationTargetType = (resourceType: 'workflow' | 'snippet'): EvaluationTargetType => {
return resourceType === 'snippet' ? 'snippets' : 'app'
export const toEvaluationTargetType = (resourceType: 'apps' | 'snippets'): EvaluationTargetType => {
return resourceType === 'snippets' ? 'snippets' : 'app'
}
const humanizeMetricId = (metricId: string) => {
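A quick note on toEvaluationTargetType above: route segments are plural while the API's EvaluationTargetType keeps a singular 'app'. A hedged illustration of the mapping (no new behavior, just the ternary spelled out):

toEvaluationTargetType('apps') // => 'app' (API target type stays singular)
toEvaluationTargetType('snippets') // => 'snippets' (already plural on both sides)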

View File

@@ -2,21 +2,31 @@
import type { EvaluationResourceProps } from './types'
import { useEffect } from 'react'
import NonPipelineEvaluation from './components/non-pipeline-evaluation'
import PipelineEvaluation from './components/pipeline-evaluation'
import { useEvaluationConfig } from '@/service/use-evaluation'
import NonPipelineEvaluation from './components/layout/non-pipeline-evaluation'
import PipelineEvaluation from './components/layout/pipeline-evaluation'
import { useEvaluationStore } from './store'
const Evaluation = ({
resourceType,
resourceId,
}: EvaluationResourceProps) => {
const { data: config } = useEvaluationConfig(resourceType, resourceId)
const ensureResource = useEvaluationStore(state => state.ensureResource)
const hydrateResource = useEvaluationStore(state => state.hydrateResource)
useEffect(() => {
ensureResource(resourceType, resourceId)
}, [ensureResource, resourceId, resourceType])
if (resourceType === 'pipeline') {
useEffect(() => {
if (!config)
return
hydrateResource(resourceType, resourceId, config)
}, [config, hydrateResource, resourceId, resourceType])
if (resourceType === 'datasets') {
return (
<PipelineEvaluation
resourceType={resourceType}
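For orientation, a sketch of the sequence the component now runs (ordinary React effect ordering; only names from this diff):

// 1. ensureResource(resourceType, resourceId) seeds default store state
// 2. useEvaluationConfig(resourceType, resourceId) fetches the saved config
// 3. once config resolves, hydrateResource(resourceType, resourceId, config)
//    replaces judge model, metrics, and conditions, and re-runs on refetch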

View File

@@ -160,7 +160,7 @@ export const getDefaultOperator = (fieldType: EvaluationFieldOption['type']): Co
}
export const getEvaluationMockConfig = (resourceType: EvaluationResourceType): EvaluationMockConfig => {
if (resourceType === 'pipeline') {
if (resourceType === 'datasets') {
return {
judgeModels,
builtinMetrics: pipelineBuiltinMetrics,
@@ -176,7 +176,7 @@ }
}
}
if (resourceType === 'snippet') {
if (resourceType === 'snippets') {
return {
judgeModels,
builtinMetrics,

View File

@@ -9,24 +9,195 @@ import type {
JudgmentConditionGroup,
MetricOption,
} from './types'
import type { NodeInfo } from '@/types/evaluation'
import type {
EvaluationConditionValue,
EvaluationConfig,
EvaluationCustomizedMetric,
EvaluationDefaultMetric,
EvaluationJudgementConditionGroup,
EvaluationJudgementConditionItem,
EvaluationMetricsConfig,
NodeInfo,
} from '@/types/evaluation'
import { getComparisonOperators, getDefaultOperator, getEvaluationMockConfig } from './mock'
import { encodeModelSelection } from './utils'
type EvaluationStoreResources = Record<string, EvaluationResourceState>
const createId = (prefix: string) => `${prefix}-${Math.random().toString(36).slice(2, 10)}`
const humanizeMetricId = (metricId: string) => {
return metricId
.split(/[-_]/g)
.filter(Boolean)
.map(part => part.charAt(0).toUpperCase() + part.slice(1))
.join(' ')
}
const resolveMetricOption = (resourceType: EvaluationResourceType, metricId: string): MetricOption => {
const config = getEvaluationMockConfig(resourceType)
return config.builtinMetrics.find(metric => metric.id === metricId) ?? {
id: metricId,
label: humanizeMetricId(metricId),
description: '',
group: config.builtinMetrics[0]?.group ?? 'other',
badges: ['Built-in'],
}
}
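When a server-sent metric id is missing from the mock's builtinMetrics, resolveMetricOption degrades gracefully instead of failing; an illustrative id, assumed not to be a mock built-in:

// humanizeMetricId splits on '-'/'_' and capitalizes each part; the fallback
// option carries an empty description and the first built-in group (or 'other').
resolveMetricOption('apps', 'some_new_metric').label // => 'Some New Metric'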
const normalizeNodeInfoList = (value: NodeInfo[] | undefined): NodeInfo[] => {
if (!value?.length)
return []
return value
.map((item) => {
const nodeId = typeof item.node_id === 'string' ? item.node_id : ''
const title = typeof item.title === 'string' ? item.title : nodeId
const type = typeof item.type === 'string' ? item.type : ''
if (!nodeId)
return null
return {
node_id: nodeId,
title,
type,
}
})
.filter((item): item is NodeInfo => !!item)
}
const normalizeDefaultMetrics = (
resourceType: EvaluationResourceType,
value: EvaluationDefaultMetric[] | undefined,
): EvaluationMetric[] => {
if (!value?.length)
return []
return value
.map((item) => {
const metricId = typeof item.metric === 'string' ? item.metric : ''
if (!metricId)
return null
const metricOption = resolveMetricOption(resourceType, metricId)
return createBuiltinMetric(metricOption, normalizeNodeInfoList(item.node_info_list ?? []))
})
.filter((item): item is EvaluationMetric => !!item)
}
const normalizeCustomMetricMappings = (
value: EvaluationCustomizedMetric['input_fields'],
): CustomMetricMapping[] => {
if (!value)
return [createCustomMetricMapping()]
const mappings = Object.entries(value)
.filter((entry): entry is [string, string] => {
const [, targetVariableId] = entry
return typeof targetVariableId === 'string' && !!targetVariableId
})
.map(([sourceFieldId, targetVariableId]) => ({
id: createId('mapping'),
sourceFieldId,
targetVariableId,
}))
return mappings.length > 0 ? mappings : [createCustomMetricMapping()]
}
const normalizeCustomMetric = (
value: EvaluationCustomizedMetric | null | undefined,
): EvaluationMetric[] => {
if (!value)
return []
const workflowId = typeof value.evaluation_workflow_id === 'string' ? value.evaluation_workflow_id : null
if (!workflowId)
return []
const customMetric = createCustomMetric()
return [{
...customMetric,
customConfig: customMetric.customConfig
? {
...customMetric.customConfig,
workflowId,
mappings: normalizeCustomMetricMappings(value.input_fields),
}
: customMetric.customConfig,
}]
}
const normalizeConditionItem = (
resourceType: EvaluationResourceType,
value: EvaluationJudgementConditionItem,
): JudgmentConditionGroup['items'][number] => {
const fieldId = typeof value.fieldId === 'string'
? value.fieldId
: typeof value.field_id === 'string'
? value.field_id
: null
const operatorValue = typeof value.operator === 'string' ? value.operator : null
const field = getEvaluationMockConfig(resourceType).fieldOptions.find(option => option.id === fieldId)
const allowedOperators = field ? getComparisonOperators(field.type) : ['contains']
const operator = operatorValue && allowedOperators.includes(operatorValue as ComparisonOperator)
? operatorValue as ComparisonOperator
: field
? getDefaultOperator(field.type)
: 'contains'
const rawValue: EvaluationConditionValue = value.value ?? null
return {
id: typeof value.id === 'string' ? value.id : createId('condition'),
fieldId,
operator,
value: getConditionValue(field, operator, rawValue),
}
}
const normalizeConditionGroups = (
resourceType: EvaluationResourceType,
value: EvaluationConfig['judgement_conditions'],
): JudgmentConditionGroup[] => {
const groupsValue: EvaluationJudgementConditionGroup[] = Array.isArray(value)
? value
: Array.isArray(value?.groups)
? value.groups
: []
const groups = groupsValue
.map((group) => {
const itemsValue = Array.isArray(group.items) ? group.items : []
const items = itemsValue
.map(item => normalizeConditionItem(resourceType, item))
if (items.length === 0)
return null
return {
id: typeof group.id === 'string' ? group.id : createId('group'),
logicalOperator: group.logicalOperator === 'or' || group.logical_operator === 'or' ? 'or' : 'and',
items,
} satisfies JudgmentConditionGroup
})
.filter((group): group is JudgmentConditionGroup => !!group)
return groups.length > 0 ? groups : [createConditionGroup(resourceType)]
}
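Three fallback behaviors in the condition normalizers above are easy to miss, so spelled out here (inferred from the code; field types illustrative):

// - an operator the field type does not allow is coerced to the field's default,
//   e.g. a 'time' field arriving with 'contains' gets getDefaultOperator('time')
// - value-less operators stay value-less: getConditionValue(field, 'is_empty', true) → null
// - a group with no items is dropped, and if no group survives,
//   createConditionGroup(resourceType) supplies one default group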
export const buildResourceKey = (resourceType: EvaluationResourceType, resourceId: string) => `${resourceType}:${resourceId}`
const conditionOperatorsWithoutValue: ComparisonOperator[] = ['is_empty', 'is_not_empty']
export const requiresConditionValue = (operator: ComparisonOperator) => !conditionOperatorsWithoutValue.includes(operator)
export const getConditionValue = (
export function getConditionValue(
field: EvaluationFieldOption | undefined,
operator: ComparisonOperator,
previousValue: string | number | boolean | null = null,
) => {
) {
if (!field || !requiresConditionValue(operator))
return null
@@ -42,36 +213,42 @@ export const getConditionValue = (
return typeof previousValue === 'string' ? previousValue : null
}
export const createBuiltinMetric = (metric: MetricOption, nodeInfoList: NodeInfo[] = []): EvaluationMetric => ({
id: createId('metric'),
optionId: metric.id,
kind: 'builtin',
label: metric.label,
description: metric.description,
badges: metric.badges,
nodeInfoList,
})
export function createBuiltinMetric(metric: MetricOption, nodeInfoList: NodeInfo[] = []): EvaluationMetric {
return {
id: createId('metric'),
optionId: metric.id,
kind: 'builtin',
label: metric.label,
description: metric.description,
badges: metric.badges,
nodeInfoList,
}
}
export const createCustomMetricMapping = (): CustomMetricMapping => ({
id: createId('mapping'),
sourceFieldId: null,
targetVariableId: null,
})
export function createCustomMetricMapping(): CustomMetricMapping {
return {
id: createId('mapping'),
sourceFieldId: null,
targetVariableId: null,
}
}
export const createCustomMetric = (): EvaluationMetric => ({
id: createId('metric'),
optionId: createId('custom'),
kind: 'custom-workflow',
label: 'Custom Evaluator',
description: 'Map workflow variables to your evaluation inputs.',
badges: ['Workflow'],
customConfig: {
workflowId: null,
workflowAppId: null,
workflowName: null,
mappings: [createCustomMetricMapping()],
},
})
export function createCustomMetric(): EvaluationMetric {
return {
id: createId('metric'),
optionId: createId('custom'),
kind: 'custom-workflow',
label: 'Custom Evaluator',
description: 'Map workflow variables to your evaluation inputs.',
badges: ['Workflow'],
customConfig: {
workflowId: null,
workflowAppId: null,
workflowName: null,
mappings: [createCustomMetricMapping()],
},
}
}
export const buildConditionItem = (resourceType: EvaluationResourceType) => {
const field = getEvaluationMockConfig(resourceType).fieldOptions[0]
@@ -85,11 +262,13 @@ export const buildConditionItem = (resourceType: EvaluationResourceType) => {
}
}
export const createConditionGroup = (resourceType: EvaluationResourceType): JudgmentConditionGroup => ({
id: createId('group'),
logicalOperator: 'and',
items: [buildConditionItem(resourceType)],
})
export function createConditionGroup(resourceType: EvaluationResourceType): JudgmentConditionGroup {
return {
id: createId('group'),
logicalOperator: 'and',
items: [buildConditionItem(resourceType)],
}
}
export const buildInitialState = (resourceType: EvaluationResourceType): EvaluationResourceState => {
return {
@@ -102,6 +281,24 @@ export const buildInitialState = (resourceType: EvaluationResourceType): Evaluat
}
}
export const buildStateFromEvaluationConfig = (
resourceType: EvaluationResourceType,
config: EvaluationConfig,
): EvaluationResourceState => {
const metricsConfig: EvaluationMetricsConfig = config.metrics_config ?? {}
const defaultMetrics = normalizeDefaultMetrics(resourceType, metricsConfig.default_metrics)
const customMetrics = normalizeCustomMetric(metricsConfig.customized_metrics)
return {
...buildInitialState(resourceType),
judgeModelId: config.evaluation_model && config.evaluation_model_provider
? encodeModelSelection(config.evaluation_model_provider, config.evaluation_model)
: null,
metrics: [...defaultMetrics, ...customMetrics],
conditions: normalizeConditionGroups(resourceType, config.judgement_conditions),
}
}
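A worked example of the builder, reusing the payload from the new store spec (expected values mirror that spec, not an API contract):

const state = buildStateFromEvaluationConfig('apps', {
  evaluation_model: 'gpt-4o-mini',
  evaluation_model_provider: 'openai',
  metrics_config: {
    default_metrics: [{ metric: 'faithfulness', node_info_list: [{ node_id: 'node-1', title: 'Retriever', type: 'retriever' }] }],
    customized_metrics: { evaluation_workflow_id: 'workflow-precision-review', input_fields: { 'app.input.query': 'query' } },
  },
  judgement_conditions: [{ logical_operator: 'or', items: [{ field_id: 'system.has_context', operator: 'is', value: true }] }],
})
// state.judgeModelId === 'openai::gpt-4o-mini'
// state.metrics → one 'builtin' metric (faithfulness) plus one 'custom-workflow' metric
// state.conditions → a single group with logicalOperator 'or'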
const getResourceState = (
resources: EvaluationStoreResources,
resourceType: EvaluationResourceType,

View File

@@ -3,13 +3,14 @@ import type {
EvaluationResourceState,
EvaluationResourceType,
} from './types'
import type { NodeInfo } from '@/types/evaluation'
import type { EvaluationConfig, NodeInfo } from '@/types/evaluation'
import { create } from 'zustand'
import { getDefaultOperator, getEvaluationMockConfig } from './mock'
import {
buildConditionItem,
buildInitialState,
buildResourceKey,
buildStateFromEvaluationConfig,
createBatchTestRecord,
createBuiltinMetric,
createConditionGroup,
@@ -28,6 +29,7 @@ import {
type EvaluationStore = {
resources: Record<string, EvaluationResourceState>
ensureResource: (resourceType: EvaluationResourceType, resourceId: string) => void
hydrateResource: (resourceType: EvaluationResourceType, resourceId: string, config: EvaluationConfig) => void
setJudgeModel: (resourceType: EvaluationResourceType, resourceId: string, judgeModelId: string) => void
addBuiltinMetric: (resourceType: EvaluationResourceType, resourceId: string, optionId: string, nodeInfoList?: NodeInfo[]) => void
addCustomMetric: (resourceType: EvaluationResourceType, resourceId: string) => void
@@ -82,6 +84,19 @@ export const useEvaluationStore = create<EvaluationStore>((set, get) => ({
},
}))
},
hydrateResource: (resourceType, resourceId, config) => {
set(state => ({
resources: {
...state.resources,
[buildResourceKey(resourceType, resourceId)]: {
...buildStateFromEvaluationConfig(resourceType, config),
activeBatchTab: state.resources[buildResourceKey(resourceType, resourceId)]?.activeBatchTab ?? 'input-fields',
uploadedFileName: state.resources[buildResourceKey(resourceType, resourceId)]?.uploadedFileName ?? null,
batchRecords: state.resources[buildResourceKey(resourceType, resourceId)]?.batchRecords ?? [],
},
},
}))
},
setJudgeModel: (resourceType, resourceId, judgeModelId) => {
set(state => ({
resources: updateResourceState(state.resources, resourceType, resourceId, resource => ({
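A minimal sketch of the hydrate flow (ids illustrative): the server config replaces judge model, metrics, and conditions, while client-only batch state survives refetches.

const { ensureResource, hydrateResource } = useEvaluationStore.getState()
ensureResource('datasets', 'dataset-1') // seeds defaults under the key 'datasets:dataset-1'
hydrateResource('datasets', 'dataset-1', config) // config: EvaluationConfig from the query
// activeBatchTab, uploadedFileName, and batchRecords keep their current values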

View File

@@ -1,6 +1,6 @@
import type { NodeInfo } from '@/types/evaluation'
export type EvaluationResourceType = 'workflow' | 'pipeline' | 'snippet'
export type EvaluationResourceType = 'apps' | 'datasets' | 'snippets'
export type EvaluationResourceProps = {
resourceType: EvaluationResourceType

View File

@@ -22,7 +22,7 @@ const SnippetEvaluationPage = ({ snippetId }: SnippetEvaluationPageProps) => {
snippet={snippet}
section="evaluation"
>
<Evaluation resourceType="snippet" resourceId={snippetId} />
<Evaluation resourceType="snippets" resourceId={snippetId} />
</SnippetLayout>
)
}

View File

@@ -1,4 +1,5 @@
import type { AvailableEvaluationWorkflowsResponse } from '@/types/evaluation'
import type { EvaluationResourceType } from '@/app/components/evaluation/types'
import type { AvailableEvaluationWorkflowsResponse, EvaluationConfig } from '@/types/evaluation'
import {
keepPreviousData,
useInfiniteQuery,
@@ -28,6 +29,45 @@ const normalizeAvailableEvaluationWorkflowsParams = (params: AvailableEvaluation
}
}
const toEvaluationTargetType = (resourceType: Exclude<EvaluationResourceType, 'datasets'>) => {
return resourceType === 'snippets' ? 'snippets' : 'app'
}
const getEvaluationConfigQueryOptions = (
resourceType: EvaluationResourceType,
resourceId: string,
) => {
if (resourceType === 'datasets') {
return consoleQuery.datasetEvaluation.config.queryOptions({
input: {
params: {
datasetId: resourceId,
},
},
enabled: !!resourceId,
refetchOnWindowFocus: false,
})
}
return consoleQuery.evaluation.config.queryOptions({
input: {
params: {
targetType: toEvaluationTargetType(resourceType),
targetId: resourceId,
},
},
enabled: !!resourceId,
refetchOnWindowFocus: false,
})
}
export const useEvaluationConfig = (
resourceType: EvaluationResourceType,
resourceId: string,
) => {
return useQuery<EvaluationConfig>(getEvaluationConfigQueryOptions(resourceType, resourceId))
}
export const useAvailableEvaluationMetrics = (enabled = true) => {
return useQuery(consoleQuery.evaluation.availableMetrics.queryOptions({
enabled,
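A hedged usage sketch (the component and markup are illustrative, not part of this commit; EvaluationResourceProps comes from the evaluation component types): the hook hides the dataset/app endpoint split behind resourceType, so callers never assemble targetType/targetId themselves.

const JudgeModelLabel = ({ resourceType, resourceId }: EvaluationResourceProps) => {
  const { data: config } = useEvaluationConfig(resourceType, resourceId)
  if (!config?.evaluation_model)
    return null
  return <span>{`${config.evaluation_model_provider}::${config.evaluation_model}`}</span>
}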

View File

@@ -1,10 +1,38 @@
export type EvaluationTargetType = 'app' | 'snippets'
export type EvaluationMetricsConfig = {
default_metrics?: EvaluationDefaultMetric[]
customized_metrics?: EvaluationCustomizedMetric | null
}
export type EvaluationConditionValue = string | number | boolean | null
export type EvaluationJudgementConditionItem = {
id?: string
fieldId?: string
field_id?: string
operator?: string
value?: EvaluationConditionValue
}
export type EvaluationJudgementConditionGroup = {
id?: string
logicalOperator?: 'and' | 'or'
logical_operator?: 'and' | 'or'
items?: EvaluationJudgementConditionItem[]
}
export type EvaluationJudgementConditions
= | EvaluationJudgementConditionGroup[]
| {
groups?: EvaluationJudgementConditionGroup[]
}
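Both payload shapes the union above admits, including the snake_case/camelCase key variants the store normalizer accepts (values illustrative):

const asArray: EvaluationJudgementConditions = [
  { logical_operator: 'or', items: [{ field_id: 'system.has_context', operator: 'is', value: true }] },
]
const asWrappedGroups: EvaluationJudgementConditions = {
  groups: [{ logicalOperator: 'and', items: [{ fieldId: 'system.has_context', operator: 'is_empty' }] }],
}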
export type EvaluationConfig = {
evaluation_model: string | null
evaluation_model_provider: string | null
metrics_config: Record<string, unknown> | null
judgement_conditions: Record<string, unknown> | null
metrics_config: EvaluationMetricsConfig | null
judgement_conditions: EvaluationJudgementConditions | null
}
export type NodeInfo = {
@@ -20,8 +48,8 @@ export type EvaluationDefaultMetric = {
export type EvaluationCustomizedMetric = {
evaluation_workflow_id?: string
input_fields?: Record<string, unknown>
output_fields?: Record<string, unknown>[]
input_fields?: Record<string, string | null | undefined>
output_fields?: Array<Record<string, string | null | undefined>>
}
export type EvaluationConfigData = {
@@ -29,7 +57,7 @@ export type EvaluationConfigData = {
evaluation_model_provider?: string
default_metrics?: EvaluationDefaultMetric[]
customized_metrics?: EvaluationCustomizedMetric | null
judgment_config?: Record<string, unknown> | null
judgment_config?: EvaluationJudgementConditions | null
}
export type EvaluationRunRequest = EvaluationConfigData & {