mirror of
https://github.com/langgenius/dify.git
synced 2026-05-12 07:37:09 +08:00
feat(web): evaluation run detail
This commit is contained in:
parent
8b6b3cddea
commit
79fc352a5a
@ -1,7 +1,7 @@
|
||||
import type { EvaluationResourceProps } from '../../types'
|
||||
import type { EvaluationLogFile } from '@/types/evaluation'
|
||||
import type { EvaluationLog, EvaluationLogFile } from '@/types/evaluation'
|
||||
import { keepPreviousData, useMutation, useQuery } from '@tanstack/react-query'
|
||||
import { useState } from 'react'
|
||||
import { useEffect, useMemo, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import Pagination from '@/app/components/base/pagination'
|
||||
import {
|
||||
@ -11,7 +11,9 @@ import {
|
||||
DropdownMenuTrigger,
|
||||
} from '@/app/components/base/ui/dropdown-menu'
|
||||
import { consoleClient, consoleQuery } from '@/service/client'
|
||||
import { cn } from '@/utils/classnames'
|
||||
import { downloadUrl } from '@/utils/download'
|
||||
import { useEvaluationResource, useEvaluationStore } from '../../store'
|
||||
|
||||
const PAGE_SIZE = 16
|
||||
const LOADING_ROW_IDS = ['1', '2', '3', '4', '5', '6']
|
||||
@ -23,12 +25,18 @@ const formatCreatedAt = (createdAt: string) => {
|
||||
return createdAt.includes('T') ? createdAt.slice(0, 10) : createdAt
|
||||
}
|
||||
|
||||
/**
 * Resolves the run identifier carried by a history log record.
 *
 * The fields are checked in the order `run_id`, `evaluation_run_id`, `id`;
 * the first one that is neither `null` nor `undefined` wins.
 *
 * @param record - Log entry to inspect.
 * @returns The first identifier that is set, or `null` when none is.
 */
const getLogRunId = (record: EvaluationLog) => {
  const candidates = [record.run_id, record.evaluation_run_id, record.id]

  for (const candidate of candidates) {
    if (candidate !== null && candidate !== undefined)
      return candidate
  }

  return null
}
|
||||
|
||||
const HistoryTab = ({
|
||||
resourceType,
|
||||
resourceId,
|
||||
}: EvaluationResourceProps) => {
|
||||
const { t } = useTranslation('evaluation')
|
||||
const [page, setPage] = useState(0)
|
||||
const resource = useEvaluationResource(resourceType, resourceId)
|
||||
const setSelectedRunId = useEvaluationStore(state => state.setSelectedRunId)
|
||||
const logsQuery = useQuery({
|
||||
...consoleQuery.evaluation.logs.queryOptions({
|
||||
input: {
|
||||
@ -58,10 +66,19 @@ const HistoryTab = ({
|
||||
downloadUrl({ url: fileInfo.download_url, fileName: file.name })
|
||||
},
|
||||
})
|
||||
const records = logsQuery.data?.data ?? []
|
||||
const records = useMemo(() => logsQuery.data?.data ?? [], [logsQuery.data?.data])
|
||||
const total = logsQuery.data?.total ?? 0
|
||||
const isInitialLoading = logsQuery.isLoading && !logsQuery.data
|
||||
|
||||
useEffect(() => {
|
||||
if (resource.selectedRunId)
|
||||
return
|
||||
|
||||
const firstRunId = records.map(getLogRunId).find((runId): runId is string => !!runId)
|
||||
if (firstRunId)
|
||||
setSelectedRunId(resourceType, resourceId, firstRunId)
|
||||
}, [records, resource.selectedRunId, resourceId, resourceType, setSelectedRunId])
|
||||
|
||||
return (
|
||||
<div className="flex min-h-full flex-col">
|
||||
<div className="min-h-0 flex-1 overflow-hidden">
|
||||
@ -98,7 +115,19 @@ const HistoryTab = ({
|
||||
</tr>
|
||||
))}
|
||||
{!isInitialLoading && records.map(record => (
|
||||
<tr key={`${record.created_at}-${record.test_file.id}`} className="border-b border-divider-subtle">
|
||||
<tr
|
||||
key={`${record.created_at}-${record.test_file.id}`}
|
||||
className={cn(
|
||||
'border-b border-divider-subtle',
|
||||
getLogRunId(record) && 'cursor-pointer hover:bg-state-base-hover',
|
||||
getLogRunId(record) === resource.selectedRunId && 'bg-background-default-subtle',
|
||||
)}
|
||||
onClick={() => {
|
||||
const runId = getLogRunId(record)
|
||||
if (runId)
|
||||
setSelectedRunId(resourceType, resourceId, runId)
|
||||
}}
|
||||
>
|
||||
<td className="h-10 truncate px-3 system-sm-regular text-text-secondary">{formatCreatedAt(record.created_at)}</td>
|
||||
<td className="h-10 truncate px-3 system-sm-regular text-text-secondary">{record.created_by}</td>
|
||||
<td className="h-10 truncate px-3 system-sm-regular text-text-secondary">{record.version || '-'}</td>
|
||||
@ -115,6 +144,7 @@ const HistoryTab = ({
|
||||
type="button"
|
||||
aria-label={t('history.actions.open')}
|
||||
className="inline-flex h-8 w-8 items-center justify-center rounded-md text-text-tertiary hover:bg-state-base-hover hover:text-text-secondary"
|
||||
onClick={event => event.stopPropagation()}
|
||||
/>
|
||||
)}
|
||||
>
|
||||
@ -123,7 +153,10 @@ const HistoryTab = ({
|
||||
<DropdownMenuContent popupClassName="w-[180px] rounded-lg border-[0.5px] border-components-panel-border py-1 shadow-lg">
|
||||
<DropdownMenuItem
|
||||
className="gap-2"
|
||||
onClick={() => fileDownloadMutation.mutate(record.test_file)}
|
||||
onClick={(event) => {
|
||||
event.stopPropagation()
|
||||
fileDownloadMutation.mutate(record.test_file)
|
||||
}}
|
||||
>
|
||||
<span aria-hidden="true" className="i-ri-file-download-line h-4 w-4" />
|
||||
{t('history.actions.downloadTestFile')}
|
||||
@ -131,7 +164,11 @@ const HistoryTab = ({
|
||||
<DropdownMenuItem
|
||||
className="gap-2"
|
||||
disabled={!record.result_file}
|
||||
onClick={() => record.result_file && fileDownloadMutation.mutate(record.result_file)}
|
||||
onClick={(event) => {
|
||||
event.stopPropagation()
|
||||
if (record.result_file)
|
||||
fileDownloadMutation.mutate(record.result_file)
|
||||
}}
|
||||
>
|
||||
<span aria-hidden="true" className="i-ri-download-2-line h-4 w-4" />
|
||||
{t('history.actions.downloadResultFile')}
|
||||
|
||||
@ -36,6 +36,7 @@ export const useInputFieldsActions = ({
|
||||
const { t } = useTranslation('evaluation')
|
||||
const resource = useEvaluationResource(resourceType, resourceId)
|
||||
const setBatchTab = useEvaluationStore(state => state.setBatchTab)
|
||||
const setSelectedRunId = useEvaluationStore(state => state.setSelectedRunId)
|
||||
const setUploadedFile = useEvaluationStore(state => state.setUploadedFile)
|
||||
const setUploadedFileName = useEvaluationStore(state => state.setUploadedFileName)
|
||||
const startRunMutation = useStartEvaluationRunMutation()
|
||||
@ -115,8 +116,9 @@ export const useInputFieldsActions = ({
|
||||
},
|
||||
body,
|
||||
}, {
|
||||
onSuccess: () => {
|
||||
onSuccess: (run) => {
|
||||
toast.success(t('batch.runStarted'))
|
||||
setSelectedRunId(resourceType, resourceId, run.id)
|
||||
setIsUploadPopoverOpen(false)
|
||||
setBatchTab(resourceType, resourceId, 'history')
|
||||
},
|
||||
|
||||
@ -84,7 +84,10 @@ const PipelineEvaluation = ({
|
||||
</div>
|
||||
|
||||
<div className="min-h-0 flex-1 bg-background-default">
|
||||
<PipelineResultsPanel />
|
||||
<PipelineResultsPanel
|
||||
resourceType={resourceType}
|
||||
resourceId={resourceId}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
|
||||
@ -1,16 +1,132 @@
|
||||
'use client'
|
||||
|
||||
import type { EvaluationResourceProps } from '../../types'
|
||||
import { skipToken, useMutation, useQuery } from '@tanstack/react-query'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { consoleClient, consoleQuery } from '@/service/client'
|
||||
import { downloadUrl } from '@/utils/download'
|
||||
import { useEvaluationResource } from '../../store'
|
||||
import { decodeModelSelection } from '../../utils'
|
||||
import PipelineResultsTable from './pipeline-results-table'
|
||||
import { getMetricColumns, getRunDate } from './pipeline-results-utils'
|
||||
|
||||
// Page size requested for the run-detail query; only the first page is fetched here.
const PAGE_SIZE = 100

/**
 * Panel that shows the details of the run currently selected for a resource.
 *
 * Reads `selectedRunId` from the evaluation store, loads that run's detail
 * (query skipped while nothing is selected) and renders a summary line, an
 * export button for the run's result file, and the per-item results table.
 *
 * States:
 * - no run selected, or a successfully loaded run without items: empty placeholder;
 * - query failed: the `results.loadFailed` message;
 * - otherwise: summary header plus table (the table draws its own loading rows).
 *
 * @param resourceType - Type of the evaluated resource, forwarded to the API calls.
 * @param resourceId - Identifier of the evaluated resource, forwarded to the API calls.
 */
const PipelineResultsPanel = ({
  resourceType,
  resourceId,
}: EvaluationResourceProps) => {
  const { t } = useTranslation('evaluation')
  const resource = useEvaluationResource(resourceType, resourceId)
  const selectedModel = decodeModelSelection(resource.judgeModelId)
  const selectedRunId = resource.selectedRunId
  const runDetailQuery = useQuery(consoleQuery.evaluation.runDetail.queryOptions({
    // `skipToken` keeps the query idle until a run has been selected.
    input: selectedRunId
      ? {
          params: {
            targetType: resourceType,
            targetId: resourceId,
            runId: selectedRunId,
          },
          query: {
            page: 1,
            page_size: PAGE_SIZE,
          },
        }
      : skipToken,
    refetchOnWindowFocus: false,
  }))
  const resultFileDownloadMutation = useMutation({
    // Resolves the file's download URL first, then hands it to the download helper.
    mutationFn: async (fileId: string) => {
      const fileInfo = await consoleClient.evaluation.file({
        params: {
          targetType: resourceType,
          targetId: resourceId,
          fileId,
        },
      })

      downloadUrl({ url: fileInfo.download_url, fileName: fileInfo.name })
    },
  })
  const runDetail = runDetailQuery.data
  const items = runDetail?.items.data ?? []
  const metricColumns = getMetricColumns(resource, items)
  const thresholdColumns = metricColumns.filter(column => column.threshold !== undefined)
  // A failed query also has no items; it must not be reported as "empty",
  // otherwise the load-failure message below could never be displayed.
  const isEmpty = !selectedRunId
    || (!runDetailQuery.isLoading && !runDetailQuery.isError && items.length === 0)

  if (isEmpty) {
    return (
      <div className="flex min-h-[360px] flex-1 items-center justify-center xl:min-h-0">
        <div className="flex flex-col items-center gap-4 px-4 text-center">
          <span aria-hidden="true" className="i-ri-file-list-3-line h-12 w-12 text-text-quaternary" />
          <div className="system-md-medium text-text-quaternary">{t('results.empty')}</div>
        </div>
      </div>
    )
  }

  return (
    <div className="flex h-full min-h-0 flex-col border-l border-divider-subtle bg-background-default">
      <div className="shrink-0 px-6 pt-4 pb-2">
        <h2 className="system-xl-semibold text-text-primary">{t('results.title')}</h2>
      </div>
      {runDetailQuery.isError && (
        <div className="px-6 py-4 system-sm-regular text-text-destructive">{t('results.loadFailed')}</div>
      )}
      {!runDetailQuery.isError && (
        <div className="flex min-h-0 flex-1 flex-col px-6 py-1">
          <div className="flex shrink-0 flex-wrap items-center justify-between gap-3 py-1">
            <div className="flex min-w-0 flex-wrap items-center gap-2 system-xs-regular text-text-secondary">
              <span>{getRunDate(runDetail?.run.started_at ?? runDetail?.run.created_at ?? null)}</span>
              <span aria-hidden="true">·</span>
              <span>{t('results.queryCount', { count: runDetail?.run.total_items ?? runDetail?.items.total ?? items.length })}</span>
              {selectedModel && (
                <>
                  <span aria-hidden="true">·</span>
                  <span className="inline-flex min-w-0 items-center gap-1.5 rounded-lg bg-background-section-burn px-2 py-1">
                    <span aria-hidden="true" className="i-ri-robot-2-line h-4 w-4 shrink-0 text-text-accent" />
                    <span className="truncate">{selectedModel.model}</span>
                  </span>
                </>
              )}
              {thresholdColumns.length > 0 && (
                <>
                  <span aria-hidden="true">·</span>
                  <span className="flex min-w-0 flex-wrap items-center gap-1">
                    {thresholdColumns.map(column => (
                      <span
                        key={column.id}
                        className="rounded-lg border-[0.5px] border-divider-subtle bg-background-section px-2 py-1 text-text-tertiary"
                      >
                        {t('results.metricThreshold', { metric: column.label, threshold: column.threshold })}
                      </span>
                    ))}
                  </span>
                </>
              )}
            </div>
            <button
              type="button"
              className="inline-flex h-7 shrink-0 items-center gap-1 rounded-md border-[0.5px] border-components-button-secondary-border bg-components-button-secondary-bg px-2 system-xs-medium text-components-button-secondary-text shadow-xs disabled:cursor-not-allowed disabled:opacity-50"
              disabled={!runDetail?.run.result_file_id || resultFileDownloadMutation.isPending}
              onClick={() => {
                if (runDetail?.run.result_file_id)
                  resultFileDownloadMutation.mutate(runDetail.run.result_file_id)
              }}
            >
              <span aria-hidden="true" className="i-ri-download-2-line h-3.5 w-3.5" />
              {t('results.export')}
            </button>
          </div>

          <PipelineResultsTable
            items={items}
            metricColumns={metricColumns}
            isLoading={runDetailQuery.isLoading}
          />
        </div>
      )}
    </div>
  )
}
|
||||
|
||||
@ -0,0 +1,118 @@
|
||||
import type { MetricColumn } from './pipeline-results-utils'
|
||||
import type { EvaluationRunItem } from '@/types/evaluation'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { cn } from '@/utils/classnames'
|
||||
import {
|
||||
formatValue,
|
||||
getIsItemPassed,
|
||||
getMetricTextClassName,
|
||||
getMetricValue,
|
||||
getQueryContent,
|
||||
} from './pipeline-results-utils'
|
||||
|
||||
// Stable keys for the placeholder rows rendered while the table is loading (one row per entry).
const LOADING_ROW_IDS = ['1', '2', '3', '4', '5', '6']
|
||||
|
||||
/**
 * Props of the results table.
 *
 * - `items`: run items, rendered one per row once loading has finished.
 * - `metricColumns`: metric columns appended after the fixed columns.
 * - `isLoading`: when true, placeholder rows are shown instead of `items`.
 */
type PipelineResultsTableProps = {
|
||||
items: EvaluationRunItem[]
|
||||
metricColumns: MetricColumn[]
|
||||
isLoading: boolean
|
||||
}
|
||||
|
||||
/**
 * Table listing the items of an evaluation run.
 *
 * Fixed columns: pass/fail status icon, query content, expected output and
 * actual output (or the item's error); one extra column is appended per entry
 * in `metricColumns`. While `isLoading` is true, pulsing placeholder rows are
 * rendered instead of the items.
 *
 * @param items - Run items to display, one row each.
 * @param metricColumns - Metric columns rendered after the fixed columns.
 * @param isLoading - Whether to show placeholder rows instead of `items`.
 */
const PipelineResultsTable = ({
  items,
  metricColumns,
  isLoading,
}: PipelineResultsTableProps) => {
  const { t } = useTranslation('evaluation')

  return (
    <div className="min-h-0 flex-1 overflow-auto py-2">
      <table className="min-w-full table-fixed border-collapse overflow-hidden rounded-lg">
        <colgroup>
          <col className="w-10" />
          <col className="w-[220px]" />
          <col className="w-[190px]" />
          <col className="w-[220px]" />
          {metricColumns.map(column => <col key={column.id} className="w-24" />)}
        </colgroup>
        <thead>
          <tr className="bg-background-section">
            <th className="h-7 rounded-l-lg" />
            <th className="h-7 px-3 text-left system-xs-medium-uppercase text-text-tertiary">{t('results.columns.query')}</th>
            <th className="h-7 px-3 text-left system-xs-medium-uppercase text-text-tertiary">{t('results.columns.expected')}</th>
            <th
              className={cn(
                'h-7 px-3 text-left system-xs-medium-uppercase text-text-tertiary',
                // Without metric columns this is the last header cell, so it carries the right corner.
                metricColumns.length === 0 && 'rounded-r-lg',
              )}
            >
              {t('results.columns.actual')}
            </th>
            {metricColumns.map((column, index) => (
              <th
                key={column.id}
                className={cn(
                  'h-7 px-3 text-left system-xs-medium-uppercase text-text-tertiary',
                  index === metricColumns.length - 1 && 'rounded-r-lg',
                )}
              >
                {column.label}
              </th>
            ))}
          </tr>
        </thead>
        <tbody>
          {isLoading && LOADING_ROW_IDS.map(rowId => (
            <tr key={rowId} className="border-b border-divider-subtle">
              <td colSpan={4 + metricColumns.length} className="h-10 px-3">
                <div className="h-4 animate-pulse rounded bg-background-section" />
              </td>
            </tr>
          ))}
          {!isLoading && items.map((item) => {
            const isPassed = getIsItemPassed(item, metricColumns)
            // `||` rather than `??`: an empty error string means "no error" (same
            // truthiness rule as the pass/fail check) and must not hide the output.
            const actualOutput = item.error || item.actual_output

            return (
              <tr key={item.id} className="border-b border-divider-subtle even:bg-background-default-subtle">
                <td className="h-10 px-3 align-top">
                  {/* role="img" gives the icon span a role on which aria-label is permitted */}
                  <span
                    role="img"
                    aria-label={isPassed ? t('results.status.passed') : t('results.status.failed')}
                    className={cn(
                      'mt-3 inline-block h-4 w-4',
                      isPassed
                        ? 'i-ri-check-line text-util-colors-green-green-600'
                        : 'i-ri-close-line text-util-colors-red-red-600',
                    )}
                  />
                </td>
                <td className="h-10 px-3 py-3 align-top system-sm-regular text-text-secondary">
                  <div className="line-clamp-2 break-words">{getQueryContent(item)}</div>
                </td>
                <td className="h-10 px-3 py-3 align-top system-sm-regular text-text-secondary">
                  <div className="line-clamp-2 break-words">{formatValue(item.expected_output)}</div>
                </td>
                <td className={cn(
                  'h-10 px-3 py-3 align-top system-sm-regular',
                  actualOutput ? 'text-text-secondary' : 'text-text-destructive',
                )}
                >
                  <div className="line-clamp-2 break-words">
                    {actualOutput ? formatValue(actualOutput) : t('results.noResult')}
                  </div>
                </td>
                {metricColumns.map((column) => {
                  const metricValue = getMetricValue(item.metrics, column)

                  return (
                    <td
                      key={column.id}
                      className={cn('h-10 px-3 py-3 align-top system-sm-regular', getMetricTextClassName(metricValue, column))}
                    >
                      {formatValue(metricValue)}
                    </td>
                  )
                })}
              </tr>
            )
          })}
        </tbody>
      </table>
    </div>
  )
}
|
||||
|
||||
export default PipelineResultsTable
|
||||
@ -0,0 +1,179 @@
|
||||
import type { EvaluationResourceState } from '../../types'
|
||||
import type { EvaluationRunItem, EvaluationRunMetric } from '@/types/evaluation'
|
||||
import { formatTime } from '@/utils/time'
|
||||
|
||||
// Input keys tried, in this order, when picking the text shown as an item's query content.
const PREFERRED_QUERY_INPUT_KEYS = ['query', 'question', 'input']
|
||||
|
||||
/**
 * One metric column of the results table.
 *
 * - `id`: identifier of the metric; used to match reported metrics and as the React key.
 * - `label`: header text; also accepted when matching reported metric names.
 * - `threshold`: optional pass mark; values greater than or equal to it count as passing.
 */
export type MetricColumn = {
|
||||
id: string
|
||||
label: string
|
||||
threshold?: number
|
||||
}
|
||||
|
||||
/**
 * Canonical comparison key for a metric name: lower-cased, with all
 * whitespace, underscores and hyphens removed.
 */
const normalizeMetricKey = (value: string) => {
  const lowered = value.toLowerCase()
  return lowered.replace(/[\s_-]/g, '')
}
|
||||
|
||||
/**
 * Turns a hyphen/underscore separated metric name into a display label:
 * empty segments are dropped, each remaining segment gets an upper-cased
 * first character, and the segments are joined with single spaces.
 */
const humanizeMetricName = (name: string) => {
  const words: string[] = []

  for (const segment of name.split(/[-_]/g)) {
    if (segment === '')
      continue

    words.push(`${segment.charAt(0).toUpperCase()}${segment.slice(1)}`)
  }

  return words.join(' ')
}
|
||||
|
||||
/**
 * Formats an arbitrary value for display in a table cell.
 *
 * - `null`, `undefined` and the empty string become `'-'`;
 * - strings are returned unchanged;
 * - integers are stringified, other numbers are locale-formatted with at most
 *   three fraction digits;
 * - booleans become `'true'` / `'false'`;
 * - anything else is serialized with `JSON.stringify`.
 */
export const formatValue = (value: unknown) => {
  const isBlank = value === null || value === undefined || value === ''
  if (isBlank)
    return '-'

  switch (typeof value) {
    case 'string':
      return value
    case 'number':
      if (Number.isInteger(value))
        return String(value)

      return value.toLocaleString(undefined, { maximumFractionDigits: 3 })
    case 'boolean':
      return value ? 'true' : 'false'
    default:
      return JSON.stringify(value)
  }
}
|
||||
|
||||
/**
 * Picks the text shown in the query column for a run item.
 *
 * The first preferred input key whose value is not `undefined` wins (even if
 * that value is `null` or empty). Otherwise the first input value that is not
 * `undefined`, `null` or the empty string is used. The chosen value is passed
 * through `formatValue`.
 */
export const getQueryContent = (item: EvaluationRunItem) => {
  const { inputs } = item

  const preferredKey = PREFERRED_QUERY_INPUT_KEYS.find(key => inputs[key] !== undefined)
  if (preferredKey !== undefined)
    return formatValue(inputs[preferredKey])

  const fallback = Object.values(inputs).find((candidate) => {
    return candidate !== undefined && candidate !== null && candidate !== ''
  })

  return formatValue(fallback)
}
|
||||
|
||||
/**
 * Looks up the value reported for a column among an item's metrics.
 *
 * A metric matches when its normalized name equals the column's normalized id
 * or normalized label; metrics without a name are ignored. The first match
 * wins.
 *
 * @returns The matching metric's `value`, or `undefined` when nothing matches.
 */
export const getMetricValue = (metrics: EvaluationRunMetric[], column: MetricColumn) => {
  const acceptedKeys = [normalizeMetricKey(column.id), normalizeMetricKey(column.label)]

  for (const metric of metrics) {
    if (!metric.name)
      continue

    if (acceptedKeys.includes(normalizeMetricKey(metric.name)))
      return metric.value
  }

  return undefined
}
|
||||
|
||||
/**
 * Numeric view of a column's metric value.
 *
 * Numbers are returned as-is; non-blank strings are converted with `Number`
 * and rejected when the result is `NaN`; everything else yields `null`.
 */
const getNumericMetricValue = (metrics: EvaluationRunMetric[], column: MetricColumn) => {
  const raw = getMetricValue(metrics, column)

  switch (typeof raw) {
    case 'number':
      return raw
    case 'string': {
      if (raw.trim() === '')
        return null

      const parsed = Number(raw)
      return Number.isNaN(parsed) ? null : parsed
    }
    default:
      return null
  }
}
|
||||
|
||||
/**
 * Chooses the text colour class for a metric cell.
 *
 * Non-numeric values and columns without a threshold use the neutral colour.
 * Otherwise: green when the value reaches the threshold, red when it is
 * exactly zero, and the warning colour for any other value below the threshold.
 */
export const getMetricTextClassName = (value: unknown, column: MetricColumn) => {
  let parsed: number | null = null
  if (typeof value === 'number')
    parsed = value
  else if (typeof value === 'string' && value.trim() !== '')
    parsed = Number(value)

  const { threshold } = column
  if (parsed === null || Number.isNaN(parsed) || threshold === undefined)
    return 'text-text-secondary'

  if (parsed >= threshold)
    return 'text-util-colors-green-green-600'

  return parsed === 0
    ? 'text-util-colors-red-red-600'
    : 'text-util-colors-warning-warning-600'
}
|
||||
|
||||
/**
 * Extracts an explicit pass/fail verdict from a judgment object.
 *
 * The keys `passed`, `pass`, `success` and `result` are inspected in that
 * order. A boolean value is returned directly; a string is compared
 * case-insensitively against known pass/fail words. Values that are neither,
 * or unrecognised strings, make the search continue with the next key.
 *
 * @returns `true` / `false` for a recognised verdict, `null` when none is found.
 */
const getJudgmentResult = (judgment: Record<string, unknown>) => {
  const passWords = ['passed', 'pass', 'success', 'succeeded', 'true']
  const failWords = ['failed', 'fail', 'failure', 'false']

  for (const key of ['passed', 'pass', 'success', 'result']) {
    const verdict = judgment[key]

    if (typeof verdict === 'boolean')
      return verdict

    if (typeof verdict !== 'string')
      continue

    const lowered = verdict.toLowerCase()
    if (passWords.includes(lowered))
      return true

    if (failWords.includes(lowered))
      return false
  }

  return null
}
|
||||
|
||||
/**
 * Decides whether a run item counts as passed.
 *
 * Order of precedence:
 * 1. an item with a truthy `error` fails;
 * 2. an explicit verdict found in `item.judgment` is used as-is;
 * 3. if any column defines a threshold, every such column must have a numeric
 *    metric value that reaches its threshold;
 * 4. otherwise the item passes when `overall_score` is `null` or greater than 0.
 */
export const getIsItemPassed = (item: EvaluationRunItem, metricColumns: MetricColumn[]) => {
  if (item.error)
    return false

  const verdict = getJudgmentResult(item.judgment)
  if (verdict !== null)
    return verdict

  const gatedColumns = metricColumns.filter(column => column.threshold !== undefined)
  if (gatedColumns.length === 0)
    return item.overall_score === null || item.overall_score > 0

  for (const column of gatedColumns) {
    const score = getNumericMetricValue(item.metrics, column)
    const threshold = column.threshold

    // Negated `>=` (not `<`) so a non-comparable threshold fails, as before.
    if (threshold === undefined || score === null || !(score >= threshold))
      return false
  }

  return true
}
|
||||
|
||||
/**
 * Builds the list of metric columns for the results table.
 *
 * Columns configured on the resource come first (keyed by normalized
 * `optionId`, carrying label and threshold). Metrics reported by the items are
 * then appended as extra columns, labelled with a humanized form of their name.
 *
 * A reported metric is NOT appended when its normalized name equals the
 * normalized id or the normalized label of a configured column. Cell lookup
 * (`getMetricValue`) resolves a metric by either id or label, so without the
 * label check a metric named after a configured label would produce a second
 * column showing the same values.
 *
 * @param resource - Resource state providing the configured metrics.
 * @param items - Run items whose reported metrics may add columns.
 * @returns Columns in insertion order: configured first, then discovered.
 */
export const getMetricColumns = (
  resource: EvaluationResourceState,
  items: EvaluationRunItem[],
) => {
  const columns = new Map<string, MetricColumn>()

  resource.metrics.forEach((metric) => {
    columns.set(normalizeMetricKey(metric.optionId), {
      id: metric.optionId,
      label: metric.label,
      threshold: metric.threshold,
    })
  })

  // Normalized labels of the configured columns; taken from the map so that an
  // overwritten duplicate optionId does not leave a stale label behind.
  const configuredLabelKeys = new Set(
    Array.from(columns.values(), column => normalizeMetricKey(column.label)),
  )

  items.forEach((item) => {
    item.metrics.forEach((metric) => {
      if (!metric.name)
        return

      const normalizedName = normalizeMetricKey(metric.name)
      if (columns.has(normalizedName) || configuredLabelKeys.has(normalizedName))
        return

      columns.set(normalizedName, {
        id: metric.name,
        label: humanizeMetricName(metric.name),
      })
    })
  })

  return Array.from(columns.values())
}
|
||||
|
||||
/**
 * Formats a run timestamp as `YYYY-MM-DD HH:mm`.
 *
 * Falsy timestamps (`null`, `0`) yield `'-'`. Values above 1e12 are passed on
 * unchanged; smaller values are multiplied by 1000 first (presumably seconds
 * versus milliseconds — confirm against the API).
 */
export const getRunDate = (timestamp: number | null) => {
  if (!timestamp)
    return '-'

  const isAlreadyScaled = timestamp > 1_000_000_000_000
  const date = isAlreadyScaled ? timestamp : timestamp * 1000

  return formatTime({ date, dateFormat: 'YYYY-MM-DD HH:mm' })
}
|
||||
@ -394,6 +394,7 @@ export const buildInitialState = (_resourceType: EvaluationResourceType): Evalua
|
||||
activeBatchTab: 'input-fields',
|
||||
uploadedFileId: null,
|
||||
uploadedFileName: null,
|
||||
selectedRunId: null,
|
||||
batchRecords: [],
|
||||
}
|
||||
}
|
||||
|
||||
@ -82,6 +82,7 @@ type EvaluationStore = {
|
||||
uploadedFile: { id: string, name: string } | null,
|
||||
) => void
|
||||
setUploadedFileName: (resourceType: EvaluationResourceType, resourceId: string, uploadedFileName: string | null) => void
|
||||
setSelectedRunId: (resourceType: EvaluationResourceType, resourceId: string, runId: string | null) => void
|
||||
runBatchTest: (resourceType: EvaluationResourceType, resourceId: string) => void
|
||||
}
|
||||
|
||||
@ -110,6 +111,7 @@ export const useEvaluationStore = create<EvaluationStore>((set, get) => ({
|
||||
activeBatchTab: state.resources[buildResourceKey(resourceType, resourceId)]?.activeBatchTab ?? 'input-fields',
|
||||
uploadedFileId: state.resources[buildResourceKey(resourceType, resourceId)]?.uploadedFileId ?? null,
|
||||
uploadedFileName: state.resources[buildResourceKey(resourceType, resourceId)]?.uploadedFileName ?? null,
|
||||
selectedRunId: state.resources[buildResourceKey(resourceType, resourceId)]?.selectedRunId ?? null,
|
||||
batchRecords: state.resources[buildResourceKey(resourceType, resourceId)]?.batchRecords ?? [],
|
||||
},
|
||||
},
|
||||
@ -393,6 +395,14 @@ export const useEvaluationStore = create<EvaluationStore>((set, get) => ({
|
||||
})),
|
||||
}))
|
||||
},
|
||||
// Stores `runId` as the selected run of the given resource, leaving its other fields untouched.
setSelectedRunId: (resourceType, resourceId, runId) => {
  set((state) => {
    const resources = updateResourceState(state.resources, resourceType, resourceId, (resource) => {
      return { ...resource, selectedRunId: runId }
    })

    return { resources }
  })
},
|
||||
runBatchTest: (resourceType, resourceId) => {
|
||||
const { uploadedFileName } = get().resources[buildResourceKey(resourceType, resourceId)] ?? buildInitialState(resourceType)
|
||||
const nextRecord = createBatchTestRecord(resourceType, uploadedFileName)
|
||||
|
||||
@ -138,6 +138,7 @@ export type EvaluationResourceState = {
|
||||
activeBatchTab: BatchTestTab
|
||||
uploadedFileId: string | null
|
||||
uploadedFileName: string | null
|
||||
selectedRunId: string | null
|
||||
batchRecords: BatchTestRecord[]
|
||||
}
|
||||
|
||||
|
||||
@ -115,6 +115,18 @@
|
||||
"metrics.update": "Update",
|
||||
"pipeline.passIf": "Pass if \u2265",
|
||||
"pipeline.uploadAndRun": "Upload & Run Test",
|
||||
"results.columns.actual": "Actual Result",
|
||||
"results.columns.expected": "Expected Result",
|
||||
"results.columns.query": "Query Content",
|
||||
"results.empty": "No evaluation results yet.",
|
||||
"results.export": "Export",
|
||||
"results.loadFailed": "Failed to load evaluation results.",
|
||||
"results.metricThreshold": "{{metric}} \u2265 {{threshold}}",
|
||||
"results.noResult": "No Result",
|
||||
"results.queryCount_one": "{{count}} query",
|
||||
"results.queryCount_other": "{{count}} queries",
|
||||
"results.status.failed": "Failed",
|
||||
"results.status.passed": "Passed",
|
||||
"results.title": "Test Details",
|
||||
"title": "Evaluation"
|
||||
}
|
||||
|
||||
@ -91,13 +91,36 @@
|
||||
"metrics.custom.workflowLabel": "评测工作流",
|
||||
"metrics.custom.workflowPlaceholder": "选择工作流",
|
||||
"metrics.description": "从内置指标中选择,如 Groundedness 和 Correctness ,以评估您的工作流输出。",
|
||||
"metrics.expandNodes": "展开节点",
|
||||
"metrics.groups.operations": "运行",
|
||||
"metrics.groups.other": "其他",
|
||||
"metrics.groups.quality": "质量",
|
||||
"metrics.noNodesInWorkflow": "当前工作流中没有 LLM 节点",
|
||||
"metrics.noResults": "没有匹配的指标。",
|
||||
"metrics.nodesAll": "全部节点",
|
||||
"metrics.nodesLabel": "节点范围",
|
||||
"metrics.nodesSelected": "已选节点",
|
||||
"metrics.remove": "删除指标",
|
||||
"metrics.searchNodeOrMetrics": "搜索节点或指标",
|
||||
"metrics.searchPlaceholder": "搜索指标",
|
||||
"metrics.showLess": "收起",
|
||||
"metrics.showMore": "展开更多",
|
||||
"metrics.title": "指标",
|
||||
"metrics.update": "更新",
|
||||
"pipeline.passIf": "通过条件 \u2265",
|
||||
"pipeline.uploadAndRun": "上传并运行测试",
|
||||
"results.columns.actual": "实际结果",
|
||||
"results.columns.expected": "预期结果",
|
||||
"results.columns.query": "Query 内容",
|
||||
"results.empty": "还没有评测结果。",
|
||||
"results.export": "导出",
|
||||
"results.loadFailed": "加载评测结果失败。",
|
||||
"results.metricThreshold": "{{metric}} \u2265 {{threshold}}",
|
||||
"results.noResult": "无结果",
|
||||
"results.queryCount_one": "{{count}} 条 query",
|
||||
"results.queryCount_other": "{{count}} 条 query",
|
||||
"results.status.failed": "失败",
|
||||
"results.status.passed": "通过",
|
||||
"results.title": "测试详情",
|
||||
"title": "评测"
|
||||
}
|
||||
|
||||
@ -83,6 +83,9 @@ export type EvaluationLogFile = {
|
||||
}
|
||||
|
||||
export type EvaluationLog = {
|
||||
id?: string
|
||||
run_id?: string
|
||||
evaluation_run_id?: string
|
||||
created_at: string
|
||||
created_by: string
|
||||
test_file: EvaluationLogFile
|
||||
|
||||
Loading…
Reference in New Issue
Block a user