{ "batch.description": "执行批量评测并追踪性能历史。", "batch.downloadTemplate": "下载 CSV 模板", "batch.emptyHistory": "还没有测试历史。", "batch.example": "示例:", "batch.fileRequired": "请先上传评估数据集文件,再运行测试。", "batch.loadingInputFields": "正在加载模板列...", "batch.noInputFields": "未找到已发布 Start 节点的输入字段。", "batch.noSnippetInputFields": "未找到已发布的片段输入字段。", "batch.noTemplateColumns": "未找到模板列。", "batch.noticeDescription": "配置尚未完成。请先在左侧选择判定模型和指标,以生成批量测试模板。", "batch.noticeTitle": "快速开始", "batch.removeUploadedFile": "移除已上传文件", "batch.requirementsDescription": "运行此批量测试所需的输入变量。请确保上传的数据集包含这些字段。", "batch.requirementsTitle": "数据要求", "batch.run": "运行测试", "batch.runFailed": "启动批量测试失败。", "batch.runStarted": "批量测试已启动。", "batch.status.failed": "失败", "batch.status.running": "运行中", "batch.status.success": "成功", "batch.tabs.history": "测试历史", "batch.tabs.input-fields": "输入字段", "batch.templateColumnsError": "生成 CSV 模板失败。", "batch.title": "批量测试", "batch.uploadAndRun": "上传并运行测试", "batch.uploadDropzoneEmphasis": "已填写的", "batch.uploadDropzonePrefix": "拖拽你的", "batch.uploadDropzoneSuffix": "CSV 模板", "batch.uploadDropzoneUploadButton": "上传文件", "batch.uploadError": "文件上传失败。", "batch.uploadHint": "选择 .csv 文件", "batch.uploadTitle": "上传测试文件", "batch.uploading": "文件上传中...", "batch.validation": "运行批量测试前,请先完成判定模型、指标和自定义映射配置。", "conditions.addCondition": "添加条件", "conditions.addGroup": "添加条件组", "conditions.boolean.false": "否", "conditions.boolean.true": "是", "conditions.description": "定义额外规则,决定结果何时通过或失败。", "conditions.emptyDescription": "请先添加上方指标,再配置通过 / 失败阈值。", "conditions.emptyTitle": "还没有条件", "conditions.fieldPlaceholder": "选择指标", "conditions.groupLabel": "条件组 {{index}}", "conditions.logical.and": "且", "conditions.logical.or": "或", "conditions.operators.contains": "包含", "conditions.operators.greater_or_equal": "大于等于", "conditions.operators.greater_than": "大于", "conditions.operators.is": "等于", "conditions.operators.is_empty": "为空", "conditions.operators.is_not": "不等于", "conditions.operators.is_not_empty": "不为空", "conditions.operators.less_or_equal": "小于等于", "conditions.operators.less_than": "小于", "conditions.operators.not_contains": "不包含", "conditions.removeCondition": "删除条件", "conditions.removeGroup": "删除条件组", "conditions.selectFieldFirst": "请先选择指标", "conditions.selectValue": "选择值", "conditions.title": "判定条件", "conditions.valuePlaceholder": "输入值", "conditions.valueTypes.boolean": "布尔", "conditions.valueTypes.number": "数值", "conditions.valueTypes.string": "文本", "config.saveFailed": "保存评测配置失败。", "description": "配置自动化测试,对应用表现进行评分。", "history.actions.downloadResultFile": "下载结果文件", "history.actions.downloadTestFile": "下载测试文件", "history.actions.open": "打开历史记录操作", "history.columns.creator": "创建人", "history.columns.status": "状态", "history.columns.time": "时间", "history.columns.version": "版本", "history.creatorYou": "你", "history.empty": "还没有测试历史", "history.latestVersion": "最新", "history.searchPlaceholder": "搜索", "history.status.cancelled": "已取消", "history.status.completed": "已完成", "history.status.failed": "失败", "history.status.pending": "等待中", "history.status.running": "运行中", "history.title": "测试历史", "judgeModel.description": "选择用于打分和判定评测结果的模型。", "judgeModel.title": "判定模型", "metrics.add": "添加指标", "metrics.addCustom": "添加自定义指标", "metrics.addNode": "添加节点", "metrics.added": "已添加", "metrics.builtin.description.answerCorrectness": "衡量模型回答相对于标准答案的事实准确性与完整性。它结合了语义相似度与关键信息覆盖情况,因此不仅关注表达含义,也关注内容是否完整准确。", "metrics.builtin.description.answerRelevancy": "衡量模型回答与用户问题的贴合程度。高分表示回答始终围绕问题展开;低分表示内容偏题,或没有真正回答用户的实际问题。", "metrics.builtin.description.contextPrecision": "衡量检索出的上下文片段中,实际与问题相关的内容占比(Precision)。高分表示检索流程带回的噪声较少。", "metrics.builtin.description.contextRecall": "衡量标准答案所需的真实信息,有多少被检索出的上下文片段覆盖到(Recall)。高分表示检索流程没有遗漏重要的支撑证据。", "metrics.builtin.description.contextRelevance": "衡量每一个被检索出的上下文片段与查询的相关程度。它与 CONTEXT_PRECISION 类似,但评估粒度在单个 chunk 层面,而不是相对于参考答案整体评估。", "metrics.builtin.description.faithfulness": "衡量模型回答中的每一个陈述,是否都能从提供的检索上下文中找到依据。高分表示回答中没有幻觉内容,每一条表述都可以追溯到上下文中的某个片段。", "metrics.builtin.description.semanticSimilarity": "衡量模型回答与参考答案在向量语义空间中的余弦相似度。它评估的是两段文本是否表达了相同含义,而不直接判断事实是否正确。", "metrics.builtin.description.taskCompletion": "衡量 Agent 是否最终完成了用户明确提出的目标。它会整体评估推理链路、中间步骤和最终输出;高分表示任务已被完整达成。", "metrics.builtin.description.toolCorrectness": "衡量 Agent 在任务执行过程中发起的工具调用是否正确,包括工具选择本身以及传入参数是否合理。高分表示 Agent 的工具使用策略符合预期行为。", "metrics.custom.description": "选择评测工作流并完成变量映射后即可运行测试。", "metrics.custom.footerDescription": "连接已发布的评测工作流", "metrics.custom.footerTitle": "自定义指标", "metrics.custom.limitDescription": "只能添加一个自定义指标。", "metrics.custom.mappingTitle": "变量映射", "metrics.custom.mappingWarning": "请先完成工作流选择和所有变量映射,再运行批量测试。", "metrics.custom.outputPlaceholder": "选择输出变量", "metrics.custom.outputTitle": "输出", "metrics.custom.title": "自定义评测器", "metrics.custom.warningBadge": "待配置", "metrics.custom.workflowLabel": "评测工作流", "metrics.custom.workflowPlaceholder": "选择工作流", "metrics.description": "从内置指标中选择,如 Groundedness 和 Correctness ,以评估您的工作流输出。", "metrics.expandNodes": "展开节点", "metrics.groups.operations": "运行", "metrics.groups.other": "其他", "metrics.groups.quality": "质量", "metrics.noNodesInWorkflow": "没有可选节点", "metrics.noResults": "没有匹配的指标。", "metrics.nodesAll": "全部节点", "metrics.nodesLabel": "节点范围", "metrics.nodesSelected": "已选节点", "metrics.remove": "删除指标", "metrics.searchNodeOrMetrics": "搜索节点或指标", "metrics.searchPlaceholder": "搜索指标", "metrics.showLess": "收起", "metrics.showMore": "展开更多", "metrics.title": "指标", "metrics.update": "更新", "pipeline.passIf": "通过条件 \u2265", "pipeline.uploadAndRun": "上传并运行测试", "results.columns.actual": "实际结果", "results.columns.expected": "预期结果", "results.columns.query": "Query 内容", "results.empty": "还没有评测结果。", "results.export": "导出", "results.loadFailed": "加载评测结果失败。", "results.metricThreshold": "{{metric}} \u2265 {{threshold}}", "results.noResult": "无结果", "results.queryCount_one": "{{count}} 条 query", "results.queryCount_other": "{{count}} 条 query", "results.status.failed": "失败", "results.status.passed": "通过", "results.title": "测试详情", "title": "评测" }