dify/web/i18n/en-US/evaluation.json
2026-05-05 20:41:53 +08:00

147 lines
9.0 KiB
JSON

{
"batch.description": "Execute batch evaluations and track performance history.",
"batch.downloadTemplate": "Download CSV Template",
"batch.emptyHistory": "No test history yet.",
"batch.example": "Example:",
"batch.fileRequired": "Upload an evaluation dataset file before running the test.",
"batch.loadingInputFields": "Loading template columns...",
"batch.noInputFields": "No published start node input fields found.",
"batch.noSnippetInputFields": "No published snippet input fields found.",
"batch.noTemplateColumns": "No template columns found.",
"batch.noticeDescription": "Configuration incomplete. Select the Judge Model and Metrics on the left to generate your batch test template.",
"batch.noticeTitle": "Quick start",
"batch.removeUploadedFile": "Remove uploaded file",
"batch.requirementsDescription": "The input variables required to run this batch test. Ensure your uploaded dataset matches these fields.",
"batch.requirementsTitle": "Data requirements",
"batch.run": "Run Test",
"batch.runFailed": "Failed to start batch test.",
"batch.runStarted": "Batch test started.",
"batch.status.failed": "Failed",
"batch.status.running": "Running",
"batch.status.success": "Success",
"batch.tabs.history": "Test History",
"batch.tabs.input-fields": "Input Fields",
"batch.templateColumnsError": "Failed to generate the CSV template.",
"batch.title": "Batch Test",
"batch.uploadAndRun": "Upload & Run Test",
"batch.uploadDropzoneEmphasis": "filled",
"batch.uploadDropzonePrefix": "Drag and drop your",
"batch.uploadDropzoneSuffix": "CSV Template",
"batch.uploadDropzoneUploadButton": "Upload file",
"batch.uploadError": "Failed to upload file.",
"batch.uploadHint": "Select a .csv file",
"batch.uploadTitle": "Upload test file",
"batch.uploading": "Uploading file...",
"batch.validation": "Complete the judge model, metrics, and custom mappings before running a batch test.",
"conditions.addCondition": "Add Condition",
"conditions.addGroup": "Add Condition Group",
"conditions.boolean.false": "False",
"conditions.boolean.true": "True",
"conditions.description": "Define additional rules for when results should pass or fail.",
"conditions.emptyDescription": "Add metrics above to configure pass/fail thresholds.",
"conditions.emptyTitle": "No conditions yet",
"conditions.fieldPlaceholder": "Select metric",
"conditions.groupLabel": "Group {{index}}",
"conditions.logical.and": "AND",
"conditions.logical.or": "OR",
"conditions.operators.contains": "Contains",
"conditions.operators.greater_or_equal": "Greater than or equal",
"conditions.operators.greater_than": "Greater than",
"conditions.operators.is": "Is",
"conditions.operators.is_empty": "Is empty",
"conditions.operators.is_not": "Is not",
"conditions.operators.is_not_empty": "Is not empty",
"conditions.operators.less_or_equal": "Less than or equal",
"conditions.operators.less_than": "Less than",
"conditions.operators.not_contains": "Does not contain",
"conditions.removeCondition": "Remove condition",
"conditions.removeGroup": "Remove condition group",
"conditions.selectFieldFirst": "Select a metric first",
"conditions.selectValue": "Choose a value",
"conditions.title": "Judgment Conditions",
"conditions.valuePlaceholder": "Enter a value",
"conditions.valueTypes.boolean": "Boolean",
"conditions.valueTypes.number": "Number",
"conditions.valueTypes.string": "String",
"config.saveFailed": "Failed to save evaluation configuration.",
"description": "Configure automated testing to grade your application's performance.",
"history.actions.downloadResultFile": "Download result",
"history.actions.downloadTestFile": "Download test file",
"history.actions.open": "Open history actions",
"history.columns.creator": "Creator",
"history.columns.status": "Status",
"history.columns.time": "Time",
"history.columns.version": "Version",
"history.creatorYou": "You",
"history.empty": "No test history yet",
"history.latestVersion": "Latest",
"history.searchPlaceholder": "Search",
"history.status.cancelled": "Cancelled",
"history.status.completed": "Completed",
"history.status.failed": "Failed",
"history.status.pending": "Pending",
"history.status.running": "Running",
"history.title": "Test History",
"judgeModel.description": "Choose the model used to score your evaluation results.",
"judgeModel.title": "Judge Model",
"metrics.add": "Add Metric",
"metrics.addCustom": "Add Custom Metrics",
"metrics.addNode": "Add Node",
"metrics.added": "Added",
"metrics.builtin.description.answerCorrectness": "Measures the factual accuracy and completeness of the model's answer relative to a ground-truth reference. It combines semantic similarity with key-fact coverage, so both meaning and content matter.",
"metrics.builtin.description.answerRelevancy": "Measures how well the model's response addresses the user's question. A high score means the answer stays on-topic; a low score indicates irrelevant content or a failure to answer the actual question.",
"metrics.builtin.description.contextPrecision": "Measures the proportion of retrieved context chunks that are actually relevant to the question (precision). A high score means the retrieval pipeline returns little noise.",
"metrics.builtin.description.contextRecall": "Measures the proportion of ground-truth information that is covered by the retrieved context chunks (recall). A high score means the retrieval pipeline does not miss important supporting evidence.",
"metrics.builtin.description.contextRelevance": "Measures how relevant each individual retrieved chunk is to the query. Similar to Context Precision but evaluated at the chunk level rather than against a reference answer.",
"metrics.builtin.description.faithfulness": "Measures whether every claim in the model's response is grounded in the provided retrieved context. A high score means the answer contains no hallucinated content; each statement can be traced back to a passage in the context.",
"metrics.builtin.description.semanticSimilarity": "Measures the cosine similarity between the model's response and the reference answer in an embedding space. It evaluates whether the two texts convey the same meaning, independent of factual correctness.",
"metrics.builtin.description.taskCompletion": "Measures whether the agent ultimately achieves the user's stated goal. It evaluates the reasoning chain, intermediate steps, and final output holistically; a high score means the task was fully accomplished.",
"metrics.builtin.description.toolCorrectness": "Measures the correctness of the tool calls made by the agent during task execution: both the choice of tool and the arguments passed. A high score means the agent's tool-use strategy matches the expected behavior.",
"metrics.collapseNodes": "Collapse nodes",
"metrics.custom.description": "Select an evaluation workflow and map your variables before running tests.",
"metrics.custom.footerDescription": "Connect your published evaluation workflows",
"metrics.custom.footerTitle": "Custom metrics",
"metrics.custom.limitDescription": "Only one custom metric can be added.",
"metrics.custom.mappingTitle": "Variable Mapping",
"metrics.custom.mappingWarning": "Complete the workflow selection and each variable mapping to enable batch tests.",
"metrics.custom.outputPlaceholder": "Select an output variable",
"metrics.custom.outputTitle": "Output",
"metrics.custom.title": "Custom Evaluator",
"metrics.custom.warningBadge": "Needs setup",
"metrics.custom.workflowLabel": "Evaluation Workflow",
"metrics.custom.workflowPlaceholder": "Select a workflow",
"metrics.description": "Choose from built-in metrics like Groundedness and Correctness to evaluate your workflow outputs.",
"metrics.expandNodes": "Expand nodes",
"metrics.groups.operations": "Operations",
"metrics.groups.other": "Other",
"metrics.groups.quality": "Quality",
"metrics.noNodesInWorkflow": "No selectable nodes",
"metrics.noResults": "No metrics or nodes were found",
"metrics.nodesAll": "All nodes",
"metrics.nodesLabel": "Node Scope",
"metrics.nodesSelected": "Selected nodes",
"metrics.remove": "Remove metric",
"metrics.searchNodeOrMetrics": "Search node or metrics",
"metrics.searchPlaceholder": "Search metrics",
"metrics.showLess": "Show less",
"metrics.showMore": "Show more",
"metrics.title": "Metrics",
"metrics.update": "Update",
"pipeline.passIf": "Pass if \u2265",
"pipeline.uploadAndRun": "Upload & Run Test",
"results.columns.actual": "Actual Result",
"results.columns.expected": "Expected Result",
"results.columns.query": "Query Content",
"results.empty": "No evaluation results yet.",
"results.export": "Export",
"results.loadFailed": "Failed to load evaluation results.",
"results.metricThreshold": "{{metric}} \u2265 {{threshold}}",
"results.noResult": "No Result",
"results.queryCount_one": "{{count}} query",
"results.queryCount_other": "{{count}} queries",
"results.status.failed": "Failed",
"results.status.passed": "Passed",
"results.title": "Test Details",
"title": "Evaluation"
}