feat: Parse variable expressions to build the input parameters for the evaluation workflow.

FFXN 2026-03-13 10:09:38 +08:00
parent 18198b88ff
commit c68194093e
2 changed files with 40 additions and 22 deletions

View File: core/evaluation/base_evaluation_instance.py

@@ -182,14 +182,14 @@ class BaseEvaluationInstance(ABC):
         # Check if the entire value is a single expression.
         full_match = VARIABLE_REGEX.fullmatch(value_source)
         if full_match:
-            workflow_inputs[field_name] = _resolve_variable_selector(
+            workflow_inputs[field_name] = resolve_variable_selector(
                 full_match.group(1), node_run_result_mapping,
             )
         elif VARIABLE_REGEX.search(value_source):
             # Mixed template: interpolate all expressions as strings.
             workflow_inputs[field_name] = VARIABLE_REGEX.sub(
                 lambda m: str(
-                    _resolve_variable_selector(m.group(1), node_run_result_mapping)
+                    resolve_variable_selector(m.group(1), node_run_result_mapping)
                 ),
                 value_source,
             )
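
The two branches above draw a type distinction: when the whole value is a single expression, the resolved value keeps its original type; in a mixed template, every match is stringified and interpolated. A minimal sketch of that behavior, assuming a `{{#node_id.output_key#}}` shape for VARIABLE_REGEX and a toy resolver (the real ones live in variable_template_parser and this module):

```python
import re

VARIABLE_REGEX = re.compile(r"\{\{(#[^{}#]+#)\}\}")  # assumed shape of the real pattern

def toy_resolve(selector: str, mapping: dict) -> object:
    # stand-in for resolve_variable_selector: "#calc.score#" -> mapping["calc"]["score"]
    node_id, key = selector.strip("#").split(".", 1)
    return mapping[node_id][key]

mapping = {"calc": {"score": 0.87}}

# Branch 1: the entire value is one expression -> original type preserved.
full = VARIABLE_REGEX.fullmatch("{{#calc.score#}}")
print(toy_resolve(full.group(1), mapping))  # 0.87 (a float)

# Branch 2: mixed template -> each expression is interpolated as a string.
print(VARIABLE_REGEX.sub(lambda m: str(toy_resolve(m.group(1), mapping)),
                         "score is {{#calc.score#}}"))  # "score is 0.87"
```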
@@ -203,12 +203,7 @@ class BaseEvaluationInstance(ABC):
 def _extract_workflow_metrics(
     response: Mapping[str, object],
 ) -> list[EvaluationMetric]:
-    """Extract evaluation metrics from workflow output variables.
-    Each output variable is treated as a metric. The variable name
-    becomes the metric name, and its value is stored as-is regardless
-    of type (numeric, string, dict, etc.).
-    """
+    """Extract evaluation metrics from workflow output variables."""
     metrics: list[EvaluationMetric] = []
     data = response.get("data")
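
Per the removed docstring, each workflow output variable becomes one metric: the variable name is the metric name and the value is stored as-is, whatever its type. Roughly, under an assumed `data.outputs` response shape (the hunk only shows `response.get("data")`):

```python
response = {"data": {"outputs": {"accuracy": 0.9, "verdict": "pass"}}}
# _extract_workflow_metrics(response) would then yield something like:
#   [EvaluationMetric(name="accuracy", value=0.9),
#    EvaluationMetric(name="verdict", value="pass")]
```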
@@ -231,15 +226,14 @@ class BaseEvaluationInstance(ABC):
     return metrics
 
-def _resolve_variable_selector(
+def resolve_variable_selector(
     selector_raw: str,
     node_run_result_mapping: dict[str, NodeRunResult],
 ) -> object:
-    """Resolve a ``#node_id.output_key#`` selector against node run results.
-    Returns the resolved value in its original type, or an empty string
-    if the node or any key along the path is not found.
-    """
-    # "#node_id.output_key#" → "node_id.output_key"
+    """
+    Resolve a ``#node_id.output_key#`` selector against node run results.
+    """
+    # "#node_id.output_key#" → "node_id.output_key"
     cleaned = selector_raw.strip("#")
     parts = cleaned.split(".")
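
Only the first two lines of the resolver body appear in the hunk. Combining them with the removed docstring (original type preserved; empty string when the node or any key along the path is missing), the rest plausibly walks the selector path as below. This is a sketch under those assumptions, not the actual implementation:

```python
from dataclasses import dataclass, field

@dataclass
class NodeRunResult:  # minimal stand-in for the real entity
    outputs: dict = field(default_factory=dict)

def resolve_variable_selector(
    selector_raw: str,
    node_run_result_mapping: dict[str, NodeRunResult],
) -> object:
    cleaned = selector_raw.strip("#")  # "#node_id.output_key#" -> "node_id.output_key"
    parts = cleaned.split(".")
    node_result = node_run_result_mapping.get(parts[0])
    if node_result is None:
        return ""  # node not found -> empty string, per the old docstring
    value: object = node_result.outputs or {}
    for key in parts[1:]:  # walk any remaining keys along the path
        if isinstance(value, dict) and key in value:
            value = value[key]
        else:
            return ""  # missing key along the path -> empty string
    return value  # resolved value keeps its original type

print(resolve_variable_selector(
    "#llm1.text#", {"llm1": NodeRunResult(outputs={"text": "ok"})}
))  # "ok"
```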

View File

@@ -67,7 +67,7 @@ class BaseEvaluationRunner(ABC):
         evaluation_run = self.session.query(EvaluationRun).filter_by(id=evaluation_run_id).first()
         if not evaluation_run:
             raise ValueError(f"EvaluationRun {evaluation_run_id} not found")
 
         if not default_metric and not customized_metrics:
             raise ValueError("Either default_metric or customized_metrics must be provided")
@ -144,7 +144,17 @@ class BaseEvaluationRunner(ABC):
node_run_result_mapping_list: list[dict[str, NodeRunResult]] | None = None,
) -> list[EvaluationItemResult]:
"""Apply judgment conditions to each result's metrics.
Left side (``metric_name``): looked up from evaluate-phase metrics only.
Right side: when ``value_source="variable"``, ``condition.value``
contains an expression (e.g. ``{{#node_id.output_key#}}``). The
expression is parsed and resolved against the corresponding
``node_run_result_mapping`` to obtain the actual comparison value.
"""
from core.evaluation.base_evaluation_instance import resolve_variable_selector
from core.evaluation.entities.judgment_entity import JudgmentValueSource
from core.workflow.nodes.base.variable_template_parser import REGEX as VARIABLE_REGEX
judged_results: list[EvaluationItemResult] = []
for idx, result in enumerate(results):
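
A condition of the kind this docstring describes might look like the following. The diff only confirms the `metric_name`, `value_source`, and `value` fields; the operator field name here is an assumption:

```python
condition = {
    "metric_name": "accuracy",      # left side: looked up in evaluate-phase metrics only
    "comparison_operator": ">=",    # assumed field name
    "value_source": "variable",     # right side comes from a node run result
    "value": "{{#llm1.score#}}",    # expression resolved against node_run_result_mapping
}
```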
@@ -155,14 +165,28 @@ class BaseEvaluationRunner(ABC):
             # Left side: only metrics
             metric_values: dict[str, object] = {m.name: m.value for m in result.metrics}
 
-            # Right side variable pool: metrics + intermediate node run results
-            variable_values: dict[str, object] = dict(metric_values)
-            if node_run_result_mapping_list and idx < len(node_run_result_mapping_list):
-                node_run_result_mapping = node_run_result_mapping_list[idx]
-                for node_id, node_result in node_run_result_mapping.items():
-                    if node_result.outputs:
-                        for output_key, output_value in node_result.outputs.items():
-                            variable_values[f"{node_id}.{output_key}"] = output_value
+            # Right side: pre-resolve variable expressions against node run results.
+            # Each condition.value expression (e.g. "{{#llm1.text#}}") is resolved
+            # and stored in variable_values keyed by the raw expression string, so
+            # that JudgmentProcessor._resolve_comparison_value can look it up.
+            variable_values: dict[str, object] = {}
+            node_run_result_mapping = (
+                node_run_result_mapping_list[idx]
+                if node_run_result_mapping_list and idx < len(node_run_result_mapping_list)
+                else {}
+            )
+            for condition in judgment_config.conditions:
+                if (
+                    condition.value_source == JudgmentValueSource.VARIABLE
+                    and isinstance(condition.value, str)
+                    and node_run_result_mapping
+                ):
+                    match = VARIABLE_REGEX.fullmatch(condition.value)
+                    if match:
+                        resolved = resolve_variable_selector(
+                            match.group(1), node_run_result_mapping
+                        )
+                        variable_values[condition.value] = resolved
             judgment_result = JudgmentProcessor.evaluate(
                 metric_values, judgment_config, variable_values=variable_values
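
Putting the right-hand-side handling together: for each variable-sourced condition the raw expression string itself becomes the key in variable_values, so the judgment processor can look the resolved value up verbatim. A self-contained sketch of that step, with the VARIABLE_REGEX pattern and NodeRunResult stand-in assumed as in the earlier sketches:

```python
import re
from dataclasses import dataclass, field

VARIABLE_REGEX = re.compile(r"\{\{(#[^{}#]+#)\}\}")  # assumed pattern

@dataclass
class NodeRunResult:  # minimal stand-in for the real entity
    outputs: dict = field(default_factory=dict)

def resolve_variable_selector(selector_raw: str, mapping: dict[str, NodeRunResult]) -> object:
    node_id, key = selector_raw.strip("#").split(".", 1)
    node_result = mapping.get(node_id)
    return node_result.outputs.get(key, "") if node_result else ""

node_run_result_mapping = {"llm1": NodeRunResult(outputs={"score": 0.92})}
expr = "{{#llm1.score#}}"  # condition.value with value_source == "variable"

variable_values: dict[str, object] = {}
match = VARIABLE_REGEX.fullmatch(expr)
if match:
    # keyed by the raw expression string, as the new comment in the hunk explains
    variable_values[expr] = resolve_variable_selector(match.group(1), node_run_result_mapping)

assert variable_values == {"{{#llm1.score#}}": 0.92}
```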