mirror of
https://github.com/langgenius/dify.git
synced 2026-05-11 14:58:23 +08:00
feat: Parse the expression to get the input parameters for the evaluation workflow.
This commit is contained in:
parent
18198b88ff
commit
c68194093e
@ -182,14 +182,14 @@ class BaseEvaluationInstance(ABC):
|
||||
# Check if the entire value is a single expression.
|
||||
full_match = VARIABLE_REGEX.fullmatch(value_source)
|
||||
if full_match:
|
||||
workflow_inputs[field_name] = _resolve_variable_selector(
|
||||
workflow_inputs[field_name] = resolve_variable_selector(
|
||||
full_match.group(1), node_run_result_mapping,
|
||||
)
|
||||
elif VARIABLE_REGEX.search(value_source):
|
||||
# Mixed template: interpolate all expressions as strings.
|
||||
workflow_inputs[field_name] = VARIABLE_REGEX.sub(
|
||||
lambda m: str(
|
||||
_resolve_variable_selector(m.group(1), node_run_result_mapping)
|
||||
resolve_variable_selector(m.group(1), node_run_result_mapping)
|
||||
),
|
||||
value_source,
|
||||
)
|
||||
@ -203,12 +203,7 @@ class BaseEvaluationInstance(ABC):
|
||||
def _extract_workflow_metrics(
|
||||
response: Mapping[str, object],
|
||||
) -> list[EvaluationMetric]:
|
||||
"""Extract evaluation metrics from workflow output variables.
|
||||
|
||||
Each output variable is treated as a metric. The variable name
|
||||
becomes the metric name, and its value is stored as-is regardless
|
||||
of type (numeric, string, dict, etc.).
|
||||
"""
|
||||
"""Extract evaluation metrics from workflow output variables."""
|
||||
metrics: list[EvaluationMetric] = []
|
||||
|
||||
data = response.get("data")
|
||||
@ -231,15 +226,14 @@ class BaseEvaluationInstance(ABC):
|
||||
return metrics
|
||||
|
||||
|
||||
def _resolve_variable_selector(
|
||||
def resolve_variable_selector(
|
||||
selector_raw: str,
|
||||
node_run_result_mapping: dict[str, NodeRunResult],
|
||||
) -> object:
|
||||
"""Resolve a ``#node_id.output_key#`` selector against node run results.
|
||||
Returns the resolved value in its original type, or an empty string
|
||||
if the node or any key along the path is not found.
|
||||
"""
|
||||
# "#node_id.output_key#" → "node_id.output_key"
|
||||
Resolve a ``#node_id.output_key#`` selector against node run results.
|
||||
"""
|
||||
# Strip the surrounding "#" delimiters: "#node_id.output_key#" → "node_id.output_key"
|
||||
cleaned = selector_raw.strip("#")
|
||||
parts = cleaned.split(".")
|
||||
|
||||
|
||||
@ -67,7 +67,7 @@ class BaseEvaluationRunner(ABC):
|
||||
evaluation_run = self.session.query(EvaluationRun).filter_by(id=evaluation_run_id).first()
|
||||
if not evaluation_run:
|
||||
raise ValueError(f"EvaluationRun {evaluation_run_id} not found")
|
||||
|
||||
|
||||
if not default_metric and not customized_metrics:
|
||||
raise ValueError("Either default_metric or customized_metrics must be provided")
|
||||
|
||||
@ -144,7 +144,17 @@ class BaseEvaluationRunner(ABC):
|
||||
node_run_result_mapping_list: list[dict[str, NodeRunResult]] | None = None,
|
||||
) -> list[EvaluationItemResult]:
|
||||
"""Apply judgment conditions to each result's metrics.
|
||||
|
||||
Left side (``metric_name``): looked up from evaluate-phase metrics only.
|
||||
Right side: when ``value_source="variable"``, ``condition.value``
|
||||
contains an expression (e.g. ``{{#node_id.output_key#}}``). The
|
||||
expression is parsed and resolved against the corresponding
|
||||
``node_run_result_mapping`` to obtain the actual comparison value.
|
||||
"""
|
||||
from core.evaluation.base_evaluation_instance import resolve_variable_selector
|
||||
from core.evaluation.entities.judgment_entity import JudgmentValueSource
|
||||
from core.workflow.nodes.base.variable_template_parser import REGEX as VARIABLE_REGEX
|
||||
|
||||
judged_results: list[EvaluationItemResult] = []
|
||||
|
||||
for idx, result in enumerate(results):
|
||||
@ -155,14 +165,28 @@ class BaseEvaluationRunner(ABC):
|
||||
# Left side: only metrics
|
||||
metric_values: dict[str, object] = {m.name: m.value for m in result.metrics}
|
||||
|
||||
# Right side variable pool: metrics + intermediate node run results
|
||||
variable_values: dict[str, object] = dict(metric_values)
|
||||
if node_run_result_mapping_list and idx < len(node_run_result_mapping_list):
|
||||
node_run_result_mapping = node_run_result_mapping_list[idx]
|
||||
for node_id, node_result in node_run_result_mapping.items():
|
||||
if node_result.outputs:
|
||||
for output_key, output_value in node_result.outputs.items():
|
||||
variable_values[f"{node_id}.{output_key}"] = output_value
|
||||
# Right side: pre-resolve variable expressions against node run results.
|
||||
# Each condition.value expression (e.g. "{{#llm1.text#}}") is resolved
|
||||
# and stored in variable_values keyed by the raw expression string, so
|
||||
# that JudgmentProcessor._resolve_comparison_value can look it up.
|
||||
variable_values: dict[str, object] = {}
|
||||
node_run_result_mapping = (
|
||||
node_run_result_mapping_list[idx]
|
||||
if node_run_result_mapping_list and idx < len(node_run_result_mapping_list)
|
||||
else {}
|
||||
)
|
||||
for condition in judgment_config.conditions:
|
||||
if (
|
||||
condition.value_source == JudgmentValueSource.VARIABLE
|
||||
and isinstance(condition.value, str)
|
||||
and node_run_result_mapping
|
||||
):
|
||||
match = VARIABLE_REGEX.fullmatch(condition.value)
|
||||
if match:
|
||||
resolved = resolve_variable_selector(
|
||||
match.group(1), node_run_result_mapping
|
||||
)
|
||||
variable_values[condition.value] = resolved
|
||||
|
||||
judgment_result = JudgmentProcessor.evaluate(
|
||||
metric_values, judgment_config, variable_values=variable_values
|
||||
|
||||
Loading…
Reference in New Issue
Block a user