fix(template-transform): use base64 encoding for Jinja2 templates to fix #26818 (#30223)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
DevByteAI 2025-12-29 07:03:39 +02:00 committed by GitHub
parent dfd2dd5c68
commit 0b1439fee4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 132 additions and 10 deletions

View File

@ -1,9 +1,14 @@
from collections.abc import Mapping
from textwrap import dedent
from typing import Any
from core.helper.code_executor.template_transformer import TemplateTransformer
class Jinja2TemplateTransformer(TemplateTransformer):
# Use separate placeholder for base64-encoded template to avoid confusion
_template_b64_placeholder: str = "{{template_b64}}"
@classmethod
def transform_response(cls, response: str):
"""
@ -13,18 +18,35 @@ class Jinja2TemplateTransformer(TemplateTransformer):
"""
return {"result": cls.extract_result_str_from_response(response)}
@classmethod
def assemble_runner_script(cls, code: str, inputs: Mapping[str, Any]) -> str:
"""
Override base class to use base64 encoding for template code.
This prevents issues with special characters (quotes, newlines) in templates
breaking the generated Python script. Fixes #26818.
"""
script = cls.get_runner_script()
# Encode template as base64 to safely embed any content including quotes
code_b64 = cls.serialize_code(code)
script = script.replace(cls._template_b64_placeholder, code_b64)
inputs_str = cls.serialize_inputs(inputs)
script = script.replace(cls._inputs_placeholder, inputs_str)
return script
@classmethod
def get_runner_script(cls) -> str:
runner_script = dedent(f"""
# declare main function
def main(**inputs):
import jinja2
template = jinja2.Template('''{cls._code_placeholder}''')
return template.render(**inputs)
import jinja2
import json
from base64 import b64decode
# declare main function
def main(**inputs):
# Decode base64-encoded template to handle special characters safely
template_code = b64decode('{cls._template_b64_placeholder}').decode('utf-8')
template = jinja2.Template(template_code)
return template.render(**inputs)
# decode and prepare input dict
inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))

View File

@ -13,6 +13,15 @@ class TemplateTransformer(ABC):
_inputs_placeholder: str = "{{inputs}}"
_result_tag: str = "<<RESULT>>"
@classmethod
def serialize_code(cls, code: str) -> str:
"""
Serialize template code to base64 to safely embed in generated script.
This prevents issues with special characters like quotes breaking the script.
"""
code_bytes = code.encode("utf-8")
return b64encode(code_bytes).decode("utf-8")
@classmethod
def transform_caller(cls, code: str, inputs: Mapping[str, Any]) -> tuple[str, str]:
"""

View File

@ -7,11 +7,14 @@ CODE_LANGUAGE = CodeLanguage.JINJA2
def test_jinja2():
"""Test basic Jinja2 template rendering."""
template = "Hello {{template}}"
# Template must be base64 encoded to match the new safe embedding approach
template_b64 = base64.b64encode(template.encode("utf-8")).decode("utf-8")
inputs = base64.b64encode(b'{"template": "World"}').decode("utf-8")
code = (
Jinja2TemplateTransformer.get_runner_script()
.replace(Jinja2TemplateTransformer._code_placeholder, template)
.replace(Jinja2TemplateTransformer._template_b64_placeholder, template_b64)
.replace(Jinja2TemplateTransformer._inputs_placeholder, inputs)
)
result = CodeExecutor.execute_code(
@ -21,6 +24,7 @@ def test_jinja2():
def test_jinja2_with_code_template():
"""Test template rendering via the high-level workflow API."""
result = CodeExecutor.execute_workflow_code_template(
language=CODE_LANGUAGE, code="Hello {{template}}", inputs={"template": "World"}
)
@ -28,7 +32,64 @@ def test_jinja2_with_code_template():
def test_jinja2_get_runner_script():
"""Test that runner script contains required placeholders."""
runner_script = Jinja2TemplateTransformer.get_runner_script()
assert runner_script.count(Jinja2TemplateTransformer._code_placeholder) == 1
assert runner_script.count(Jinja2TemplateTransformer._template_b64_placeholder) == 1
assert runner_script.count(Jinja2TemplateTransformer._inputs_placeholder) == 1
assert runner_script.count(Jinja2TemplateTransformer._result_tag) == 2
def test_jinja2_template_with_special_characters():
"""
Test that templates with special characters (quotes, newlines) render correctly.
This is a regression test for issue #26818 where textarea pre-fill values
containing special characters would break template rendering.
"""
# Template with triple quotes, single quotes, double quotes, and newlines
template = """<html>
<body>
<input value="{{ task.get('Task ID', '') }}"/>
<textarea>{{ task.get('Issues', 'No issues reported') }}</textarea>
<p>Status: "{{ status }}"</p>
<pre>'''code block'''</pre>
</body>
</html>"""
inputs = {"task": {"Task ID": "TASK-123", "Issues": "Line 1\nLine 2\nLine 3"}, "status": "completed"}
result = CodeExecutor.execute_workflow_code_template(language=CODE_LANGUAGE, code=template, inputs=inputs)
# Verify the template rendered correctly with all special characters
output = result["result"]
assert 'value="TASK-123"' in output
assert "<textarea>Line 1\nLine 2\nLine 3</textarea>" in output
assert 'Status: "completed"' in output
assert "'''code block'''" in output
def test_jinja2_template_with_html_textarea_prefill():
"""
Specific test for HTML textarea with Jinja2 variable pre-fill.
Verifies fix for issue #26818.
"""
template = "<textarea name='notes'>{{ notes }}</textarea>"
notes_content = "This is a multi-line note.\nWith special chars: 'single' and \"double\" quotes."
inputs = {"notes": notes_content}
result = CodeExecutor.execute_workflow_code_template(language=CODE_LANGUAGE, code=template, inputs=inputs)
expected_output = f"<textarea name='notes'>{notes_content}</textarea>"
assert result["result"] == expected_output
def test_jinja2_assemble_runner_script_encodes_template():
"""Test that assemble_runner_script properly base64 encodes the template."""
template = "Hello {{ name }}!"
inputs = {"name": "World"}
script = Jinja2TemplateTransformer.assemble_runner_script(template, inputs)
# The template should be base64 encoded in the script
template_b64 = base64.b64encode(template.encode("utf-8")).decode("utf-8")
assert template_b64 in script
# The raw template should NOT appear in the script (it's encoded)
assert "Hello {{ name }}!" not in script

View File

@ -12,10 +12,12 @@ class TestJinja2CodeExecutor(CodeExecutorTestMixin):
_, Jinja2TemplateTransformer = self.jinja2_imports
template = "Hello {{template}}"
# Template must be base64 encoded to match the new safe embedding approach
template_b64 = base64.b64encode(template.encode("utf-8")).decode("utf-8")
inputs = base64.b64encode(b'{"template": "World"}').decode("utf-8")
code = (
Jinja2TemplateTransformer.get_runner_script()
.replace(Jinja2TemplateTransformer._code_placeholder, template)
.replace(Jinja2TemplateTransformer._template_b64_placeholder, template_b64)
.replace(Jinja2TemplateTransformer._inputs_placeholder, inputs)
)
result = CodeExecutor.execute_code(
@ -37,6 +39,34 @@ class TestJinja2CodeExecutor(CodeExecutorTestMixin):
_, Jinja2TemplateTransformer = self.jinja2_imports
runner_script = Jinja2TemplateTransformer.get_runner_script()
assert runner_script.count(Jinja2TemplateTransformer._code_placeholder) == 1
assert runner_script.count(Jinja2TemplateTransformer._template_b64_placeholder) == 1
assert runner_script.count(Jinja2TemplateTransformer._inputs_placeholder) == 1
assert runner_script.count(Jinja2TemplateTransformer._result_tag) == 2
def test_jinja2_template_with_special_characters(self, flask_app_with_containers):
"""
Test that templates with special characters (quotes, newlines) render correctly.
This is a regression test for issue #26818 where textarea pre-fill values
containing special characters would break template rendering.
"""
CodeExecutor, CodeLanguage = self.code_executor_imports
# Template with triple quotes, single quotes, double quotes, and newlines
template = """<html>
<body>
<input value="{{ task.get('Task ID', '') }}"/>
<textarea>{{ task.get('Issues', 'No issues reported') }}</textarea>
<p>Status: "{{ status }}"</p>
<pre>'''code block'''</pre>
</body>
</html>"""
inputs = {"task": {"Task ID": "TASK-123", "Issues": "Line 1\nLine 2\nLine 3"}, "status": "completed"}
result = CodeExecutor.execute_workflow_code_template(language=CodeLanguage.JINJA2, code=template, inputs=inputs)
# Verify the template rendered correctly with all special characters
output = result["result"]
assert 'value="TASK-123"' in output
assert "<textarea>Line 1\nLine 2\nLine 3</textarea>" in output
assert 'Status: "completed"' in output
assert "'''code block'''" in output