From bd409a3caf91b99fb91b52405db6e7d39cb8baca Mon Sep 17 00:00:00 2001
From: Yeuoly <admin@srmxy.cn>
Date: Wed, 20 Mar 2024 23:01:24 +0800
Subject: [PATCH] enhance: code node validator

---
 api/core/workflow/nodes/code/code_node.py     | 91 +++++++++++++------
 api/core/workflow/nodes/code/entities.py      |  2 +-
 .../workflow/nodes/test_code.py               | 76 +++++++++++++++-
 3 files changed, 140 insertions(+), 29 deletions(-)

diff --git a/api/core/workflow/nodes/code/code_node.py b/api/core/workflow/nodes/code/code_node.py
index 3ac4f4b2e9..2ca5a9f8f9 100644
--- a/api/core/workflow/nodes/code/code_node.py
+++ b/api/core/workflow/nodes/code/code_node.py
@@ -13,6 +13,7 @@ MAX_PRECISION = 20
 MAX_DEPTH = 5
 MAX_STRING_LENGTH = 5000
 MAX_STRING_ARRAY_LENGTH = 30
+MAX_OBJECT_ARRAY_LENGTH = 30
 MAX_NUMBER_ARRAY_LENGTH = 1000
 
 JAVASCRIPT_DEFAULT_CODE = """function main({arg1, arg2}) {
@@ -200,20 +201,30 @@ class CodeNode(BaseNode):
                         variable=f'{prefix}.{output_name}' if prefix else output_name
                     )
                 elif isinstance(output_value, list):
-                    if all(isinstance(value, int | float) for value in output_value):
-                        for value in output_value:
-                            self._check_number(
-                                value=value,
-                                variable=f'{prefix}.{output_name}' if prefix else output_name
-                            )
-                    elif all(isinstance(value, str) for value in output_value):
-                        for value in output_value:
-                            self._check_string(
-                                value=value,
-                                variable=f'{prefix}.{output_name}' if prefix else output_name
-                            )
-                    else:
-                        raise ValueError(f'Output {prefix}.{output_name} is not a valid array. make sure all elements are of the same type.')
+                    first_element = output_value[0] if len(output_value) > 0 else None
+                    if first_element is not None:
+                        if isinstance(first_element, int | float) and all(isinstance(value, int | float) for value in output_value):
+                            for i, value in enumerate(output_value):
+                                self._check_number(
+                                    value=value,
+                                    variable=f'{prefix}.{output_name}[{i}]' if prefix else f'{output_name}[{i}]'
+                                )
+                        elif isinstance(first_element, str) and all(isinstance(value, str) for value in output_value):
+                            for i, value in enumerate(output_value):
+                                self._check_string(
+                                    value=value,
+                                    variable=f'{prefix}.{output_name}[{i}]' if prefix else f'{output_name}[{i}]'
+                                )
+                        elif isinstance(first_element, dict) and all(isinstance(value, dict) for value in output_value):
+                            for i, value in enumerate(output_value):
+                                self._transform_result(
+                                    result=value,
+                                    output_schema=None,
+                                    prefix=f'{prefix}.{output_name}[{i}]' if prefix else f'{output_name}[{i}]',
+                                    depth=depth + 1
+                                )
+                        else:
+                            raise ValueError(f'Output {prefix}.{output_name} is not a valid array. make sure all elements are of the same type.')
                 else:
                     raise ValueError(f'Output {prefix}.{output_name} is not a valid type.')
                 
@@ -221,68 +232,96 @@ class CodeNode(BaseNode):
 
         parameters_validated = {}
         for output_name, output_config in output_schema.items():
+            dot = '.' if prefix else ''
             if output_config.type == 'object':
                 # check if output is object
                 if not isinstance(result.get(output_name), dict):
                     raise ValueError(
-                        f'Output {prefix}.{output_name} is not an object, got {type(result.get(output_name))} instead.'
+                        f'Output {prefix}{dot}{output_name} is not an object, got {type(result.get(output_name))} instead.'
                     )
 
                 transformed_result[output_name] = self._transform_result(
                     result=result[output_name],
                     output_schema=output_config.children,
-                    prefix=f'{prefix}.{output_name}' if prefix else output_name,
+                    prefix=f'{prefix}{dot}{output_name}',
                     depth=depth + 1
                 )
             elif output_config.type == 'number':
                 # check if number available
                 transformed_result[output_name] = self._check_number(
                     value=result[output_name],
-                    variable=f'{prefix}.{output_name}' if prefix else output_name
+                    variable=f'{prefix}{dot}{output_name}'
                 )
             elif output_config.type == 'string':
                 # check if string available
                 transformed_result[output_name] = self._check_string(
                     value=result[output_name],
-                    variable=f'{prefix}.{output_name}' if prefix else output_name,
+                    variable=f'{prefix}{dot}{output_name}',
                 )
             elif output_config.type == 'array[number]':
                 # check if array of number available
                 if not isinstance(result[output_name], list):
                     raise ValueError(
-                        f'Output {prefix}.{output_name} is not an array, got {type(result.get(output_name))} instead.'
+                        f'Output {prefix}{dot}{output_name} is not an array, got {type(result.get(output_name))} instead.'
                     )
 
                 if len(result[output_name]) > MAX_NUMBER_ARRAY_LENGTH:
                     raise ValueError(
-                        f'{prefix}.{output_name} in output form must be less than {MAX_NUMBER_ARRAY_LENGTH} characters'
+                        f'{prefix}{dot}{output_name} in output form must be less than {MAX_NUMBER_ARRAY_LENGTH} characters.'
                     )
 
                 transformed_result[output_name] = [
                     self._check_number(
                         value=value,
-                        variable=f'{prefix}.{output_name}' if prefix else output_name
+                        variable=f'{prefix}{dot}{output_name}[{i}]'
                     )
-                    for value in result[output_name]
+                    for i, value in enumerate(result[output_name])
                 ]
             elif output_config.type == 'array[string]':
                 # check if array of string available
                 if not isinstance(result[output_name], list):
                     raise ValueError(
-                        f'Output {prefix}.{output_name} is not an array, got {type(result.get(output_name))} instead.'
+                        f'Output {prefix}{dot}{output_name} is not an array, got {type(result.get(output_name))} instead.'
                     )
 
                 if len(result[output_name]) > MAX_STRING_ARRAY_LENGTH:
                     raise ValueError(
-                        f'{prefix}.{output_name} in output form must be less than {MAX_STRING_ARRAY_LENGTH} characters'
+                        f'{prefix}{dot}{output_name} in output form must be less than {MAX_STRING_ARRAY_LENGTH} characters.'
                     )
 
                 transformed_result[output_name] = [
                     self._check_string(
                         value=value,
-                        variable=f'{prefix}.{output_name}' if prefix else output_name
+                        variable=f'{prefix}{dot}{output_name}[{i}]'
                     )
-                    for value in result[output_name]
+                    for i, value in enumerate(result[output_name])
+                ]
+            elif output_config.type == 'array[object]':
+                # check if array of object available
+                if not isinstance(result[output_name], list):
+                    raise ValueError(
+                        f'Output {prefix}{dot}{output_name} is not an array, got {type(result.get(output_name))} instead.'
+                    )
+
+                if len(result[output_name]) > MAX_OBJECT_ARRAY_LENGTH:
+                    raise ValueError(
+                        f'{prefix}{dot}{output_name} in output form must be less than {MAX_OBJECT_ARRAY_LENGTH} characters.'
+                    )
+                
+                for i, value in enumerate(result[output_name]):
+                    if not isinstance(value, dict):
+                        raise ValueError(
+                            f'Output {prefix}{dot}{output_name}[{i}] is not an object, got {type(value)} instead at index {i}.'
+                        )
+
+                transformed_result[output_name] = [
+                    self._transform_result(
+                        result=value,
+                        output_schema=output_config.children,
+                        prefix=f'{prefix}{dot}{output_name}[{i}]',
+                        depth=depth + 1
+                    )
+                    for i, value in enumerate(result[output_name])
                 ]
             else:
                 raise ValueError(f'Output type {output_config.type} is not supported.')
diff --git a/api/core/workflow/nodes/code/entities.py b/api/core/workflow/nodes/code/entities.py
index 97e178f5df..555bb3918e 100644
--- a/api/core/workflow/nodes/code/entities.py
+++ b/api/core/workflow/nodes/code/entities.py
@@ -11,7 +11,7 @@ class CodeNodeData(BaseNodeData):
     Code Node Data.
     """
     class Output(BaseModel):
-        type: Literal['string', 'number', 'object', 'array[string]', 'array[number]']
+        type: Literal['string', 'number', 'object', 'array[string]', 'array[number]', 'array[object]']
         children: Optional[dict[str, 'Output']]
 
     variables: list[VariableSelector]
diff --git a/api/tests/integration_tests/workflow/nodes/test_code.py b/api/tests/integration_tests/workflow/nodes/test_code.py
index 0b7217b053..1b220a861e 100644
--- a/api/tests/integration_tests/workflow/nodes/test_code.py
+++ b/api/tests/integration_tests/workflow/nodes/test_code.py
@@ -227,7 +227,7 @@ def test_execute_code_output_validator_depth():
     # construct result
     result = {
         "number_validator": 1,
-        "string_validator": "1" * 2000,
+        "string_validator": "1" * 6000,
         "number_array_validator": [1, 2, 3, 3.333],
         "string_array_validator": ["1", "2", "3"],
         "object_validator": {
@@ -263,4 +263,76 @@ def test_execute_code_output_validator_depth():
     # validate
     with pytest.raises(ValueError):
         node._transform_result(result, node.node_data.outputs)
-        
\ No newline at end of file
+
+
+def test_execute_code_output_object_list():
+    code = '''
+    def main(args1: int, args2: int) -> dict:
+        return {
+            "result": {
+                "result": args1 + args2,
+            }
+        }
+    '''
+    # drop the first 4 characters (the literal's indent) from every line
+    code = '\n'.join([line[4:] for line in code.split('\n')])
+    node = CodeNode(
+        tenant_id='1',
+        app_id='1',
+        workflow_id='1',
+        user_id='1',
+        user_from=InvokeFrom.WEB_APP,
+        config={
+            'id': '1',
+            'data': {
+                "outputs": {
+                    "object_list": {
+                        "type": "array[object]",
+                    },
+                },
+                'title': '123',
+                'variables': [
+                    {
+                        'variable': 'args1',
+                        'value_selector': ['1', '123', 'args1'],
+                    },
+                    {
+                        'variable': 'args2',
+                        'value_selector': ['1', '123', 'args2']
+                    }
+                ],
+                'answer': '123',
+                'code_language': 'python3',
+                'code': code
+            }
+        }
+    )
+
+    # valid case: every element of "object_list" is a dict
+    result = {
+        "object_list": [{
+            "result": 1,
+        }, {
+            "result": 2,
+        }, {
+            "result": [1, 2, 3],
+        }]
+    }
+
+    # call the validator directly on the hand-built result; any exception fails the test
+    node._transform_result(result, node.node_data.outputs)
+
+    # invalid case: same list with a trailing non-dict element (1)
+    result = {
+        "object_list": [{
+            "result": 1,
+        }, {
+            "result": 2,
+        }, {
+            "result": [1, 2, 3],
+        }, 1]
+    }
+
+    # the non-dict element must make the validator raise ValueError
+    with pytest.raises(ValueError):
+        node._transform_result(result, node.node_data.outputs)