From bd409a3caf91b99fb91b52405db6e7d39cb8baca Mon Sep 17 00:00:00 2001
From: Yeuoly
Date: Wed, 20 Mar 2024 23:01:24 +0800
Subject: [PATCH] enhance: code node validator

---
Reviewer notes (text between the "---" line and the first "diff --git" line
is not part of the commit message):
- Layout restored to one diff line per line. Indentation and whitespace-only
  context lines were reconstructed from the hunk line counts; verify against
  the target tree before applying.
- 'object' branch of _transform_result: the nested prefix is now built as
  f'{prefix}{dot}{output_name}', so a top-level object no longer hands the
  recursive call a prefix with a leading '.'.
- Array length errors now say "elements" instead of "characters".

 api/core/workflow/nodes/code/code_node.py     | 91 +++++++++++++------
 api/core/workflow/nodes/code/entities.py      |  2 +-
 .../workflow/nodes/test_code.py               | 76 +++++++++++++++-
 3 files changed, 140 insertions(+), 29 deletions(-)

diff --git a/api/core/workflow/nodes/code/code_node.py b/api/core/workflow/nodes/code/code_node.py
index 3ac4f4b2e9..2ca5a9f8f9 100644
--- a/api/core/workflow/nodes/code/code_node.py
+++ b/api/core/workflow/nodes/code/code_node.py
@@ -13,6 +13,7 @@ MAX_PRECISION = 20
 MAX_DEPTH = 5
 MAX_STRING_LENGTH = 5000
 MAX_STRING_ARRAY_LENGTH = 30
+MAX_OBJECT_ARRAY_LENGTH = 30
 MAX_NUMBER_ARRAY_LENGTH = 1000
 
 JAVASCRIPT_DEFAULT_CODE = """function main({arg1, arg2}) {
@@ -200,20 +201,30 @@ class CodeNode(BaseNode):
                         variable=f'{prefix}.{output_name}' if prefix else output_name
                     )
                 elif isinstance(output_value, list):
-                    if all(isinstance(value, int | float) for value in output_value):
-                        for value in output_value:
-                            self._check_number(
-                                value=value,
-                                variable=f'{prefix}.{output_name}' if prefix else output_name
-                            )
-                    elif all(isinstance(value, str) for value in output_value):
-                        for value in output_value:
-                            self._check_string(
-                                value=value,
-                                variable=f'{prefix}.{output_name}' if prefix else output_name
-                            )
-                    else:
-                        raise ValueError(f'Output {prefix}.{output_name} is not a valid array. make sure all elements are of the same type.')
+                    first_element = output_value[0] if len(output_value) > 0 else None
+                    if first_element is not None:
+                        if isinstance(first_element, int | float) and all(isinstance(value, int | float) for value in output_value):
+                            for i, value in enumerate(output_value):
+                                self._check_number(
+                                    value=value,
+                                    variable=f'{prefix}.{output_name}[{i}]' if prefix else f'{output_name}[{i}]'
+                                )
+                        elif isinstance(first_element, str) and all(isinstance(value, str) for value in output_value):
+                            for i, value in enumerate(output_value):
+                                self._check_string(
+                                    value=value,
+                                    variable=f'{prefix}.{output_name}[{i}]' if prefix else f'{output_name}[{i}]'
+                                )
+                        elif isinstance(first_element, dict) and all(isinstance(value, dict) for value in output_value):
+                            for i, value in enumerate(output_value):
+                                self._transform_result(
+                                    result=value,
+                                    output_schema=None,
+                                    prefix=f'{prefix}.{output_name}[{i}]' if prefix else f'{output_name}[{i}]',
+                                    depth=depth + 1
+                                )
+                        else:
+                            raise ValueError(f'Output {prefix}.{output_name} is not a valid array. make sure all elements are of the same type.')
                 else:
                     raise ValueError(f'Output {prefix}.{output_name} is not a valid type.')
 
@@ -221,68 +232,96 @@ class CodeNode(BaseNode):
 
         parameters_validated = {}
         for output_name, output_config in output_schema.items():
+            dot = '.' if prefix else ''
             if output_config.type == 'object':
                 # check if output is object
                 if not isinstance(result.get(output_name), dict):
                     raise ValueError(
-                        f'Output {prefix}.{output_name} is not an object, got {type(result.get(output_name))} instead.'
+                        f'Output {prefix}{dot}{output_name} is not an object, got {type(result.get(output_name))} instead.'
                     )
 
                 transformed_result[output_name] = self._transform_result(
                     result=result[output_name],
                     output_schema=output_config.children,
-                    prefix=f'{prefix}.{output_name}' if prefix else output_name,
+                    prefix=f'{prefix}{dot}{output_name}',
                     depth=depth + 1
                 )
             elif output_config.type == 'number':
                 # check if number available
                 transformed_result[output_name] = self._check_number(
                     value=result[output_name],
-                    variable=f'{prefix}.{output_name}' if prefix else output_name
+                    variable=f'{prefix}{dot}{output_name}'
                 )
             elif output_config.type == 'string':
                 # check if string available
                 transformed_result[output_name] = self._check_string(
                     value=result[output_name],
-                    variable=f'{prefix}.{output_name}' if prefix else output_name,
+                    variable=f'{prefix}{dot}{output_name}',
                 )
             elif output_config.type == 'array[number]':
                 # check if array of number available
                 if not isinstance(result[output_name], list):
                     raise ValueError(
-                        f'Output {prefix}.{output_name} is not an array, got {type(result.get(output_name))} instead.'
+                        f'Output {prefix}{dot}{output_name} is not an array, got {type(result.get(output_name))} instead.'
                     )
 
                 if len(result[output_name]) > MAX_NUMBER_ARRAY_LENGTH:
                     raise ValueError(
-                        f'{prefix}.{output_name} in output form must be less than {MAX_NUMBER_ARRAY_LENGTH} characters'
+                        f'{prefix}{dot}{output_name} in output form must be less than {MAX_NUMBER_ARRAY_LENGTH} elements.'
                     )
 
                 transformed_result[output_name] = [
                     self._check_number(
                         value=value,
-                        variable=f'{prefix}.{output_name}' if prefix else output_name
+                        variable=f'{prefix}{dot}{output_name}[{i}]'
                     )
-                    for value in result[output_name]
+                    for i, value in enumerate(result[output_name])
                 ]
             elif output_config.type == 'array[string]':
                 # check if array of string available
                 if not isinstance(result[output_name], list):
                     raise ValueError(
-                        f'Output {prefix}.{output_name} is not an array, got {type(result.get(output_name))} instead.'
+                        f'Output {prefix}{dot}{output_name} is not an array, got {type(result.get(output_name))} instead.'
                     )
 
                 if len(result[output_name]) > MAX_STRING_ARRAY_LENGTH:
                     raise ValueError(
-                        f'{prefix}.{output_name} in output form must be less than {MAX_STRING_ARRAY_LENGTH} characters'
+                        f'{prefix}{dot}{output_name} in output form must be less than {MAX_STRING_ARRAY_LENGTH} elements.'
                     )
 
                 transformed_result[output_name] = [
                     self._check_string(
                         value=value,
-                        variable=f'{prefix}.{output_name}' if prefix else output_name
+                        variable=f'{prefix}{dot}{output_name}[{i}]'
                     )
-                    for value in result[output_name]
+                    for i, value in enumerate(result[output_name])
+                ]
+            elif output_config.type == 'array[object]':
+                # check if array of object available
+                if not isinstance(result[output_name], list):
+                    raise ValueError(
+                        f'Output {prefix}{dot}{output_name} is not an array, got {type(result.get(output_name))} instead.'
+                    )
+
+                if len(result[output_name]) > MAX_OBJECT_ARRAY_LENGTH:
+                    raise ValueError(
+                        f'{prefix}{dot}{output_name} in output form must be less than {MAX_OBJECT_ARRAY_LENGTH} elements.'
+                    )
+
+                for i, value in enumerate(result[output_name]):
+                    if not isinstance(value, dict):
+                        raise ValueError(
+                            f'Output {prefix}{dot}{output_name}[{i}] is not an object, got {type(value)} instead at index {i}.'
+                        )
+
+                transformed_result[output_name] = [
+                    self._transform_result(
+                        result=value,
+                        output_schema=output_config.children,
+                        prefix=f'{prefix}{dot}{output_name}[{i}]',
+                        depth=depth + 1
+                    )
+                    for i, value in enumerate(result[output_name])
                 ]
             else:
                 raise ValueError(f'Output type {output_config.type} is not supported.')
diff --git a/api/core/workflow/nodes/code/entities.py b/api/core/workflow/nodes/code/entities.py
index 97e178f5df..555bb3918e 100644
--- a/api/core/workflow/nodes/code/entities.py
+++ b/api/core/workflow/nodes/code/entities.py
@@ -11,7 +11,7 @@ class CodeNodeData(BaseNodeData):
     Code Node Data.
     """
     class Output(BaseModel):
-        type: Literal['string', 'number', 'object', 'array[string]', 'array[number]']
+        type: Literal['string', 'number', 'object', 'array[string]', 'array[number]', 'array[object]']
         children: Optional[dict[str, 'Output']]
 
     variables: list[VariableSelector]
diff --git a/api/tests/integration_tests/workflow/nodes/test_code.py b/api/tests/integration_tests/workflow/nodes/test_code.py
index 0b7217b053..1b220a861e 100644
--- a/api/tests/integration_tests/workflow/nodes/test_code.py
+++ b/api/tests/integration_tests/workflow/nodes/test_code.py
@@ -227,7 +227,7 @@ def test_execute_code_output_validator_depth():
     # construct result
     result = {
         "number_validator": 1,
-        "string_validator": "1" * 2000,
+        "string_validator": "1" * 6000,
         "number_array_validator": [1, 2, 3, 3.333],
         "string_array_validator": ["1", "2", "3"],
         "object_validator": {
@@ -263,4 +263,76 @@ def test_execute_code_output_validator_depth():
     # validate
     with pytest.raises(ValueError):
         node._transform_result(result, node.node_data.outputs)
-    
\ No newline at end of file
+
+
+def test_execute_code_output_object_list():
+    code = '''
+    def main(args1: int, args2: int) -> dict:
+        return {
+            "result": {
+                "result": args1 + args2,
+            }
+        }
+    '''
+    # trim first 4 spaces at the beginning of each line
+    code = '\n'.join([line[4:] for line in code.split('\n')])
+    node = CodeNode(
+        tenant_id='1',
+        app_id='1',
+        workflow_id='1',
+        user_id='1',
+        user_from=InvokeFrom.WEB_APP,
+        config={
+            'id': '1',
+            'data': {
+                "outputs": {
+                    "object_list": {
+                        "type": "array[object]",
+                    },
+                },
+                'title': '123',
+                'variables': [
+                    {
+                        'variable': 'args1',
+                        'value_selector': ['1', '123', 'args1'],
+                    },
+                    {
+                        'variable': 'args2',
+                        'value_selector': ['1', '123', 'args2']
+                    }
+                ],
+                'answer': '123',
+                'code_language': 'python3',
+                'code': code
+            }
+        }
+    )
+
+    # construct result
+    result = {
+        "object_list": [{
+            "result": 1,
+        }, {
+            "result": 2,
+        }, {
+            "result": [1, 2, 3],
+        }]
+    }
+
+    # validate
+    node._transform_result(result, node.node_data.outputs)
+
+    # construct result
+    result = {
+        "object_list": [{
+            "result": 1,
+        }, {
+            "result": 2,
+        }, {
+            "result": [1, 2, 3],
+        }, 1]
+    }
+
+    # validate
+    with pytest.raises(ValueError):
+        node._transform_result(result, node.node_data.outputs)