diff --git a/api/core/datasource/utils/parser.py b/api/core/datasource/utils/parser.py deleted file mode 100644 index db1766a059..0000000000 --- a/api/core/datasource/utils/parser.py +++ /dev/null @@ -1,388 +0,0 @@ -import re -import uuid -from json import dumps as json_dumps -from json import loads as json_loads -from json.decoder import JSONDecodeError - -from flask import request -from requests import get -from yaml import YAMLError, safe_load # type: ignore - -from core.tools.entities.common_entities import I18nObject -from core.tools.entities.tool_bundle import ApiToolBundle -from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolParameter -from core.tools.errors import ToolApiSchemaError, ToolNotSupportedError, ToolProviderNotFoundError - - -class ApiBasedToolSchemaParser: - @staticmethod - def parse_openapi_to_tool_bundle( - openapi: dict, extra_info: dict | None = None, warning: dict | None = None - ) -> list[ApiToolBundle]: - warning = warning if warning is not None else {} - extra_info = extra_info if extra_info is not None else {} - - # set description to extra_info - extra_info["description"] = openapi["info"].get("description", "") - - if len(openapi["servers"]) == 0: - raise ToolProviderNotFoundError("No server found in the openapi yaml.") - - server_url = openapi["servers"][0]["url"] - request_env = request.headers.get("X-Request-Env") - if request_env: - matched_servers = [server["url"] for server in openapi["servers"] if server["env"] == request_env] - server_url = matched_servers[0] if matched_servers else server_url - - # list all interfaces - interfaces = [] - for path, path_item in openapi["paths"].items(): - methods = ["get", "post", "put", "delete", "patch", "head", "options", "trace"] - for method in methods: - if method in path_item: - interfaces.append( - { - "path": path, - "method": method, - "operation": path_item[method], - } - ) - - # get all parameters - bundles = [] - for interface in interfaces: - # convert 
parameters - parameters = [] - if "parameters" in interface["operation"]: - for parameter in interface["operation"]["parameters"]: - tool_parameter = ToolParameter( - name=parameter["name"], - label=I18nObject(en_US=parameter["name"], zh_Hans=parameter["name"]), - human_description=I18nObject( - en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "") - ), - type=ToolParameter.ToolParameterType.STRING, - required=parameter.get("required", False), - form=ToolParameter.ToolParameterForm.LLM, - llm_description=parameter.get("description"), - default=parameter["schema"]["default"] - if "schema" in parameter and "default" in parameter["schema"] - else None, - placeholder=I18nObject( - en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "") - ), - ) - - # check if there is a type - typ = ApiBasedToolSchemaParser._get_tool_parameter_type(parameter) - if typ: - tool_parameter.type = typ - - parameters.append(tool_parameter) - # create tool bundle - # check if there is a request body - if "requestBody" in interface["operation"]: - request_body = interface["operation"]["requestBody"] - if "content" in request_body: - for content_type, content in request_body["content"].items(): - # if there is a reference, get the reference and overwrite the content - if "schema" not in content: - continue - - if "$ref" in content["schema"]: - # get the reference - root = openapi - reference = content["schema"]["$ref"].split("/")[1:] - for ref in reference: - root = root[ref] - # overwrite the content - interface["operation"]["requestBody"]["content"][content_type]["schema"] = root - - # parse body parameters - if "schema" in interface["operation"]["requestBody"]["content"][content_type]: # pyright: ignore[reportIndexIssue, reportPossiblyUnboundVariable] - body_schema = interface["operation"]["requestBody"]["content"][content_type]["schema"] # pyright: ignore[reportIndexIssue, reportPossiblyUnboundVariable] - required = 
body_schema.get("required", []) - properties = body_schema.get("properties", {}) - for name, property in properties.items(): - tool = ToolParameter( - name=name, - label=I18nObject(en_US=name, zh_Hans=name), - human_description=I18nObject( - en_US=property.get("description", ""), zh_Hans=property.get("description", "") - ), - type=ToolParameter.ToolParameterType.STRING, - required=name in required, - form=ToolParameter.ToolParameterForm.LLM, - llm_description=property.get("description", ""), - default=property.get("default", None), - placeholder=I18nObject( - en_US=property.get("description", ""), zh_Hans=property.get("description", "") - ), - ) - - # check if there is a type - typ = ApiBasedToolSchemaParser._get_tool_parameter_type(property) - if typ: - tool.type = typ - - parameters.append(tool) - - # check if parameters is duplicated - parameters_count = {} - for parameter in parameters: - if parameter.name not in parameters_count: - parameters_count[parameter.name] = 0 - parameters_count[parameter.name] += 1 - for name, count in parameters_count.items(): - if count > 1: - warning["duplicated_parameter"] = f"Parameter {name} is duplicated." 
@staticmethod
def _get_tool_parameter_type(parameter: dict) -> ToolParameter.ToolParameterType | None:
    """
    map an openapi parameter / property schema to a tool parameter type

    :param parameter: the parameter or property dict; a falsy value is treated as an empty dict
    :return: the matching tool parameter type, or None when no type can be derived
    """
    spec = parameter or {}
    kinds = ToolParameter.ToolParameterType

    # a binary format wins over whatever "type" says
    if spec.get("format") == "binary":
        return kinds.FILE

    # the type is declared either on the parameter itself or on its nested schema
    declared: str | None = None
    if "type" in spec:
        declared = spec["type"]
    elif "schema" in spec and "type" in spec["schema"]:
        declared = spec["schema"]["type"]

    scalar_kinds = {
        "integer": kinds.NUMBER,
        "number": kinds.NUMBER,
        "boolean": kinds.BOOLEAN,
        "string": kinds.STRING,
    }
    mapped = scalar_kinds.get(declared)
    if mapped is not None:
        return mapped

    if declared != "array":
        return None

    # only arrays whose items are binary are supported (as a list of files)
    element = spec.get("items") or spec.get("schema", {}).get("items")
    if element and element.get("format") == "binary":
        return kinds.FILES
    return None
) -> list[ApiToolBundle]: - """ - parse openapi yaml to tool bundle - - :param yaml: the yaml string - :param extra_info: the extra info - :param warning: the warning message - :return: the tool bundle - """ - warning = warning if warning is not None else {} - extra_info = extra_info if extra_info is not None else {} - - openapi: dict = safe_load(yaml) - if openapi is None: - raise ToolApiSchemaError("Invalid openapi yaml.") - return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi, extra_info=extra_info, warning=warning) - - @staticmethod - def parse_swagger_to_openapi(swagger: dict, extra_info: dict | None = None, warning: dict | None = None) -> dict: - warning = warning or {} - """ - parse swagger to openapi - - :param swagger: the swagger dict - :return: the openapi dict - """ - # convert swagger to openapi - info = swagger.get("info", {"title": "Swagger", "description": "Swagger", "version": "1.0.0"}) - - servers = swagger.get("servers", []) - - if len(servers) == 0: - raise ToolApiSchemaError("No server found in the swagger yaml.") - - openapi = { - "openapi": "3.0.0", - "info": { - "title": info.get("title", "Swagger"), - "description": info.get("description", "Swagger"), - "version": info.get("version", "1.0.0"), - }, - "servers": swagger["servers"], - "paths": {}, - "components": {"schemas": {}}, - } - - # check paths - if "paths" not in swagger or len(swagger["paths"]) == 0: - raise ToolApiSchemaError("No paths found in the swagger yaml.") - - # convert paths - for path, path_item in swagger["paths"].items(): - openapi["paths"][path] = {} # pyright: ignore[reportIndexIssue] - for method, operation in path_item.items(): - if "operationId" not in operation: - raise ToolApiSchemaError(f"No operationId found in operation {method} {path}.") - - if ("summary" not in operation or len(operation["summary"]) == 0) and ( - "description" not in operation or len(operation["description"]) == 0 - ): - if warning is not None: - warning["missing_summary"] = 
f"No summary or description found in operation {method} {path}." - - openapi["paths"][path][method] = { # pyright: ignore[reportIndexIssue] - "operationId": operation["operationId"], - "summary": operation.get("summary", ""), - "description": operation.get("description", ""), - "parameters": operation.get("parameters", []), - "responses": operation.get("responses", {}), - } - - if "requestBody" in operation: - openapi["paths"][path][method]["requestBody"] = operation["requestBody"] # pyright: ignore[reportIndexIssue] - - # convert definitions - for name, definition in swagger["definitions"].items(): - openapi["components"]["schemas"][name] = definition # pyright: ignore[reportIndexIssue, reportArgumentType] - - return openapi - - @staticmethod - def parse_openai_plugin_json_to_tool_bundle( - json: str, extra_info: dict | None = None, warning: dict | None = None - ) -> list[ApiToolBundle]: - """ - parse openapi plugin yaml to tool bundle - - :param json: the json string - :param extra_info: the extra info - :param warning: the warning message - :return: the tool bundle - """ - warning = warning if warning is not None else {} - extra_info = extra_info if extra_info is not None else {} - - try: - openai_plugin = json_loads(json) - api = openai_plugin["api"] - api_url = api["url"] - api_type = api["type"] - except JSONDecodeError: - raise ToolProviderNotFoundError("Invalid openai plugin json.") - - if api_type != "openapi": - raise ToolNotSupportedError("Only openapi is supported now.") - - # get openapi yaml - response = get(api_url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "}, timeout=5) - - if response.status_code != 200: - raise ToolProviderNotFoundError("cannot get openapi yaml from url.") - - return ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle( - response.text, extra_info=extra_info, warning=warning - ) - - @staticmethod - def auto_parse_to_tool_bundle( - content: str, extra_info: dict | None = None, warning: dict | 
None = None - ) -> tuple[list[ApiToolBundle], str]: - """ - auto parse to tool bundle - - :param content: the content - :param extra_info: the extra info - :param warning: the warning message - :return: tools bundle, schema_type - """ - warning = warning if warning is not None else {} - extra_info = extra_info if extra_info is not None else {} - - content = content.strip() - loaded_content = None - json_error = None - yaml_error = None - - try: - loaded_content = json_loads(content) - except JSONDecodeError as e: - json_error = e - - if loaded_content is None: - try: - loaded_content = safe_load(content) - except YAMLError as e: - yaml_error = e - if loaded_content is None: - raise ToolApiSchemaError( - f"Invalid api schema, schema is neither json nor yaml. json error: {str(json_error)}," - f" yaml error: {str(yaml_error)}" - ) - - swagger_error = None - openapi_error = None - openapi_plugin_error = None - schema_type = None - - try: - openapi = ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle( - loaded_content, extra_info=extra_info, warning=warning - ) - schema_type = ApiProviderSchemaType.OPENAPI.value - return openapi, schema_type - except ToolApiSchemaError as e: - openapi_error = e - - # openai parse error, fallback to swagger - try: - converted_swagger = ApiBasedToolSchemaParser.parse_swagger_to_openapi( - loaded_content, extra_info=extra_info, warning=warning - ) - schema_type = ApiProviderSchemaType.SWAGGER.value - return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle( - converted_swagger, extra_info=extra_info, warning=warning - ), schema_type - except ToolApiSchemaError as e: - swagger_error = e - - # swagger parse error, fallback to openai plugin - try: - openapi_plugin = ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle( - json_dumps(loaded_content), extra_info=extra_info, warning=warning - ) - return openapi_plugin, ApiProviderSchemaType.OPENAI_PLUGIN.value - except ToolNotSupportedError as e: - # maybe it's not plugin at 
def remove_leading_symbols(text: str) -> str:
    """
    Strip the run of punctuation or symbol characters at the very start of the text.

    Args:
        text (str): The input text to process.

    Returns:
        str: The text without its leading punctuation or symbols; the rest is untouched.
    """
    # Unicode punctuation blocks plus a hand-picked set of ASCII symbols.
    # FIXME this pattern is confused quick fix for #11868 maybe refactor it later
    leading_symbols = re.compile(r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,./:;<=>?@^_`~]+")
    return leading_symbols.sub("", text)


def is_valid_uuid(uuid_str: str) -> bool:
    """Return True when uuid_str can be parsed by uuid.UUID, False on any failure."""
    try:
        uuid.UUID(uuid_str)
    except Exception:
        return False
    else:
        return True
# Sentinel that tells "no default supplied" apart from an explicit default (including None).
_NO_DEFAULT = object()


def load_yaml_file(file_path: str, ignore_error: bool = True, default_value: Any = _NO_DEFAULT) -> Any:
    """
    Safe loading a YAML file
    :param file_path: the path of the YAML file
    :param ignore_error:
        if True, return default_value if error occurs and the error will be logged in debug level
        if False, raise error if error occurs
    :param default_value: the value returned when errors ignored; when omitted, a fresh
        empty dict is returned on every call
    :return: an object of the YAML content
    :raises FileNotFoundError: if the file is missing and ignore_error is False
    :raises YAMLError: if loading fails and ignore_error is False
    """
    # A literal `{}` default would be one dict shared by every call, so a caller
    # mutating the returned value would change what later calls get back.
    if default_value is _NO_DEFAULT:
        default_value = {}

    if not file_path or not Path(file_path).exists():
        if ignore_error:
            return default_value
        raise FileNotFoundError(f"File not found: {file_path}")

    with open(file_path, encoding="utf-8") as yaml_file:
        try:
            yaml_content = yaml.safe_load(yaml_file)
        except Exception as e:
            if ignore_error:
                # the docstring promises a debug-level record of the ignored error
                logging.getLogger(__name__).debug("Failed to load YAML file %s: %s", file_path, e)
                return default_value
            raise YAMLError(f"Failed to load YAML file {file_path}: {e}") from e

    # empty documents load as None; fall back to the default in that case
    return yaml_content or default_value