From 9ccbfbaf9d56c18549ab5f5ad16561c624e17441 Mon Sep 17 00:00:00 2001 From: Manan Bansal <66985466+manan-tech@users.noreply.github.com> Date: Tue, 16 Jun 2026 13:12:51 +0530 Subject: [PATCH] fix(api): add bounded timeouts to Nacos remote settings HTTP requests (#37444) Co-authored-by: Claude Opus 4.8 (1M context) Co-authored-by: Crazywoola <100913391+crazywoola@users.noreply.github.com> --- api/.env.example | 8 +++ .../nacos/http_request.py | 18 ++++++- .../configs/test_nacos_http_client.py | 51 +++++++++++++++++++ docker/envs/core-services/shared.env.example | 6 +++ 4 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 api/tests/unit_tests/configs/test_nacos_http_client.py diff --git a/api/.env.example b/api/.env.example index f645ba7bf0..8a2af53c6e 100644 --- a/api/.env.example +++ b/api/.env.example @@ -774,3 +774,11 @@ EVENT_BUS_LISTENER_JOIN_TIMEOUT_MS=2000 ENABLE_HUMAN_INPUT_TIMEOUT_TASK=true # Human input timeout check interval in minutes HUMAN_INPUT_TIMEOUT_TASK_INTERVAL=1 + +# Nacos remote settings source HTTP timeouts (seconds). +# Bound how long requests to the Nacos endpoint wait before failing, so a slow or +# unresponsive Nacos server cannot stall API startup or token refresh. +# Read/write/pool timeout for Nacos requests (default: 10.0) +DIFY_ENV_NACOS_REQUEST_TIMEOUT=10.0 +# Connect timeout for Nacos requests (default: 3.0) +DIFY_ENV_NACOS_CONNECT_TIMEOUT=3.0 diff --git a/api/configs/remote_settings_sources/nacos/http_request.py b/api/configs/remote_settings_sources/nacos/http_request.py index 1a0744a21b..a15b6aeaae 100644 --- a/api/configs/remote_settings_sources/nacos/http_request.py +++ b/api/configs/remote_settings_sources/nacos/http_request.py @@ -20,6 +20,12 @@ class NacosHttpClient: self.token: str | None = None self.token_ttl = 18000 self.token_expire_time: float = 0 + # Bounded timeouts so a slow or unresponsive Nacos server cannot hang the API + # service indefinitely during startup or token refresh. 
+ self.timeout = httpx.Timeout( + float(os.getenv("DIFY_ENV_NACOS_REQUEST_TIMEOUT", "10.0")), + connect=float(os.getenv("DIFY_ENV_NACOS_CONNECT_TIMEOUT", "3.0")), + ) def http_request( self, url: str, method: str = "GET", headers: dict[str, str] | None = None, params: dict[str, str] | None = None @@ -28,12 +34,17 @@ class NacosHttpClient: headers = {} if params is None: params = {} + full_url = "http://" + self.server + url try: self._inject_auth_info(headers, params) - response = httpx.request(method, url="http://" + self.server + url, headers=headers, params=params) + response = httpx.request(method, url=full_url, headers=headers, params=params, timeout=self.timeout) response.raise_for_status() return response.text + except httpx.TimeoutException as e: + logger.warning("Request to Nacos timed out (url=%s, timeout=%s): %s", full_url, self.timeout, e) + return f"Request to Nacos timed out: {e}" except httpx.RequestError as e: + logger.warning("Request to Nacos failed (url=%s): %s", full_url, e) return f"Request to Nacos failed: {e}" def _inject_auth_info(self, headers: dict[str, str], params: dict[str, str], module: str = "config") -> None: @@ -78,13 +89,16 @@ class NacosHttpClient: params = {"username": self.username, "password": self.password} url = "http://" + self.server + "/nacos/v1/auth/login" try: - resp = httpx.request("POST", url, headers=None, params=params) + resp = httpx.request("POST", url, headers=None, params=params, timeout=self.timeout) resp.raise_for_status() response_data = resp.json() self.token = response_data.get("accessToken") self.token_ttl = response_data.get("tokenTtl", 18000) self.token_expire_time = current_time + self.token_ttl - 10 return self.token + except httpx.TimeoutException: + logger.exception("[get-access-token] request to Nacos timed out (url=%s, timeout=%s)", url, self.timeout) + raise except Exception: logger.exception("[get-access-token] exception occur") raise diff --git 
a/api/tests/unit_tests/configs/test_nacos_http_client.py b/api/tests/unit_tests/configs/test_nacos_http_client.py new file mode 100644 index 0000000000..855a1a8acc --- /dev/null +++ b/api/tests/unit_tests/configs/test_nacos_http_client.py @@ -0,0 +1,51 @@ +from unittest.mock import MagicMock, patch + +import httpx + +from configs.remote_settings_sources.nacos.http_request import NacosHttpClient + + +def _ok_response(text: str = "ok", json_data: dict | None = None) -> MagicMock: + response = MagicMock() + response.text = text + response.raise_for_status.return_value = None + if json_data is not None: + response.json.return_value = json_data + return response + + +def test_http_request_passes_bounded_timeout(): + client = NacosHttpClient() + with patch("configs.remote_settings_sources.nacos.http_request.httpx.request") as mock_request: + mock_request.return_value = _ok_response() + client.http_request("/nacos/v1/cs/configs") + + timeout = mock_request.call_args.kwargs["timeout"] + assert isinstance(timeout, httpx.Timeout) + assert timeout.read is not None + assert timeout.connect is not None + + +def test_http_request_returns_graceful_message_on_timeout(): + client = NacosHttpClient() + with patch( + "configs.remote_settings_sources.nacos.http_request.httpx.request", + side_effect=httpx.ConnectTimeout("connection timed out"), + ): + result = client.http_request("/nacos/v1/cs/configs") + + assert "Nacos" in result + assert "timed out" in result.lower() + + +def test_get_access_token_passes_bounded_timeout(): + client = NacosHttpClient() + client.username = "user" + client.password = "pass" + with patch("configs.remote_settings_sources.nacos.http_request.httpx.request") as mock_request: + mock_request.return_value = _ok_response(json_data={"accessToken": "tok", "tokenTtl": 100}) + token = client.get_access_token(force_refresh=True) + + assert token == "tok" + timeout = mock_request.call_args.kwargs["timeout"] + assert isinstance(timeout, httpx.Timeout) diff --git 
a/docker/envs/core-services/shared.env.example b/docker/envs/core-services/shared.env.example index 49a8d9bbaa..0cc840d2a4 100644 --- a/docker/envs/core-services/shared.env.example +++ b/docker/envs/core-services/shared.env.example @@ -481,5 +481,11 @@ MILVUS_ENABLE_HYBRID_SEARCH=False ENABLE_HUMAN_INPUT_TIMEOUT_TASK=true HUMAN_INPUT_TIMEOUT_TASK_INTERVAL=1 +# Nacos remote settings source HTTP timeouts (seconds). +# Bound how long requests to the Nacos endpoint wait before failing, so a slow or +# unresponsive Nacos server cannot stall API startup or token refresh. +DIFY_ENV_NACOS_REQUEST_TIMEOUT=10.0 +DIFY_ENV_NACOS_CONNECT_TIMEOUT=3.0 + # uv cache dir UV_CACHE_DIR=/tmp/uv_cache