From e83a4090ac3dc93f5f43ce44910820721c561e6e Mon Sep 17 00:00:00 2001 From: kurokobo Date: Mon, 6 Apr 2026 11:26:26 +0900 Subject: [PATCH] fix: lighten the health checks for the Worker and Worker Beat services, and disable them by default (#34572) --- api/celery_healthcheck.py | 18 ++++++++++++ api/extensions/ext_celery.py | 28 +++++++++++-------- .../unit_tests/extensions/test_celery_ssl.py | 24 ++++++++-------- docker/.env.example | 12 ++++++++ docker/docker-compose-template.yaml | 14 ++++++---- docker/docker-compose.yaml | 14 ++++++---- docker/generate_docker_compose | 19 +++++++++++-- 7 files changed, 91 insertions(+), 38 deletions(-) create mode 100644 api/celery_healthcheck.py diff --git a/api/celery_healthcheck.py b/api/celery_healthcheck.py new file mode 100644 index 0000000000..23d856d7d0 --- /dev/null +++ b/api/celery_healthcheck.py @@ -0,0 +1,18 @@ +# This module provides a lightweight Celery instance for use in Docker health checks. +# Unlike celery_entrypoint.py, this does NOT import app.py and therefore avoids +# initializing all Flask extensions (DB, Redis, storage, blueprints, etc.). +# Using this module keeps the health check fast and low-cost. +from celery import Celery + +from configs import dify_config +from extensions.ext_celery import get_celery_broker_transport_options, get_celery_ssl_options + +celery = Celery(broker=dify_config.CELERY_BROKER_URL) + +broker_transport_options = get_celery_broker_transport_options() +if broker_transport_options: + celery.conf.update(broker_transport_options=broker_transport_options) + +ssl_options = get_celery_ssl_options() +if ssl_options: + celery.conf.update(broker_use_ssl=ssl_options) diff --git a/api/extensions/ext_celery.py b/api/extensions/ext_celery.py index 4eed34436a..1b3ccd1207 100644 --- a/api/extensions/ext_celery.py +++ b/api/extensions/ext_celery.py @@ -10,7 +10,7 @@ from configs import dify_config from dify_app import DifyApp -def _get_celery_ssl_options() -> dict[str, Any] | None: +def get_celery_ssl_options() -> dict[str, Any] | None: """Get SSL configuration for Celery broker/backend connections.""" # Only apply SSL if we're using Redis as broker/backend if not dify_config.BROKER_USE_SSL: @@ -43,6 +43,19 @@ def _get_celery_ssl_options() -> dict[str, Any] | None: return ssl_options +def get_celery_broker_transport_options() -> dict[str, Any]: + """Get broker transport options (e.g. Redis Sentinel) for Celery connections.""" + if dify_config.CELERY_USE_SENTINEL: + return { + "master_name": dify_config.CELERY_SENTINEL_MASTER_NAME, + "sentinel_kwargs": { + "socket_timeout": dify_config.CELERY_SENTINEL_SOCKET_TIMEOUT, + "password": dify_config.CELERY_SENTINEL_PASSWORD, + }, + } + return {} + + def init_app(app: DifyApp) -> Celery: class FlaskTask(Task): def __call__(self, *args: object, **kwargs: object) -> object: @@ -53,16 +66,7 @@ def init_app(app: DifyApp) -> Celery: init_request_context() return self.run(*args, **kwargs) - broker_transport_options = {} - - if dify_config.CELERY_USE_SENTINEL: - broker_transport_options = { - "master_name": dify_config.CELERY_SENTINEL_MASTER_NAME, - "sentinel_kwargs": { - "socket_timeout": dify_config.CELERY_SENTINEL_SOCKET_TIMEOUT, - "password": dify_config.CELERY_SENTINEL_PASSWORD, - }, - } + broker_transport_options = get_celery_broker_transport_options() celery_app = Celery( app.name, @@ -89,7 +93,7 @@ def init_app(app: DifyApp) -> Celery: ) # Apply SSL configuration if enabled - ssl_options = _get_celery_ssl_options() + ssl_options = get_celery_ssl_options() if ssl_options: celery_app.conf.update( broker_use_ssl=ssl_options, diff --git a/api/tests/unit_tests/extensions/test_celery_ssl.py b/api/tests/unit_tests/extensions/test_celery_ssl.py index 2ec7d6b4fc..81687ce5f8 100644 --- a/api/tests/unit_tests/extensions/test_celery_ssl.py +++ b/api/tests/unit_tests/extensions/test_celery_ssl.py @@ -14,9 +14,9 @@ class TestCelerySSLConfiguration: dify_config = DifyConfig(CELERY_BROKER_URL="redis://localhost:6379/0") with patch("extensions.ext_celery.dify_config", dify_config): - from extensions.ext_celery import _get_celery_ssl_options + from extensions.ext_celery import get_celery_ssl_options - result = _get_celery_ssl_options() + result = get_celery_ssl_options() assert result is None def test_get_celery_ssl_options_when_broker_not_redis(self): @@ -25,9 +25,9 @@ class TestCelerySSLConfiguration: mock_config.CELERY_BROKER_URL = "amqp://localhost:5672" with patch("extensions.ext_celery.dify_config", mock_config): - from extensions.ext_celery import _get_celery_ssl_options + from extensions.ext_celery import get_celery_ssl_options - result = _get_celery_ssl_options() + result = get_celery_ssl_options() assert result is None def test_get_celery_ssl_options_with_cert_none(self): @@ -40,9 +40,9 @@ class TestCelerySSLConfiguration: mock_config.REDIS_SSL_KEYFILE = None with patch("extensions.ext_celery.dify_config", mock_config): - from extensions.ext_celery import _get_celery_ssl_options + from extensions.ext_celery import get_celery_ssl_options - result = _get_celery_ssl_options() + result = get_celery_ssl_options() assert result is not None assert result["ssl_cert_reqs"] == ssl.CERT_NONE assert result["ssl_ca_certs"] is None @@ -59,9 +59,9 @@ class TestCelerySSLConfiguration: mock_config.REDIS_SSL_KEYFILE = "/path/to/client.key" with patch("extensions.ext_celery.dify_config", mock_config): - from extensions.ext_celery import _get_celery_ssl_options + from extensions.ext_celery import get_celery_ssl_options - result = _get_celery_ssl_options() + result = get_celery_ssl_options() assert result is not None assert result["ssl_cert_reqs"] == ssl.CERT_REQUIRED assert result["ssl_ca_certs"] == "/path/to/ca.crt" @@ -78,9 +78,9 @@ class TestCelerySSLConfiguration: mock_config.REDIS_SSL_KEYFILE = None with patch("extensions.ext_celery.dify_config", mock_config): - from extensions.ext_celery import _get_celery_ssl_options + from extensions.ext_celery import get_celery_ssl_options - result = _get_celery_ssl_options() + result = get_celery_ssl_options() assert result is not None assert result["ssl_cert_reqs"] == ssl.CERT_OPTIONAL assert result["ssl_ca_certs"] == "/path/to/ca.crt" @@ -95,9 +95,9 @@ class TestCelerySSLConfiguration: mock_config.REDIS_SSL_KEYFILE = None with patch("extensions.ext_celery.dify_config", mock_config): - from extensions.ext_celery import _get_celery_ssl_options + from extensions.ext_celery import get_celery_ssl_options - result = _get_celery_ssl_options() + result = get_celery_ssl_options() assert result is not None assert result["ssl_cert_reqs"] == ssl.CERT_NONE # Should default to CERT_NONE diff --git a/docker/.env.example b/docker/.env.example index b2d6244b46..f20d57c71a 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -1358,6 +1358,18 @@ SSRF_POOL_KEEPALIVE_EXPIRY=5.0 # ------------------------------ COMPOSE_PROFILES=${VECTOR_STORE:-weaviate},${DB_TYPE:-postgresql} +# ------------------------------ +# Worker health check configuration for worker and worker_beat services. +# Set to false to enable the health check. +# Note: enabling the health check may cause periodic CPU spikes and increased load, +# as it establishes a broker connection and sends a Celery ping on every check interval. +# ------------------------------ +COMPOSE_WORKER_HEALTHCHECK_DISABLED=true +# Interval between health checks (e.g. 30s, 1m) +COMPOSE_WORKER_HEALTHCHECK_INTERVAL=30s +# Timeout for each health check (e.g. 30s, 1m) +COMPOSE_WORKER_HEALTHCHECK_TIMEOUT=30s + # ------------------------------ # Docker Compose Service Expose Host Port Configurations # ------------------------------ diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml index 57584cb829..5234202a62 100644 --- a/docker/docker-compose-template.yaml +++ b/docker/docker-compose-template.yaml @@ -102,11 +102,12 @@ services: # Mount the storage directory to the container, for storing user files. - ./volumes/app/storage:/app/api/storage healthcheck: - test: ["CMD-SHELL", "celery -A celery_entrypoint.celery inspect ping"] - interval: 30s - timeout: 10s + test: ["CMD-SHELL", "celery -A celery_healthcheck.celery inspect ping"] + interval: ${COMPOSE_WORKER_HEALTHCHECK_INTERVAL:-30s} + timeout: ${COMPOSE_WORKER_HEALTHCHECK_TIMEOUT:-30s} retries: 3 start_period: 60s + disable: ${COMPOSE_WORKER_HEALTHCHECK_DISABLED:-true} networks: - ssrf_proxy_network - default @@ -139,11 +140,12 @@ services: redis: condition: service_started healthcheck: - test: ["CMD-SHELL", "celery -A app.celery inspect ping"] - interval: 30s - timeout: 10s + test: ["CMD-SHELL", "celery -A celery_healthcheck.celery inspect ping"] + interval: ${COMPOSE_WORKER_HEALTHCHECK_INTERVAL:-30s} + timeout: ${COMPOSE_WORKER_HEALTHCHECK_TIMEOUT:-30s} retries: 3 start_period: 60s + disable: ${COMPOSE_WORKER_HEALTHCHECK_DISABLED:-true} networks: - ssrf_proxy_network - default diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 097fadc959..d03835e2b0 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -811,11 +811,12 @@ services: # Mount the storage directory to the container, for storing user files. - ./volumes/app/storage:/app/api/storage healthcheck: - test: ["CMD-SHELL", "celery -A celery_entrypoint.celery inspect ping"] - interval: 30s - timeout: 10s + test: ["CMD-SHELL", "celery -A celery_healthcheck.celery inspect ping"] + interval: ${COMPOSE_WORKER_HEALTHCHECK_INTERVAL:-30s} + timeout: ${COMPOSE_WORKER_HEALTHCHECK_TIMEOUT:-30s} retries: 3 start_period: 60s + disable: ${COMPOSE_WORKER_HEALTHCHECK_DISABLED:-true} networks: - ssrf_proxy_network - default @@ -848,11 +849,12 @@ services: redis: condition: service_started healthcheck: - test: ["CMD-SHELL", "celery -A app.celery inspect ping"] - interval: 30s - timeout: 10s + test: ["CMD-SHELL", "celery -A celery_healthcheck.celery inspect ping"] + interval: ${COMPOSE_WORKER_HEALTHCHECK_INTERVAL:-30s} + timeout: ${COMPOSE_WORKER_HEALTHCHECK_TIMEOUT:-30s} retries: 3 start_period: 60s + disable: ${COMPOSE_WORKER_HEALTHCHECK_DISABLED:-true} networks: - ssrf_proxy_network - default diff --git a/docker/generate_docker_compose b/docker/generate_docker_compose index bf6c1423c9..46d948f3c1 100755 --- a/docker/generate_docker_compose +++ b/docker/generate_docker_compose @@ -3,6 +3,20 @@ import os import re import sys +# Variables that exist only for Docker Compose orchestration and must NOT be +# injected into containers as environment variables. +SHARED_ENV_EXCLUDE = frozenset( + [ + # Docker Compose profile selection + "COMPOSE_PROFILES", + # Worker health check orchestration flags (consumed by docker-compose, + # not by the application running inside the container) + "COMPOSE_WORKER_HEALTHCHECK_DISABLED", + "COMPOSE_WORKER_HEALTHCHECK_INTERVAL", + "COMPOSE_WORKER_HEALTHCHECK_TIMEOUT", + ] +) + def parse_env_example(file_path): """ @@ -37,7 +51,7 @@ def generate_shared_env_block(env_vars, anchor_name="shared-api-worker-env"): """ lines = [f"x-shared-env: &{anchor_name}"] for key, default in env_vars.items(): - if key == "COMPOSE_PROFILES": + if key in SHARED_ENV_EXCLUDE: continue # If default value is empty, use ${KEY:-} if default == "": @@ -54,6 +68,7 @@ def insert_shared_env(template_path, output_path, shared_env_block, header_comme """ Inserts the shared environment variables block and header comments into the template file, removing any existing x-shared-env anchors, and generates the final docker-compose.yaml file. + Always writes with LF line endings. """ with open(template_path, "r", encoding="utf-8") as f: template_content = f.read() @@ -69,7 +84,7 @@ def insert_shared_env(template_path, output_path, shared_env_block, header_comme # Prepare the final content with header comments and shared env block final_content = f"{header_comments}\n{shared_env_block}\n\n{template_content}" - with open(output_path, "w", encoding="utf-8") as f: + with open(output_path, "w", encoding="utf-8", newline="\n") as f: f.write(final_content) print(f"Generated {output_path}")