From 11191e253703f7a6f5e815838159f5a42cb29ec8 Mon Sep 17 00:00:00 2001 From: QuantumGhost Date: Thu, 30 Oct 2025 15:43:08 +0800 Subject: [PATCH] docs(api): update docs about gevent setup in app.py (#27611) Add a warning about top level importing in gunicorn.conf.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .devcontainer/post_create_command.sh | 2 +- .vscode/launch.json.template | 7 ++----- api/README.md | 2 +- api/app.py | 19 ++++--------------- api/gunicorn.conf.py | 15 ++++++++++++++- docker/.env.example | 12 ++++++++++++ 6 files changed, 34 insertions(+), 23 deletions(-) diff --git a/.devcontainer/post_create_command.sh b/.devcontainer/post_create_command.sh index 2e787ab855..a958832aa9 100755 --- a/.devcontainer/post_create_command.sh +++ b/.devcontainer/post_create_command.sh @@ -6,7 +6,7 @@ cd web && pnpm install pipx install uv echo "alias start-api=\"cd $WORKSPACE_ROOT/api && uv run python -m flask run --host 0.0.0.0 --port=5001 --debug\"" >> ~/.bashrc -echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion,plugin,workflow_storage\"" >> ~/.bashrc +echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P threads -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion,plugin,workflow_storage\"" >> ~/.bashrc echo "alias start-web=\"cd $WORKSPACE_ROOT/web && pnpm dev\"" >> ~/.bashrc echo "alias start-web-prod=\"cd $WORKSPACE_ROOT/web && pnpm build && pnpm start\"" >> ~/.bashrc echo "alias start-containers=\"cd $WORKSPACE_ROOT/docker && docker-compose -f docker-compose.middleware.yaml -p dify --env-file middleware.env up -d\"" >> ~/.bashrc diff --git a/.vscode/launch.json.template b/.vscode/launch.json.template index bf04d31998..bd5a787d4c 100644 --- a/.vscode/launch.json.template +++ b/.vscode/launch.json.template @@ -8,8 +8,7 @@ "module": "flask", "env": { "FLASK_APP": "app.py", - "FLASK_ENV": "development", - "GEVENT_SUPPORT": "True" + "FLASK_ENV": "development" }, "args": [ "run", @@ -28,9 +27,7 @@ "type": "debugpy", "request": "launch", "module": "celery", - "env": { - "GEVENT_SUPPORT": "True" - }, + "env": {}, "args": [ "-A", "app.celery", diff --git a/api/README.md b/api/README.md index fc21158c7a..45dad07af0 100644 --- a/api/README.md +++ b/api/README.md @@ -80,7 +80,7 @@ 1. If you need to handle and debug the async tasks (e.g. dataset importing and documents indexing), please start the worker service. ```bash -uv run celery -A app.celery worker -P gevent -c 2 --loglevel INFO -Q dataset,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,priority_pipeline,pipeline +uv run celery -A app.celery worker -P threads -c 2 --loglevel INFO -Q dataset,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,priority_pipeline,pipeline ``` Additionally, if you want to debug the celery scheduled tasks, you can run the following command in another terminal to start the beat service: diff --git a/api/app.py b/api/app.py index e0a903b10d..4ed743dcb4 100644 --- a/api/app.py +++ b/api/app.py @@ -13,23 +13,12 @@ if is_db_command(): app = create_migrations_app() else: - # It seems that JetBrains Python debugger does not work well with gevent, - # so we need to disable gevent in debug mode. - # If you are using debugpy and set GEVENT_SUPPORT=True, you can debug with gevent. - # if (flask_debug := os.environ.get("FLASK_DEBUG", "0")) and flask_debug.lower() in {"false", "0", "no"}: - # from gevent import monkey + # Gunicorn and Celery handle monkey patching automatically in production by + # specifying the `gevent` worker class. Manual monkey patching is not required here. # - # # gevent - # monkey.patch_all() + # See `api/docker/entrypoint.sh` (lines 33 and 47) for details. # - # from grpc.experimental import gevent as grpc_gevent # type: ignore - # - # # grpc gevent - # grpc_gevent.init_gevent() - - # import psycogreen.gevent # type: ignore - # - # psycogreen.gevent.patch_psycopg() + # For third-party library patching, refer to `gunicorn.conf.py` and `celery_entrypoint.py`. from app_factory import create_app diff --git a/api/gunicorn.conf.py b/api/gunicorn.conf.py index 943ee100ca..da75d25ba6 100644 --- a/api/gunicorn.conf.py +++ b/api/gunicorn.conf.py @@ -2,6 +2,19 @@ import psycogreen.gevent as pscycogreen_gevent # type: ignore from gevent import events as gevent_events from grpc.experimental import gevent as grpc_gevent # type: ignore +# WARNING: This module is loaded very early in the Gunicorn worker lifecycle, +# before gevent's monkey-patching is applied. Importing modules at the top level here can +# interfere with gevent's ability to properly patch the standard library, +# potentially causing subtle and difficult-to-diagnose bugs. +# +# To ensure correct behavior, defer any initialization or imports that depend on monkey-patching +# to the `post_patch` hook below, or use a gevent_events subscriber as shown. +# +# For further context, see: https://github.com/langgenius/dify/issues/26689 +# +# Note: The `post_fork` hook is also executed before monkey-patching, +# so moving imports there does not resolve this issue. + # NOTE(QuantumGhost): here we cannot use post_fork to patch gRPC, as # grpc_gevent.init_gevent must be called after patching stdlib. # Gunicorn calls `post_init` before applying monkey patch. @@ -11,7 +24,7 @@ from grpc.experimental import gevent as grpc_gevent # type: ignore # ref: # - https://github.com/grpc/grpc/blob/62533ea13879d6ee95c6fda11ec0826ca822c9dd/src/python/grpcio/grpc/experimental/gevent.py # - https://github.com/gevent/gevent/issues/2060#issuecomment-3016768668 -# - https://github.com/benoitc/gunicorn/blob/master/gunicorn/arbiter.py#L607-L613 +# - https://github.com/benoitc/gunicorn/blob/23.0.0/gunicorn/arbiter.py#L605-L609 def post_patch(event): diff --git a/docker/.env.example b/docker/.env.example index 7af92248dc..7d7c2a2cda 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -149,6 +149,12 @@ DIFY_PORT=5001 SERVER_WORKER_AMOUNT=1 # Defaults to gevent. If using windows, it can be switched to sync or solo. +# +# Warning: Changing this parameter requires disabling patching for +# psycopg2 and gRPC (see `gunicorn.conf.py` and `celery_entrypoint.py`). +# Modifying it may also decrease throughput. +# +# It is strongly discouraged to change this parameter. SERVER_WORKER_CLASS=gevent # Default number of worker connections, the default is 10. @@ -156,6 +162,12 @@ SERVER_WORKER_CONNECTIONS=10 # Similar to SERVER_WORKER_CLASS. # If using windows, it can be switched to sync or solo. +# +# Warning: Changing this parameter requires disabling patching for +# psycopg2 and gRPC (see `gunicorn_conf.py` and `celery_entrypoint.py`). +# Modifying it may also decrease throughput. +# +# It is strongly discouraged to change this parameter. CELERY_WORKER_CLASS= # Request handling timeout. The default is 200,