docs(api): update docs about gevent setup in app.py (#27611)

Add a warning about top level importing in gunicorn.conf.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
QuantumGhost 2025-10-30 15:43:08 +08:00 committed by GitHub
parent a1c0bd7a1c
commit 20ad5b7ac2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 34 additions and 23 deletions

View File

@ -6,7 +6,7 @@ cd web && pnpm install
pipx install uv pipx install uv
echo "alias start-api=\"cd $WORKSPACE_ROOT/api && uv run python -m flask run --host 0.0.0.0 --port=5001 --debug\"" >> ~/.bashrc echo "alias start-api=\"cd $WORKSPACE_ROOT/api && uv run python -m flask run --host 0.0.0.0 --port=5001 --debug\"" >> ~/.bashrc
echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion,plugin,workflow_storage\"" >> ~/.bashrc echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P threads -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion,plugin,workflow_storage\"" >> ~/.bashrc
echo "alias start-web=\"cd $WORKSPACE_ROOT/web && pnpm dev\"" >> ~/.bashrc echo "alias start-web=\"cd $WORKSPACE_ROOT/web && pnpm dev\"" >> ~/.bashrc
echo "alias start-web-prod=\"cd $WORKSPACE_ROOT/web && pnpm build && pnpm start\"" >> ~/.bashrc echo "alias start-web-prod=\"cd $WORKSPACE_ROOT/web && pnpm build && pnpm start\"" >> ~/.bashrc
echo "alias start-containers=\"cd $WORKSPACE_ROOT/docker && docker-compose -f docker-compose.middleware.yaml -p dify --env-file middleware.env up -d\"" >> ~/.bashrc echo "alias start-containers=\"cd $WORKSPACE_ROOT/docker && docker-compose -f docker-compose.middleware.yaml -p dify --env-file middleware.env up -d\"" >> ~/.bashrc

View File

@ -8,8 +8,7 @@
"module": "flask", "module": "flask",
"env": { "env": {
"FLASK_APP": "app.py", "FLASK_APP": "app.py",
"FLASK_ENV": "development", "FLASK_ENV": "development"
"GEVENT_SUPPORT": "True"
}, },
"args": [ "args": [
"run", "run",
@ -28,9 +27,7 @@
"type": "debugpy", "type": "debugpy",
"request": "launch", "request": "launch",
"module": "celery", "module": "celery",
"env": { "env": {},
"GEVENT_SUPPORT": "True"
},
"args": [ "args": [
"-A", "-A",
"app.celery", "app.celery",

View File

@ -80,7 +80,7 @@
1. If you need to handle and debug the async tasks (e.g. dataset importing and documents indexing), please start the worker service. 1. If you need to handle and debug the async tasks (e.g. dataset importing and documents indexing), please start the worker service.
```bash ```bash
uv run celery -A app.celery worker -P gevent -c 2 --loglevel INFO -Q dataset,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,priority_pipeline,pipeline uv run celery -A app.celery worker -P threads -c 2 --loglevel INFO -Q dataset,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,priority_pipeline,pipeline
``` ```
Additionally, if you want to debug the celery scheduled tasks, you can run the following command in another terminal to start the beat service: Additionally, if you want to debug the celery scheduled tasks, you can run the following command in another terminal to start the beat service:

View File

@ -13,23 +13,12 @@ if is_db_command():
app = create_migrations_app() app = create_migrations_app()
else: else:
# It seems that JetBrains Python debugger does not work well with gevent, # Gunicorn and Celery handle monkey patching automatically in production by
# so we need to disable gevent in debug mode. # specifying the `gevent` worker class. Manual monkey patching is not required here.
# If you are using debugpy and set GEVENT_SUPPORT=True, you can debug with gevent.
# if (flask_debug := os.environ.get("FLASK_DEBUG", "0")) and flask_debug.lower() in {"false", "0", "no"}:
# from gevent import monkey
# #
# # gevent # See `api/docker/entrypoint.sh` (lines 33 and 47) for details.
# monkey.patch_all()
# #
# from grpc.experimental import gevent as grpc_gevent # type: ignore # For third-party library patching, refer to `gunicorn.conf.py` and `celery_entrypoint.py`.
#
# # grpc gevent
# grpc_gevent.init_gevent()
# import psycogreen.gevent # type: ignore
#
# psycogreen.gevent.patch_psycopg()
from app_factory import create_app from app_factory import create_app

View File

@ -2,6 +2,19 @@ import psycogreen.gevent as pscycogreen_gevent # type: ignore
from gevent import events as gevent_events from gevent import events as gevent_events
from grpc.experimental import gevent as grpc_gevent # type: ignore from grpc.experimental import gevent as grpc_gevent # type: ignore
# WARNING: This module is loaded very early in the Gunicorn worker lifecycle,
# before gevent's monkey-patching is applied. Importing modules at the top level here can
# interfere with gevent's ability to properly patch the standard library,
# potentially causing subtle and difficult-to-diagnose bugs.
#
# To ensure correct behavior, defer any initialization or imports that depend on monkey-patching
# to the `post_patch` hook below, or use a gevent_events subscriber as shown.
#
# For further context, see: https://github.com/langgenius/dify/issues/26689
#
# Note: The `post_fork` hook is also executed before monkey-patching,
# so moving imports there does not resolve this issue.
# NOTE(QuantumGhost): here we cannot use post_fork to patch gRPC, as # NOTE(QuantumGhost): here we cannot use post_fork to patch gRPC, as
# grpc_gevent.init_gevent must be called after patching stdlib. # grpc_gevent.init_gevent must be called after patching stdlib.
# Gunicorn calls `post_fork` before applying monkey patch. # Gunicorn calls `post_fork` before applying monkey patch.
@ -11,7 +24,7 @@ from grpc.experimental import gevent as grpc_gevent # type: ignore
# ref: # ref:
# - https://github.com/grpc/grpc/blob/62533ea13879d6ee95c6fda11ec0826ca822c9dd/src/python/grpcio/grpc/experimental/gevent.py # - https://github.com/grpc/grpc/blob/62533ea13879d6ee95c6fda11ec0826ca822c9dd/src/python/grpcio/grpc/experimental/gevent.py
# - https://github.com/gevent/gevent/issues/2060#issuecomment-3016768668 # - https://github.com/gevent/gevent/issues/2060#issuecomment-3016768668
# - https://github.com/benoitc/gunicorn/blob/master/gunicorn/arbiter.py#L607-L613 # - https://github.com/benoitc/gunicorn/blob/23.0.0/gunicorn/arbiter.py#L605-L609
def post_patch(event): def post_patch(event):

View File

@ -149,6 +149,12 @@ DIFY_PORT=5001
SERVER_WORKER_AMOUNT=1 SERVER_WORKER_AMOUNT=1
# Defaults to gevent. If using Windows, it can be switched to sync or solo. # Defaults to gevent. If using Windows, it can be switched to sync or solo.
#
# Warning: Changing this parameter requires disabling patching for
# psycopg2 and gRPC (see `gunicorn.conf.py` and `celery_entrypoint.py`).
# Modifying it may also decrease throughput.
#
# It is strongly discouraged to change this parameter.
SERVER_WORKER_CLASS=gevent SERVER_WORKER_CLASS=gevent
# Default number of worker connections, the default is 10. # Default number of worker connections, the default is 10.
@ -156,6 +162,12 @@ SERVER_WORKER_CONNECTIONS=10
# Similar to SERVER_WORKER_CLASS. # Similar to SERVER_WORKER_CLASS.
# If using Windows, it can be switched to sync or solo. # If using Windows, it can be switched to sync or solo.
#
# Warning: Changing this parameter requires disabling patching for
# psycopg2 and gRPC (see `gunicorn.conf.py` and `celery_entrypoint.py`).
# Modifying it may also decrease throughput.
#
# It is strongly discouraged to change this parameter.
CELERY_WORKER_CLASS= CELERY_WORKER_CLASS=
# Request handling timeout. The default is 200, # Request handling timeout. The default is 200,