mirror of
https://github.com/langgenius/dify.git
synced 2026-03-14 13:51:33 +08:00
feat(api): enhance database migration locking mechanism and configuration
- Introduced a configurable Redis lock TTL for database migrations in DeploymentConfig. - Updated the upgrade_db command to handle lock release errors gracefully. - Added documentation for the new MIGRATION_LOCK_TTL environment variable in the .env.example file and docker-compose.yaml. (cherry picked from commit 4a05fb120622908bc109a3715686706aab3d3b59)
This commit is contained in:
parent
1977e68b2d
commit
9acdfbde2f
@ -10,6 +10,7 @@ import click
|
||||
import sqlalchemy as sa
|
||||
from flask import current_app
|
||||
from pydantic import TypeAdapter
|
||||
from redis.exceptions import LockNotOwnedError, RedisError
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
@ -727,7 +728,7 @@ def create_tenant(email: str, language: str | None = None, name: str | None = No
|
||||
@click.command("upgrade-db", help="Upgrade the database")
|
||||
def upgrade_db():
|
||||
click.echo("Preparing database migration...")
|
||||
lock = redis_client.lock(name="db_upgrade_lock", timeout=60)
|
||||
lock = redis_client.lock(name="db_upgrade_lock", timeout=dify_config.MIGRATION_LOCK_TTL)
|
||||
if lock.acquire(blocking=False):
|
||||
try:
|
||||
click.echo(click.style("Starting database migration.", fg="green"))
|
||||
@ -744,7 +745,13 @@ def upgrade_db():
|
||||
click.echo(click.style(f"Database migration failed: {e}", fg="red"))
|
||||
raise SystemExit(1)
|
||||
finally:
|
||||
lock.release()
|
||||
# Lock release errors should never mask the real migration failure.
|
||||
try:
|
||||
lock.release()
|
||||
except LockNotOwnedError:
|
||||
logger.warning("DB migration lock not owned on release (likely expired); ignoring.")
|
||||
except RedisError:
|
||||
logger.warning("Failed to release DB migration lock due to Redis error; ignoring.", exc_info=True)
|
||||
else:
|
||||
click.echo("Database migration skipped")
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from pydantic import Field
|
||||
from pydantic import Field, PositiveInt
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
@ -32,3 +32,8 @@ class DeploymentConfig(BaseSettings):
|
||||
description="Deployment environment (e.g., 'PRODUCTION', 'DEVELOPMENT'), default to PRODUCTION",
|
||||
default="PRODUCTION",
|
||||
)
|
||||
|
||||
MIGRATION_LOCK_TTL: PositiveInt = Field(
|
||||
description="Redis lock TTL for startup DB migration (seconds). Increase for large/slow databases.",
|
||||
default=3600,
|
||||
)
|
||||
|
||||
84
api/tests/unit_tests/commands/test_upgrade_db.py
Normal file
84
api/tests/unit_tests/commands/test_upgrade_db.py
Normal file
@ -0,0 +1,84 @@
|
||||
import sys
|
||||
import types
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import commands
|
||||
from configs import dify_config
|
||||
|
||||
|
||||
def _install_fake_flask_migrate(monkeypatch, upgrade_impl) -> None:
|
||||
module = types.ModuleType("flask_migrate")
|
||||
module.upgrade = upgrade_impl
|
||||
monkeypatch.setitem(sys.modules, "flask_migrate", module)
|
||||
|
||||
|
||||
def _invoke_upgrade_db() -> int:
|
||||
try:
|
||||
commands.upgrade_db.callback()
|
||||
except SystemExit as e:
|
||||
return int(e.code or 0)
|
||||
return 0
|
||||
|
||||
|
||||
def test_upgrade_db_skips_when_lock_not_acquired(monkeypatch, capsys):
|
||||
monkeypatch.setattr(dify_config, "MIGRATION_LOCK_TTL", 1234)
|
||||
|
||||
lock = MagicMock()
|
||||
lock.acquire.return_value = False
|
||||
commands.redis_client.lock.return_value = lock
|
||||
|
||||
exit_code = _invoke_upgrade_db()
|
||||
captured = capsys.readouterr()
|
||||
|
||||
assert exit_code == 0
|
||||
assert "Database migration skipped" in captured.out
|
||||
|
||||
commands.redis_client.lock.assert_called_once_with(name="db_upgrade_lock", timeout=1234)
|
||||
lock.acquire.assert_called_once_with(blocking=False)
|
||||
lock.release.assert_not_called()
|
||||
|
||||
|
||||
def test_upgrade_db_failure_not_masked_by_lock_release(monkeypatch, capsys):
|
||||
monkeypatch.setattr(dify_config, "MIGRATION_LOCK_TTL", 321)
|
||||
|
||||
lock = MagicMock()
|
||||
lock.acquire.return_value = True
|
||||
lock.release.side_effect = commands.LockNotOwnedError("simulated")
|
||||
commands.redis_client.lock.return_value = lock
|
||||
|
||||
def _upgrade():
|
||||
raise RuntimeError("boom")
|
||||
|
||||
_install_fake_flask_migrate(monkeypatch, _upgrade)
|
||||
|
||||
exit_code = _invoke_upgrade_db()
|
||||
captured = capsys.readouterr()
|
||||
|
||||
assert exit_code == 1
|
||||
assert "Database migration failed: boom" in captured.out
|
||||
|
||||
commands.redis_client.lock.assert_called_once_with(name="db_upgrade_lock", timeout=321)
|
||||
lock.acquire.assert_called_once_with(blocking=False)
|
||||
lock.release.assert_called_once()
|
||||
|
||||
|
||||
def test_upgrade_db_success_ignores_lock_not_owned_on_release(monkeypatch, capsys):
|
||||
monkeypatch.setattr(dify_config, "MIGRATION_LOCK_TTL", 999)
|
||||
|
||||
lock = MagicMock()
|
||||
lock.acquire.return_value = True
|
||||
lock.release.side_effect = commands.LockNotOwnedError("simulated")
|
||||
commands.redis_client.lock.return_value = lock
|
||||
|
||||
_install_fake_flask_migrate(monkeypatch, lambda: None)
|
||||
|
||||
exit_code = _invoke_upgrade_db()
|
||||
captured = capsys.readouterr()
|
||||
|
||||
assert exit_code == 0
|
||||
assert "Database migration successful!" in captured.out
|
||||
|
||||
commands.redis_client.lock.assert_called_once_with(name="db_upgrade_lock", timeout=999)
|
||||
lock.acquire.assert_called_once_with(blocking=False)
|
||||
lock.release.assert_called_once()
|
||||
|
||||
@ -125,6 +125,10 @@ OPENAI_API_BASE=https://api.openai.com/v1
|
||||
# and the application will start after the migrations have completed.
|
||||
MIGRATION_ENABLED=true
|
||||
|
||||
# Redis lock TTL (in seconds) for startup database migrations.
|
||||
# Increase this value for long-running migrations to avoid concurrent upgrades in multi-replica deployments.
|
||||
MIGRATION_LOCK_TTL=3600
|
||||
|
||||
# File Access Time specifies a time interval in seconds for the file to be accessed.
|
||||
# The default value is 300 seconds.
|
||||
FILES_ACCESS_TIMEOUT=300
|
||||
|
||||
@ -32,6 +32,7 @@ x-shared-env: &shared-api-worker-env
|
||||
CHECK_UPDATE_URL: ${CHECK_UPDATE_URL:-https://updates.dify.ai}
|
||||
OPENAI_API_BASE: ${OPENAI_API_BASE:-https://api.openai.com/v1}
|
||||
MIGRATION_ENABLED: ${MIGRATION_ENABLED:-true}
|
||||
MIGRATION_LOCK_TTL: ${MIGRATION_LOCK_TTL:-3600}
|
||||
FILES_ACCESS_TIMEOUT: ${FILES_ACCESS_TIMEOUT:-300}
|
||||
ACCESS_TOKEN_EXPIRE_MINUTES: ${ACCESS_TOKEN_EXPIRE_MINUTES:-60}
|
||||
REFRESH_TOKEN_EXPIRE_DAYS: ${REFRESH_TOKEN_EXPIRE_DAYS:-30}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user