example to auto rollback (#26200)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Asuka Minato 2025-10-15 00:42:55 +09:00 committed by GitHub
parent 7b51320346
commit 8ddc4f2292
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 59 additions and 66 deletions

View File

@ -1,8 +1,11 @@
import datetime import datetime
import logging import logging
import time import time
from collections.abc import Sequence
import click import click
from sqlalchemy import select
from sqlalchemy.orm import Session, sessionmaker
import app import app
from configs import dify_config from configs import dify_config
@ -35,50 +38,53 @@ def clean_workflow_runlogs_precise():
retention_days = dify_config.WORKFLOW_LOG_RETENTION_DAYS retention_days = dify_config.WORKFLOW_LOG_RETENTION_DAYS
cutoff_date = datetime.datetime.now() - datetime.timedelta(days=retention_days) cutoff_date = datetime.datetime.now() - datetime.timedelta(days=retention_days)
session_factory = sessionmaker(db.engine, expire_on_commit=False)
try: try:
total_workflow_runs = db.session.query(WorkflowRun).where(WorkflowRun.created_at < cutoff_date).count() with session_factory.begin() as session:
if total_workflow_runs == 0: total_workflow_runs = session.query(WorkflowRun).where(WorkflowRun.created_at < cutoff_date).count()
logger.info("No expired workflow run logs found") if total_workflow_runs == 0:
return logger.info("No expired workflow run logs found")
logger.info("Found %s expired workflow run logs to clean", total_workflow_runs) return
logger.info("Found %s expired workflow run logs to clean", total_workflow_runs)
total_deleted = 0 total_deleted = 0
failed_batches = 0 failed_batches = 0
batch_count = 0 batch_count = 0
while True: while True:
workflow_runs = ( with session_factory.begin() as session:
db.session.query(WorkflowRun.id).where(WorkflowRun.created_at < cutoff_date).limit(BATCH_SIZE).all() workflow_run_ids = session.scalars(
) select(WorkflowRun.id)
.where(WorkflowRun.created_at < cutoff_date)
.order_by(WorkflowRun.created_at, WorkflowRun.id)
.limit(BATCH_SIZE)
).all()
if not workflow_runs: if not workflow_run_ids:
break
workflow_run_ids = [run.id for run in workflow_runs]
batch_count += 1
success = _delete_batch_with_retry(workflow_run_ids, failed_batches)
if success:
total_deleted += len(workflow_run_ids)
failed_batches = 0
else:
failed_batches += 1
if failed_batches >= MAX_RETRIES:
logger.error("Failed to delete batch after %s retries, aborting cleanup for today", MAX_RETRIES)
break break
batch_count += 1
success = _delete_batch(session, workflow_run_ids, failed_batches)
if success:
total_deleted += len(workflow_run_ids)
failed_batches = 0
else: else:
# Calculate incremental delay times: 5, 10, 15 minutes failed_batches += 1
retry_delay_minutes = failed_batches * 5 if failed_batches >= MAX_RETRIES:
logger.warning("Batch deletion failed, retrying in %s minutes...", retry_delay_minutes) logger.error("Failed to delete batch after %s retries, aborting cleanup for today", MAX_RETRIES)
time.sleep(retry_delay_minutes * 60) break
continue else:
# Calculate incremental delay times: 5, 10, 15 minutes
retry_delay_minutes = failed_batches * 5
logger.warning("Batch deletion failed, retrying in %s minutes...", retry_delay_minutes)
time.sleep(retry_delay_minutes * 60)
continue
logger.info("Cleanup completed: %s expired workflow run logs deleted", total_deleted) logger.info("Cleanup completed: %s expired workflow run logs deleted", total_deleted)
except Exception: except Exception:
db.session.rollback()
logger.exception("Unexpected error in workflow log cleanup") logger.exception("Unexpected error in workflow log cleanup")
raise raise
@ -87,69 +93,56 @@ def clean_workflow_runlogs_precise():
click.echo(click.style(f"Cleaned workflow run logs from db success latency: {execution_time:.2f}s", fg="green")) click.echo(click.style(f"Cleaned workflow run logs from db success latency: {execution_time:.2f}s", fg="green"))
def _delete_batch_with_retry(workflow_run_ids: list[str], attempt_count: int) -> bool: def _delete_batch(session: Session, workflow_run_ids: Sequence[str], attempt_count: int) -> bool:
"""Delete a single batch with a retry mechanism and complete cascading deletion""" """Delete a single batch of workflow runs and all related data within a nested transaction."""
try: try:
with db.session.begin_nested(): with session.begin_nested():
message_data = ( message_data = (
db.session.query(Message.id, Message.conversation_id) session.query(Message.id, Message.conversation_id)
.where(Message.workflow_run_id.in_(workflow_run_ids)) .where(Message.workflow_run_id.in_(workflow_run_ids))
.all() .all()
) )
message_id_list = [msg.id for msg in message_data] message_id_list = [msg.id for msg in message_data]
conversation_id_list = list({msg.conversation_id for msg in message_data if msg.conversation_id}) conversation_id_list = list({msg.conversation_id for msg in message_data if msg.conversation_id})
if message_id_list: if message_id_list:
db.session.query(AppAnnotationHitHistory).where( message_related_models = [
AppAnnotationHitHistory.message_id.in_(message_id_list) AppAnnotationHitHistory,
).delete(synchronize_session=False) MessageAgentThought,
MessageChain,
MessageFile,
MessageAnnotation,
MessageFeedback,
]
for model in message_related_models:
session.query(model).where(model.message_id.in_(message_id_list)).delete(synchronize_session=False) # type: ignore
# error: "DeclarativeAttributeIntercept" has no attribute "message_id". But this type is only in lib
# and these 6 types all have the message_id field.
db.session.query(MessageAgentThought).where(MessageAgentThought.message_id.in_(message_id_list)).delete( session.query(Message).where(Message.workflow_run_id.in_(workflow_run_ids)).delete(
synchronize_session=False synchronize_session=False
) )
db.session.query(MessageChain).where(MessageChain.message_id.in_(message_id_list)).delete( session.query(WorkflowAppLog).where(WorkflowAppLog.workflow_run_id.in_(workflow_run_ids)).delete(
synchronize_session=False
)
db.session.query(MessageFile).where(MessageFile.message_id.in_(message_id_list)).delete(
synchronize_session=False
)
db.session.query(MessageAnnotation).where(MessageAnnotation.message_id.in_(message_id_list)).delete(
synchronize_session=False
)
db.session.query(MessageFeedback).where(MessageFeedback.message_id.in_(message_id_list)).delete(
synchronize_session=False
)
db.session.query(Message).where(Message.workflow_run_id.in_(workflow_run_ids)).delete(
synchronize_session=False
)
db.session.query(WorkflowAppLog).where(WorkflowAppLog.workflow_run_id.in_(workflow_run_ids)).delete(
synchronize_session=False synchronize_session=False
) )
db.session.query(WorkflowNodeExecutionModel).where( session.query(WorkflowNodeExecutionModel).where(
WorkflowNodeExecutionModel.workflow_run_id.in_(workflow_run_ids) WorkflowNodeExecutionModel.workflow_run_id.in_(workflow_run_ids)
).delete(synchronize_session=False) ).delete(synchronize_session=False)
if conversation_id_list: if conversation_id_list:
db.session.query(ConversationVariable).where( session.query(ConversationVariable).where(
ConversationVariable.conversation_id.in_(conversation_id_list) ConversationVariable.conversation_id.in_(conversation_id_list)
).delete(synchronize_session=False) ).delete(synchronize_session=False)
db.session.query(Conversation).where(Conversation.id.in_(conversation_id_list)).delete( session.query(Conversation).where(Conversation.id.in_(conversation_id_list)).delete(
synchronize_session=False synchronize_session=False
) )
db.session.query(WorkflowRun).where(WorkflowRun.id.in_(workflow_run_ids)).delete(synchronize_session=False) session.query(WorkflowRun).where(WorkflowRun.id.in_(workflow_run_ids)).delete(synchronize_session=False)
db.session.commit() return True
return True
except Exception: except Exception:
db.session.rollback()
logger.exception("Batch deletion failed (attempt %s)", attempt_count + 1) logger.exception("Batch deletion failed (attempt %s)", attempt_count + 1)
return False return False