From bfc4fe1a9ae7a3525fe7b093ec259c1dccd73f3a Mon Sep 17 00:00:00 2001
From: kenwoodjw
Date: Tue, 26 Aug 2025 15:44:52 +0800
Subject: [PATCH] fix infinite loop when clean unused dataset (#24542)

Signed-off-by: kenwoodjw
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
---
 api/schedule/clean_unused_datasets_task.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/api/schedule/clean_unused_datasets_task.py b/api/schedule/clean_unused_datasets_task.py
index 1141451011..63e6132b6a 100644
--- a/api/schedule/clean_unused_datasets_task.py
+++ b/api/schedule/clean_unused_datasets_task.py
@@ -45,6 +45,7 @@ def clean_unused_datasets_task():
         plan_filter = config["plan_filter"]
         add_logs = config["add_logs"]

+        page = 1
         while True:
             try:
                 # Subquery for counting new documents
@@ -86,12 +87,12 @@ def clean_unused_datasets_task():
                     .order_by(Dataset.created_at.desc())
                 )

-                datasets = db.paginate(stmt, page=1, per_page=50)
+                datasets = db.paginate(stmt, page=page, per_page=50, error_out=False)

             except SQLAlchemyError:
                 raise

-            if datasets.items is None or len(datasets.items) == 0:
+            if datasets is None or datasets.items is None or len(datasets.items) == 0:
                 break

             for dataset in datasets:
@@ -150,5 +151,7 @@ def clean_unused_datasets_task():
                     except Exception as e:
                         click.echo(click.style(f"clean dataset index error: {e.__class__.__name__} {str(e)}", fg="red"))

+            page += 1
+
     end_at = time.perf_counter()
     click.echo(click.style(f"Cleaned unused dataset from db success latency: {end_at - start_at}", fg="green"))
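
Note (not part of the patch): a minimal sketch of the pagination loop shape this change establishes. The page counter starts at 1, each pass fetches the current page, an empty page ends the loop, and the counter advances only after a page has been processed; error_out=False in db.paginate makes an out-of-range page come back empty instead of raising. fetch_page and clean_all below are hypothetical stand-ins for db.paginate(stmt, page=page, per_page=50, error_out=False) and the task body, not Dify code; before the fix the call always requested page 1, so the while True loop could spin indefinitely whenever the fetched datasets kept matching the query.

# Hypothetical sketch of the corrected loop shape, not the Dify implementation.

def fetch_page(items, page, per_page=50):
    # Return one page of items; an out-of-range page yields an empty list,
    # mirroring db.paginate(..., error_out=False) returning no items.
    start = (page - 1) * per_page
    return items[start : start + per_page]

def clean_all(items):
    page = 1                  # the patch adds this counter before `while True:`
    while True:
        batch = fetch_page(items, page)
        if not batch:         # empty page -> stop; this is the loop's only exit
            break
        for item in batch:
            pass              # per-dataset cleanup happens here in the real task
        page += 1             # the patch advances the page at the end of each pass

# Example: 120 items are processed in three passes (50 + 50 + 20), then the loop exits.
clean_all(list(range(120)))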