diff --git a/api/commands.py b/api/commands.py
index 5c13667efe..93ddc379fb 100644
--- a/api/commands.py
+++ b/api/commands.py
@@ -2630,6 +2630,8 @@ def clean_expired_messages(
     """
     Clean expired messages and related data for tenants based on clean policy.
     """
+    from extensions.otel.runtime import flush_telemetry
+
     click.echo(click.style("clean_messages: start clean messages.", fg="green"))
 
     start_at = time.perf_counter()
@@ -2671,6 +2673,8 @@ def clean_expired_messages(
             )
         )
         raise
+    finally:
+        flush_telemetry()
 
     click.echo(click.style("messages cleanup completed.", fg="green"))
 
diff --git a/api/services/retention/conversation/messages_clean_service.py b/api/services/retention/conversation/messages_clean_service.py
index f7836a2b14..86bf57c64e 100644
--- a/api/services/retention/conversation/messages_clean_service.py
+++ b/api/services/retention/conversation/messages_clean_service.py
@@ -1,16 +1,16 @@
 import datetime
 import logging
-import os
 import random
 import time
 from collections.abc import Sequence
-from typing import cast
+from typing import TYPE_CHECKING, cast
 
 import sqlalchemy as sa
 from sqlalchemy import delete, select, tuple_
 from sqlalchemy.engine import CursorResult
 from sqlalchemy.orm import Session
 
+from configs import dify_config
 from extensions.ext_database import db
 from models.model import (
     App,
@@ -32,6 +32,127 @@ from services.retention.conversation.messages_clean_policy import (
 logger = logging.getLogger(__name__)
 
 
+if TYPE_CHECKING:
+    from opentelemetry.metrics import Counter, Histogram
+
+
+class MessagesCleanupMetrics:
+    """
+    Records low-cardinality OpenTelemetry metrics for expired message cleanup jobs.
+
+    We keep labels stable (dry_run/window_mode/status) so these metrics remain
+    dashboard-friendly for long-running CronJob executions.
+    """
+
+    _job_runs_total: "Counter | None"
+    _batches_total: "Counter | None"
+    _messages_scanned_total: "Counter | None"
+    _messages_filtered_total: "Counter | None"
+    _messages_deleted_total: "Counter | None"
+    _job_duration_seconds: "Histogram | None"
+    _batch_duration_seconds: "Histogram | None"
+    _base_attributes: dict[str, str]
+
+    def __init__(self, *, dry_run: bool, has_window: bool) -> None:
+        self._job_runs_total = None
+        self._batches_total = None
+        self._messages_scanned_total = None
+        self._messages_filtered_total = None
+        self._messages_deleted_total = None
+        self._job_duration_seconds = None
+        self._batch_duration_seconds = None
+        self._base_attributes = {
+            "job_name": "messages_cleanup",
+            "dry_run": str(dry_run).lower(),
+            "window_mode": "between" if has_window else "before_cutoff",
+        }
+        self._init_instruments()
+
+    def _init_instruments(self) -> None:
+        try:
+            from opentelemetry.metrics import get_meter
+
+            meter = get_meter("messages_cleanup", version=dify_config.project.version)
+            self._job_runs_total = meter.create_counter(
+                "messages_cleanup_jobs_total",
+                description="Total number of expired message cleanup jobs by status.",
+                unit="{job}",
+            )
+            self._batches_total = meter.create_counter(
+                "messages_cleanup_batches_total",
+                description="Total number of message cleanup batches processed.",
+                unit="{batch}",
+            )
+            self._messages_scanned_total = meter.create_counter(
+                "messages_cleanup_scanned_messages_total",
+                description="Total messages scanned by cleanup jobs.",
+                unit="{message}",
+            )
+            self._messages_filtered_total = meter.create_counter(
+                "messages_cleanup_filtered_messages_total",
+                description="Total messages selected by cleanup policy.",
+                unit="{message}",
+            )
+            self._messages_deleted_total = meter.create_counter(
+                "messages_cleanup_deleted_messages_total",
+                description="Total messages deleted by cleanup jobs.",
+                unit="{message}",
+            )
+            self._job_duration_seconds = meter.create_histogram(
+                "messages_cleanup_job_duration_seconds",
+                description="Duration of expired message cleanup jobs in seconds.",
+                unit="s",
+            )
+            self._batch_duration_seconds = meter.create_histogram(
+                "messages_cleanup_batch_duration_seconds",
+                description="Duration of expired message cleanup batch processing in seconds.",
+                unit="s",
+            )
+        except Exception:
+            logger.exception("messages_cleanup_metrics: failed to initialize instruments")
+
+    def _attrs(self, **extra: str) -> dict[str, str]:
+        return {**self._base_attributes, **extra}
+
+    @staticmethod
+    def _add(counter: "Counter | None", value: int, attributes: dict[str, str]) -> None:
+        if not counter or value <= 0:
+            return
+        try:
+            counter.add(value, attributes)
+        except Exception:
+            logger.exception("messages_cleanup_metrics: failed to add counter value")
+
+    @staticmethod
+    def _record(histogram: "Histogram | None", value: float, attributes: dict[str, str]) -> None:
+        if not histogram:
+            return
+        try:
+            histogram.record(value, attributes)
+        except Exception:
+            logger.exception("messages_cleanup_metrics: failed to record histogram value")
+
+    def record_batch(
+        self,
+        *,
+        scanned_messages: int,
+        filtered_messages: int,
+        deleted_messages: int,
+        batch_duration_seconds: float,
+    ) -> None:
+        attributes = self._attrs()
+        self._add(self._batches_total, 1, attributes)
+        self._add(self._messages_scanned_total, scanned_messages, attributes)
+        self._add(self._messages_filtered_total, filtered_messages, attributes)
+        self._add(self._messages_deleted_total, deleted_messages, attributes)
+        self._record(self._batch_duration_seconds, batch_duration_seconds, attributes)
+
+    def record_completion(self, *, status: str, job_duration_seconds: float) -> None:
+        attributes = self._attrs(status=status)
+        self._add(self._job_runs_total, 1, attributes)
+        self._record(self._job_duration_seconds, job_duration_seconds, attributes)
+
+
 class MessagesCleanService:
     """
     Service for cleaning expired messages based on retention policies.
@@ -63,6 +184,7 @@ class MessagesCleanService:
         self._start_from = start_from
         self._batch_size = batch_size
         self._dry_run = dry_run
+        self._metrics = MessagesCleanupMetrics(dry_run=dry_run, has_window=bool(start_from))
 
     @classmethod
     def from_time_range(
@@ -161,7 +283,18 @@ class MessagesCleanService:
         Returns:
             Dict with statistics: batches, filtered_messages, total_deleted
         """
-        return self._clean_messages_by_time_range()
+        status = "success"
+        run_start = time.monotonic()
+        try:
+            return self._clean_messages_by_time_range()
+        except Exception:
+            status = "failed"
+            raise
+        finally:
+            self._metrics.record_completion(
+                status=status,
+                job_duration_seconds=time.monotonic() - run_start,
+            )
 
     def _clean_messages_by_time_range(self) -> dict[str, int]:
         """
@@ -196,11 +329,14 @@ class MessagesCleanService:
             self._end_before,
         )
 
-        max_batch_interval_ms = int(os.environ.get("SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL", 200))
+        max_batch_interval_ms = dify_config.SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL
 
         while True:
             stats["batches"] += 1
             batch_start = time.monotonic()
+            batch_scanned_messages = 0
+            batch_filtered_messages = 0
+            batch_deleted_messages = 0
 
             # Step 1: Fetch a batch of messages using cursor
             with Session(db.engine, expire_on_commit=False) as session:
@@ -239,9 +375,16 @@ class MessagesCleanService:
 
                 # Track total messages fetched across all batches
                 stats["total_messages"] += len(messages)
+                batch_scanned_messages = len(messages)
 
                 if not messages:
                     logger.info("clean_messages (batch %s): no more messages to process", stats["batches"])
+                    self._metrics.record_batch(
+                        scanned_messages=batch_scanned_messages,
+                        filtered_messages=batch_filtered_messages,
+                        deleted_messages=batch_deleted_messages,
+                        batch_duration_seconds=time.monotonic() - batch_start,
+                    )
                     break
 
                 # Update cursor to the last message's (created_at, id)
@@ -267,6 +410,12 @@ class MessagesCleanService:
 
             if not apps:
                 logger.info("clean_messages (batch %s): no apps found, skip", stats["batches"])
+                self._metrics.record_batch(
+                    scanned_messages=batch_scanned_messages,
+                    filtered_messages=batch_filtered_messages,
+                    deleted_messages=batch_deleted_messages,
+                    batch_duration_seconds=time.monotonic() - batch_start,
+                )
                 continue
 
             # Build app_id -> tenant_id mapping
@@ -285,9 +434,16 @@ class MessagesCleanService:
 
             if not message_ids_to_delete:
                 logger.info("clean_messages (batch %s): no messages to delete, skip", stats["batches"])
+                self._metrics.record_batch(
+                    scanned_messages=batch_scanned_messages,
+                    filtered_messages=batch_filtered_messages,
+                    deleted_messages=batch_deleted_messages,
+                    batch_duration_seconds=time.monotonic() - batch_start,
+                )
                 continue
 
             stats["filtered_messages"] += len(message_ids_to_delete)
+            batch_filtered_messages = len(message_ids_to_delete)
 
             # Step 4: Batch delete messages and their relations
             if not self._dry_run:
@@ -308,6 +464,7 @@ class MessagesCleanService:
                     commit_ms = int((time.monotonic() - commit_start) * 1000)
 
                     stats["total_deleted"] += messages_deleted
+                    batch_deleted_messages = messages_deleted
 
                     logger.info(
                         "clean_messages (batch %s): processed %s messages, deleted %s messages",
@@ -342,6 +499,13 @@ class MessagesCleanService:
                 for msg_id in sampled_ids:
                     logger.info("clean_messages (batch %s, dry_run) sample: message_id=%s", stats["batches"], msg_id)
 
+            self._metrics.record_batch(
+                scanned_messages=batch_scanned_messages,
+                filtered_messages=batch_filtered_messages,
+                deleted_messages=batch_deleted_messages,
+                batch_duration_seconds=time.monotonic() - batch_start,
+            )
+
         logger.info(
             "clean_messages completed: total batches: %s, total messages: %s, filtered messages: %s, total deleted: %s",
             stats["batches"],
diff --git a/api/tests/unit_tests/services/test_messages_clean_service.py b/api/tests/unit_tests/services/test_messages_clean_service.py
index 67ae2c9142..b8e7d2fb01 100644
--- a/api/tests/unit_tests/services/test_messages_clean_service.py
+++ b/api/tests/unit_tests/services/test_messages_clean_service.py
@@ -625,3 +625,53 @@ class TestMessagesCleanServiceFromDays:
         assert service._end_before == expected_end_before
         assert service._batch_size == 1000  # default
         assert service._dry_run is False  # default
+
+
+class TestMessagesCleanServiceRun:
+    """Unit tests for MessagesCleanService.run instrumentation behavior."""
+
+    def test_run_records_completion_metrics_on_success(self):
+        # Arrange
+        service = MessagesCleanService(
+            policy=BillingDisabledPolicy(),
+            start_from=datetime.datetime(2024, 1, 1),
+            end_before=datetime.datetime(2024, 1, 2),
+            batch_size=100,
+            dry_run=False,
+        )
+        expected_stats = {
+            "batches": 1,
+            "total_messages": 10,
+            "filtered_messages": 5,
+            "total_deleted": 5,
+        }
+        service._clean_messages_by_time_range = MagicMock(return_value=expected_stats)  # type: ignore[method-assign]
+        completion_calls: list[dict[str, object]] = []
+        service._metrics.record_completion = lambda **kwargs: completion_calls.append(kwargs)  # type: ignore[method-assign]
+
+        # Act
+        result = service.run()
+
+        # Assert
+        assert result == expected_stats
+        assert len(completion_calls) == 1
+        assert completion_calls[0]["status"] == "success"
+
+    def test_run_records_completion_metrics_on_failure(self):
+        # Arrange
+        service = MessagesCleanService(
+            policy=BillingDisabledPolicy(),
+            start_from=datetime.datetime(2024, 1, 1),
+            end_before=datetime.datetime(2024, 1, 2),
+            batch_size=100,
+            dry_run=False,
+        )
+        service._clean_messages_by_time_range = MagicMock(side_effect=RuntimeError("clean failed"))  # type: ignore[method-assign]
+        completion_calls: list[dict[str, object]] = []
+        service._metrics.record_completion = lambda **kwargs: completion_calls.append(kwargs)  # type: ignore[method-assign]
+
+        # Act & Assert
+        with pytest.raises(RuntimeError, match="clean failed"):
+            service.run()
+        assert len(completion_calls) == 1
+        assert completion_calls[0]["status"] == "failed"