From 33b3197be7c02ff03712090ff2e1c78d1f37657b Mon Sep 17 00:00:00 2001 From: -LAN- Date: Wed, 22 Apr 2026 14:29:51 +0800 Subject: [PATCH] refactor(api): simplify llm quota helpers Remove the temporary generic model-type quota helpers now that system-billed models are LLM-only. Keep the deprecated ModelInstance wrappers as LLM-specific adapters with explicit non-LLM guards and update the quota tests to match the narrower invariant. --- api/core/app/llm/__init__.py | 4 - api/core/app/llm/quota.py | 80 ++++-------- .../unit_tests/core/app/test_llm_quota.py | 115 +++++++++--------- 3 files changed, 81 insertions(+), 118 deletions(-) diff --git a/api/core/app/llm/__init__.py b/api/core/app/llm/__init__.py index 85f342de5d..d20a5b2344 100644 --- a/api/core/app/llm/__init__.py +++ b/api/core/app/llm/__init__.py @@ -3,17 +3,13 @@ from .quota import ( deduct_llm_quota, deduct_llm_quota_for_model, - deduct_model_quota, ensure_llm_quota_available, ensure_llm_quota_available_for_model, - ensure_model_quota_available, ) __all__ = [ "deduct_llm_quota", "deduct_llm_quota_for_model", - "deduct_model_quota", "ensure_llm_quota_available", "ensure_llm_quota_available_for_model", - "ensure_model_quota_available", ] diff --git a/api/core/app/llm/quota.py b/api/core/app/llm/quota.py index b66749a467..3793d0ed0f 100644 --- a/api/core/app/llm/quota.py +++ b/api/core/app/llm/quota.py @@ -1,8 +1,9 @@ -"""Tenant-scoped helpers for checking and deducting provider model quota. +"""Tenant-scoped helpers for checking and deducting LLM provider quota. -The public billing identity is ``tenant_id + provider + model_type + model``. -LLM callers still use thin adapters that compute quota usage from ``LLMUsage`` -so the workflow layer does not need to know generic billing details. +System-hosted quota accounting is currently defined only for LLM models. Keep +the public helpers LLM-specific so callers do not carry unused model-type +plumbing, and fail loudly if the deprecated ``ModelInstance`` wrappers are used +with a non-LLM model. 
""" import warnings @@ -33,14 +34,14 @@ def _get_provider_configuration(*, tenant_id: str, provider: str): return provider_configuration -def ensure_model_quota_available(*, tenant_id: str, provider: str, model_type: ModelType, model: str) -> None: - """Raise when a tenant-bound system provider model is already out of quota.""" +def ensure_llm_quota_available_for_model(*, tenant_id: str, provider: str, model: str) -> None: + """Raise when a tenant-bound LLM model is already out of quota.""" provider_configuration = _get_provider_configuration(tenant_id=tenant_id, provider=provider) if provider_configuration.using_provider_type != ProviderType.SYSTEM: return provider_model = provider_configuration.get_provider_model( - model_type=model_type, + model_type=ModelType.LLM, model=model, ) if provider_model and provider_model.status == ModelStatus.QUOTA_EXCEEDED: @@ -71,14 +72,8 @@ def _resolve_llm_used_quota(*, system_configuration, model: str, usage: LLMUsage return used_quota -def _deduct_model_quota_with_configuration( - *, - tenant_id: str, - provider: str, - provider_configuration, - used_quota: int | None, -) -> None: - """Apply a resolved quota charge against the current provider quota bucket.""" +def _deduct_used_llm_quota(*, tenant_id: str, provider: str, provider_configuration, used_quota: int | None) -> None: + """Apply a resolved LLM quota charge against the current provider quota bucket.""" if provider_configuration.using_provider_type != ProviderType.SYSTEM: return @@ -120,36 +115,6 @@ def _deduct_model_quota_with_configuration( session.execute(stmt) -def deduct_model_quota( - *, - tenant_id: str, - provider: str, - model_type: ModelType, - model: str, - used_quota: int | None, -) -> None: - """Deduct quota for the resolved tenant/provider/model identity.""" - _ = model_type - _ = model - provider_configuration = _get_provider_configuration(tenant_id=tenant_id, provider=provider) - _deduct_model_quota_with_configuration( - tenant_id=tenant_id, - provider=provider, - provider_configuration=provider_configuration, - used_quota=used_quota, - ) - - -def ensure_llm_quota_available_for_model(*, tenant_id: str, provider: str, model: str) -> None: - """Raise when a tenant-bound LLM model is already out of quota.""" - ensure_model_quota_available( - tenant_id=tenant_id, - provider=provider, - model_type=ModelType.LLM, - model=model, - ) - - def deduct_llm_quota_for_model(*, tenant_id: str, provider: str, model: str, usage: LLMUsage) -> None: """Deduct tenant-bound quota for the resolved LLM model identity.""" provider_configuration = _get_provider_configuration(tenant_id=tenant_id, provider=provider) @@ -158,15 +123,20 @@ def deduct_llm_quota_for_model(*, tenant_id: str, provider: str, model: str, usa model=model, usage=usage, ) - deduct_model_quota( + _deduct_used_llm_quota( tenant_id=tenant_id, provider=provider, - model_type=ModelType.LLM, - model=model, + provider_configuration=provider_configuration, used_quota=used_quota, ) +def _require_llm_model_instance(model_instance: ModelInstance) -> None: + """Reject deprecated wrapper calls that pass a non-LLM model instance.""" + if model_instance.model_type_instance.model_type != ModelType.LLM: + raise ValueError("LLM quota helpers only support LLM model instances.") + + def ensure_llm_quota_available(*, model_instance: ModelInstance) -> None: """Deprecated compatibility wrapper for callers that still pass ModelInstance.""" warnings.warn( @@ -175,10 +145,10 @@ def ensure_llm_quota_available(*, model_instance: ModelInstance) -> None: 
DeprecationWarning, stacklevel=2, ) - ensure_model_quota_available( + _require_llm_model_instance(model_instance) + ensure_llm_quota_available_for_model( tenant_id=model_instance.provider_model_bundle.configuration.tenant_id, provider=model_instance.provider, - model_type=model_instance.model_type_instance.model_type, model=model_instance.model_name, ) @@ -191,14 +161,10 @@ def deduct_llm_quota(*, tenant_id: str, model_instance: ModelInstance, usage: LL DeprecationWarning, stacklevel=2, ) - deduct_model_quota( + _require_llm_model_instance(model_instance) + deduct_llm_quota_for_model( tenant_id=tenant_id, provider=model_instance.provider, - model_type=model_instance.model_type_instance.model_type, model=model_instance.model_name, - used_quota=_resolve_llm_used_quota( - system_configuration=model_instance.provider_model_bundle.configuration.system_configuration, - model=model_instance.model_name, - usage=usage, - ), + usage=usage, ) diff --git a/api/tests/unit_tests/core/app/test_llm_quota.py b/api/tests/unit_tests/core/app/test_llm_quota.py index bace6cde87..6abeb3e698 100644 --- a/api/tests/unit_tests/core/app/test_llm_quota.py +++ b/api/tests/unit_tests/core/app/test_llm_quota.py @@ -6,10 +6,8 @@ import pytest from core.app.llm.quota import ( deduct_llm_quota, deduct_llm_quota_for_model, - deduct_model_quota, ensure_llm_quota_available, ensure_llm_quota_available_for_model, - ensure_model_quota_available, ) from core.entities.model_entities import ModelStatus from core.entities.provider_entities import ProviderQuotaType, QuotaUnit @@ -19,7 +17,7 @@ from graphon.model_runtime.entities.model_entities import ModelType from models.provider import ProviderType -def test_ensure_model_quota_available_raises_when_system_model_is_exhausted() -> None: +def test_ensure_llm_quota_available_for_model_raises_when_system_model_is_exhausted() -> None: provider_configuration = SimpleNamespace( using_provider_type=ProviderType.SYSTEM, get_provider_model=MagicMock(return_value=SimpleNamespace(status=ModelStatus.QUOTA_EXCEEDED)), @@ -31,37 +29,21 @@ def test_ensure_model_quota_available_raises_when_system_model_is_exhausted() -> patch("core.app.llm.quota.create_plugin_provider_manager", return_value=provider_manager), pytest.raises(QuotaExceededError, match="Model provider openai quota exceeded."), ): - ensure_model_quota_available( - tenant_id="tenant-id", - provider="openai", - model_type=ModelType.TEXT_EMBEDDING, - model="gpt-4o", - ) - - provider_configuration.get_provider_model.assert_called_once_with( - model_type=ModelType.TEXT_EMBEDDING, - model="gpt-4o", - ) - - -def test_ensure_llm_quota_available_for_model_delegates_with_llm_model_type() -> None: - with patch("core.app.llm.quota.ensure_model_quota_available") as mock_ensure: ensure_llm_quota_available_for_model( tenant_id="tenant-id", provider="openai", model="gpt-4o", ) - mock_ensure.assert_called_once_with( - tenant_id="tenant-id", - provider="openai", + provider_configuration.get_provider_model.assert_called_once_with( model_type=ModelType.LLM, model="gpt-4o", ) -def test_deduct_model_quota_uses_identity_based_trial_billing() -> None: - +def test_deduct_llm_quota_for_model_uses_identity_based_trial_billing() -> None: + usage = LLMUsage.empty_usage() + usage.total_tokens = 42 provider_configuration = SimpleNamespace( using_provider_type=ProviderType.SYSTEM, system_configuration=SimpleNamespace( @@ -82,12 +64,11 @@ def test_deduct_model_quota_uses_identity_based_trial_billing() -> None: patch("core.app.llm.quota.create_plugin_provider_manager", 
return_value=provider_manager), patch("services.credit_pool_service.CreditPoolService.check_and_deduct_credits") as mock_deduct_credits, ): - deduct_model_quota( + deduct_llm_quota_for_model( tenant_id="tenant-id", provider="openai", - model_type=ModelType.TEXT_EMBEDDING, model="gpt-4o", - used_quota=42, + usage=usage, ) mock_deduct_credits.assert_called_once_with( @@ -96,10 +77,11 @@ def test_deduct_model_quota_uses_identity_based_trial_billing() -> None: ) -def test_deduct_llm_quota_for_model_delegates_with_llm_model_type_and_usage() -> None: +def test_deduct_llm_quota_for_model_reuses_resolved_provider_configuration_for_deduction() -> None: usage = LLMUsage.empty_usage() usage.total_tokens = 42 provider_configuration = SimpleNamespace( + using_provider_type=ProviderType.SYSTEM, system_configuration=SimpleNamespace( current_quota_type=ProviderQuotaType.TRIAL, quota_configurations=[ @@ -114,7 +96,7 @@ def test_deduct_llm_quota_for_model_delegates_with_llm_model_type_and_usage() -> with ( patch("core.app.llm.quota._get_provider_configuration", return_value=provider_configuration), - patch("core.app.llm.quota.deduct_model_quota") as mock_deduct, + patch("core.app.llm.quota._deduct_used_llm_quota") as mock_deduct, ): deduct_llm_quota_for_model( tenant_id="tenant-id", @@ -126,13 +108,33 @@ def test_deduct_llm_quota_for_model_delegates_with_llm_model_type_and_usage() -> mock_deduct.assert_called_once_with( tenant_id="tenant-id", provider="openai", - model_type=ModelType.LLM, - model="gpt-4o", + provider_configuration=provider_configuration, used_quota=42, ) -def test_ensure_llm_quota_available_wrapper_warns_and_delegates_with_model_type() -> None: +def test_ensure_llm_quota_available_wrapper_warns_and_delegates() -> None: + model_instance = SimpleNamespace( + provider="openai", + model_name="gpt-4o", + provider_model_bundle=SimpleNamespace(configuration=SimpleNamespace(tenant_id="tenant-id")), + model_type_instance=SimpleNamespace(model_type=ModelType.LLM), + ) + + with ( + pytest.deprecated_call(match="ensure_llm_quota_available\\(model_instance=.*deprecated"), + patch("core.app.llm.quota.ensure_llm_quota_available_for_model") as mock_ensure, + ): + ensure_llm_quota_available(model_instance=model_instance) + + mock_ensure.assert_called_once_with( + tenant_id="tenant-id", + provider="openai", + model="gpt-4o", + ) + + +def test_ensure_llm_quota_available_wrapper_rejects_non_llm_model_instances() -> None: model_instance = SimpleNamespace( provider="openai", model_name="gpt-4o", @@ -142,44 +144,24 @@ def test_ensure_llm_quota_available_wrapper_warns_and_delegates_with_model_type( with ( pytest.deprecated_call(match="ensure_llm_quota_available\\(model_instance=.*deprecated"), - patch("core.app.llm.quota.ensure_model_quota_available") as mock_ensure, + pytest.raises(ValueError, match="only support LLM model instances"), ): ensure_llm_quota_available(model_instance=model_instance) - mock_ensure.assert_called_once_with( - tenant_id="tenant-id", - provider="openai", - model_type=ModelType.TEXT_EMBEDDING, - model="gpt-4o", - ) - -def test_deduct_llm_quota_wrapper_warns_and_delegates_with_model_type() -> None: +def test_deduct_llm_quota_wrapper_warns_and_delegates() -> None: usage = LLMUsage.empty_usage() usage.total_tokens = 7 model_instance = SimpleNamespace( provider="openai", model_name="gpt-4o", model_type_instance=SimpleNamespace(model_type=ModelType.LLM), - provider_model_bundle=SimpleNamespace( - configuration=SimpleNamespace( - system_configuration=SimpleNamespace( - 
current_quota_type=ProviderQuotaType.TRIAL, - quota_configurations=[ - SimpleNamespace( - quota_type=ProviderQuotaType.TRIAL, - quota_unit=QuotaUnit.TOKENS, - quota_limit=100, - ) - ], - ) - ) - ), + provider_model_bundle=SimpleNamespace(configuration=SimpleNamespace()), ) with ( pytest.deprecated_call(match="deduct_llm_quota\\(tenant_id=.*deprecated"), - patch("core.app.llm.quota.deduct_model_quota") as mock_deduct, + patch("core.app.llm.quota.deduct_llm_quota_for_model") as mock_deduct, ): deduct_llm_quota( tenant_id="tenant-id", @@ -190,7 +172,26 @@ def test_deduct_llm_quota_wrapper_warns_and_delegates_with_model_type() -> None: mock_deduct.assert_called_once_with( tenant_id="tenant-id", provider="openai", - model_type=ModelType.LLM, model="gpt-4o", - used_quota=7, + usage=usage, ) + + +def test_deduct_llm_quota_wrapper_rejects_non_llm_model_instances() -> None: + usage = LLMUsage.empty_usage() + model_instance = SimpleNamespace( + provider="openai", + model_name="gpt-4o", + model_type_instance=SimpleNamespace(model_type=ModelType.TEXT_EMBEDDING), + provider_model_bundle=SimpleNamespace(configuration=SimpleNamespace()), + ) + + with ( + pytest.deprecated_call(match="deduct_llm_quota\\(tenant_id=.*deprecated"), + pytest.raises(ValueError, match="only support LLM model instances"), + ): + deduct_llm_quota( + tenant_id="tenant-id", + model_instance=model_instance, + usage=usage, + )
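
Reviewer note (not part of the patch): a minimal caller-side sketch of the
narrowed public helpers. The call site, the tenant/provider/model values, and
the LLMUsage import path are illustrative assumptions; the two helpers and
their keyword-only signatures are the ones defined in quota.py above.

    # Hypothetical call site for the LLM-only quota helpers after this refactor.
    from core.app.llm.quota import (
        deduct_llm_quota_for_model,
        ensure_llm_quota_available_for_model,
    )
    # Assumption: LLMUsage is exposed by the runtime entities package, next to the
    # ModelType import used in the tests; adjust to the actual project path.
    from graphon.model_runtime.entities.llm_entities import LLMUsage


    def run_llm_call(tenant_id: str) -> None:
        # Fail fast if the tenant's system-hosted quota for this LLM is already spent.
        ensure_llm_quota_available_for_model(tenant_id=tenant_id, provider="openai", model="gpt-4o")

        usage = LLMUsage.empty_usage()  # in real code this comes from the model invocation result
        usage.total_tokens = 128        # illustrative token count

        # Charge the tenant's current quota bucket for the resolved LLM identity;
        # there is no model_type argument anymore.
        deduct_llm_quota_for_model(tenant_id=tenant_id, provider="openai", model="gpt-4o", usage=usage)

Usage-to-quota resolution stays inside the module (_resolve_llm_used_quota), so
callers only hand over the raw LLMUsage from the invocation result.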
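
Reviewer note (not part of the patch): a migration sketch for call sites still
holding a ModelInstance. The helper name below is hypothetical; the attribute
access mirrors exactly what the deprecated wrappers read before delegating, so
moving to the identity-based helpers drops the DeprecationWarning without
changing behavior for LLM instances. Non-LLM instances now raise ValueError in
the wrappers and should not be routed through these helpers at all.

    from core.app.llm.quota import (
        deduct_llm_quota_for_model,
        ensure_llm_quota_available_for_model,
    )


    def migrate_quota_calls(model_instance, usage) -> None:
        """Hypothetical migration helper; arguments are the caller's existing objects."""
        tenant_id = model_instance.provider_model_bundle.configuration.tenant_id

        # Was: ensure_llm_quota_available(model_instance=model_instance)
        ensure_llm_quota_available_for_model(
            tenant_id=tenant_id,
            provider=model_instance.provider,
            model=model_instance.model_name,
        )

        # Was: deduct_llm_quota(tenant_id=tenant_id, model_instance=model_instance, usage=usage)
        deduct_llm_quota_for_model(
            tenant_id=tenant_id,
            provider=model_instance.provider,
            model=model_instance.model_name,
            usage=usage,
        )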