From 33b3197be7c02ff03712090ff2e1c78d1f37657b Mon Sep 17 00:00:00 2001 From: -LAN- Date: Wed, 22 Apr 2026 14:29:51 +0800 Subject: [PATCH] refactor(api): simplify llm quota helpers Remove the temporary generic model-type quota helpers now that system-billed models are LLM-only. Keep the deprecated ModelInstance wrappers as LLM-specific adapters with explicit non-LLM guards and update the quota tests to match the narrower invariant. --- api/core/app/llm/__init__.py | 4 - api/core/app/llm/quota.py | 80 ++++-------- .../unit_tests/core/app/test_llm_quota.py | 115 +++++++++--------- 3 files changed, 81 insertions(+), 118 deletions(-) diff --git a/api/core/app/llm/__init__.py b/api/core/app/llm/__init__.py index 85f342de5d..d20a5b2344 100644 --- a/api/core/app/llm/__init__.py +++ b/api/core/app/llm/__init__.py @@ -3,17 +3,13 @@ from .quota import ( deduct_llm_quota, deduct_llm_quota_for_model, - deduct_model_quota, ensure_llm_quota_available, ensure_llm_quota_available_for_model, - ensure_model_quota_available, ) __all__ = [ "deduct_llm_quota", "deduct_llm_quota_for_model", - "deduct_model_quota", "ensure_llm_quota_available", "ensure_llm_quota_available_for_model", - "ensure_model_quota_available", ] diff --git a/api/core/app/llm/quota.py b/api/core/app/llm/quota.py index b66749a467..3793d0ed0f 100644 --- a/api/core/app/llm/quota.py +++ b/api/core/app/llm/quota.py @@ -1,8 +1,9 @@ -"""Tenant-scoped helpers for checking and deducting provider model quota. +"""Tenant-scoped helpers for checking and deducting LLM provider quota. -The public billing identity is ``tenant_id + provider + model_type + model``. -LLM callers still use thin adapters that compute quota usage from ``LLMUsage`` -so the workflow layer does not need to know generic billing details. +System-hosted quota accounting is currently defined only for LLM models. Keep +the public helpers LLM-specific so callers do not carry unused model-type +plumbing, and fail loudly if the deprecated ``ModelInstance`` wrappers are used +with a non-LLM model. 
""" import warnings @@ -33,14 +34,14 @@ def _get_provider_configuration(*, tenant_id: str, provider: str): return provider_configuration -def ensure_model_quota_available(*, tenant_id: str, provider: str, model_type: ModelType, model: str) -> None: - """Raise when a tenant-bound system provider model is already out of quota.""" +def ensure_llm_quota_available_for_model(*, tenant_id: str, provider: str, model: str) -> None: + """Raise when a tenant-bound LLM model is already out of quota.""" provider_configuration = _get_provider_configuration(tenant_id=tenant_id, provider=provider) if provider_configuration.using_provider_type != ProviderType.SYSTEM: return provider_model = provider_configuration.get_provider_model( - model_type=model_type, + model_type=ModelType.LLM, model=model, ) if provider_model and provider_model.status == ModelStatus.QUOTA_EXCEEDED: @@ -71,14 +72,8 @@ def _resolve_llm_used_quota(*, system_configuration, model: str, usage: LLMUsage return used_quota -def _deduct_model_quota_with_configuration( - *, - tenant_id: str, - provider: str, - provider_configuration, - used_quota: int | None, -) -> None: - """Apply a resolved quota charge against the current provider quota bucket.""" +def _deduct_used_llm_quota(*, tenant_id: str, provider: str, provider_configuration, used_quota: int | None) -> None: + """Apply a resolved LLM quota charge against the current provider quota bucket.""" if provider_configuration.using_provider_type != ProviderType.SYSTEM: return @@ -120,36 +115,6 @@ def _deduct_model_quota_with_configuration( session.execute(stmt) -def deduct_model_quota( - *, - tenant_id: str, - provider: str, - model_type: ModelType, - model: str, - used_quota: int | None, -) -> None: - """Deduct quota for the resolved tenant/provider/model identity.""" - _ = model_type - _ = model - provider_configuration = _get_provider_configuration(tenant_id=tenant_id, provider=provider) - _deduct_model_quota_with_configuration( - tenant_id=tenant_id, - provider=provider, - provider_configuration=provider_configuration, - used_quota=used_quota, - ) - - -def ensure_llm_quota_available_for_model(*, tenant_id: str, provider: str, model: str) -> None: - """Raise when a tenant-bound LLM model is already out of quota.""" - ensure_model_quota_available( - tenant_id=tenant_id, - provider=provider, - model_type=ModelType.LLM, - model=model, - ) - - def deduct_llm_quota_for_model(*, tenant_id: str, provider: str, model: str, usage: LLMUsage) -> None: """Deduct tenant-bound quota for the resolved LLM model identity.""" provider_configuration = _get_provider_configuration(tenant_id=tenant_id, provider=provider) @@ -158,15 +123,20 @@ def deduct_llm_quota_for_model(*, tenant_id: str, provider: str, model: str, usa model=model, usage=usage, ) - deduct_model_quota( + _deduct_used_llm_quota( tenant_id=tenant_id, provider=provider, - model_type=ModelType.LLM, - model=model, + provider_configuration=provider_configuration, used_quota=used_quota, ) +def _require_llm_model_instance(model_instance: ModelInstance) -> None: + """Reject deprecated wrapper calls that pass a non-LLM model instance.""" + if model_instance.model_type_instance.model_type != ModelType.LLM: + raise ValueError("LLM quota helpers only support LLM model instances.") + + def ensure_llm_quota_available(*, model_instance: ModelInstance) -> None: """Deprecated compatibility wrapper for callers that still pass ModelInstance.""" warnings.warn( @@ -175,10 +145,10 @@ def ensure_llm_quota_available(*, model_instance: ModelInstance) -> None: 
DeprecationWarning, stacklevel=2, ) - ensure_model_quota_available( + _require_llm_model_instance(model_instance) + ensure_llm_quota_available_for_model( tenant_id=model_instance.provider_model_bundle.configuration.tenant_id, provider=model_instance.provider, - model_type=model_instance.model_type_instance.model_type, model=model_instance.model_name, ) @@ -191,14 +161,10 @@ def deduct_llm_quota(*, tenant_id: str, model_instance: ModelInstance, usage: LL DeprecationWarning, stacklevel=2, ) - deduct_model_quota( + _require_llm_model_instance(model_instance) + deduct_llm_quota_for_model( tenant_id=tenant_id, provider=model_instance.provider, - model_type=model_instance.model_type_instance.model_type, model=model_instance.model_name, - used_quota=_resolve_llm_used_quota( - system_configuration=model_instance.provider_model_bundle.configuration.system_configuration, - model=model_instance.model_name, - usage=usage, - ), + usage=usage, ) diff --git a/api/tests/unit_tests/core/app/test_llm_quota.py b/api/tests/unit_tests/core/app/test_llm_quota.py index bace6cde87..6abeb3e698 100644 --- a/api/tests/unit_tests/core/app/test_llm_quota.py +++ b/api/tests/unit_tests/core/app/test_llm_quota.py @@ -6,10 +6,8 @@ import pytest from core.app.llm.quota import ( deduct_llm_quota, deduct_llm_quota_for_model, - deduct_model_quota, ensure_llm_quota_available, ensure_llm_quota_available_for_model, - ensure_model_quota_available, ) from core.entities.model_entities import ModelStatus from core.entities.provider_entities import ProviderQuotaType, QuotaUnit @@ -19,7 +17,7 @@ from graphon.model_runtime.entities.model_entities import ModelType from models.provider import ProviderType -def test_ensure_model_quota_available_raises_when_system_model_is_exhausted() -> None: +def test_ensure_llm_quota_available_for_model_raises_when_system_model_is_exhausted() -> None: provider_configuration = SimpleNamespace( using_provider_type=ProviderType.SYSTEM, get_provider_model=MagicMock(return_value=SimpleNamespace(status=ModelStatus.QUOTA_EXCEEDED)), @@ -31,37 +29,21 @@ def test_ensure_model_quota_available_raises_when_system_model_is_exhausted() -> patch("core.app.llm.quota.create_plugin_provider_manager", return_value=provider_manager), pytest.raises(QuotaExceededError, match="Model provider openai quota exceeded."), ): - ensure_model_quota_available( - tenant_id="tenant-id", - provider="openai", - model_type=ModelType.TEXT_EMBEDDING, - model="gpt-4o", - ) - - provider_configuration.get_provider_model.assert_called_once_with( - model_type=ModelType.TEXT_EMBEDDING, - model="gpt-4o", - ) - - -def test_ensure_llm_quota_available_for_model_delegates_with_llm_model_type() -> None: - with patch("core.app.llm.quota.ensure_model_quota_available") as mock_ensure: ensure_llm_quota_available_for_model( tenant_id="tenant-id", provider="openai", model="gpt-4o", ) - mock_ensure.assert_called_once_with( - tenant_id="tenant-id", - provider="openai", + provider_configuration.get_provider_model.assert_called_once_with( model_type=ModelType.LLM, model="gpt-4o", ) -def test_deduct_model_quota_uses_identity_based_trial_billing() -> None: - +def test_deduct_llm_quota_for_model_uses_identity_based_trial_billing() -> None: + usage = LLMUsage.empty_usage() + usage.total_tokens = 42 provider_configuration = SimpleNamespace( using_provider_type=ProviderType.SYSTEM, system_configuration=SimpleNamespace( @@ -82,12 +64,11 @@ def test_deduct_model_quota_uses_identity_based_trial_billing() -> None: patch("core.app.llm.quota.create_plugin_provider_manager", 
return_value=provider_manager), patch("services.credit_pool_service.CreditPoolService.check_and_deduct_credits") as mock_deduct_credits, ): - deduct_model_quota( + deduct_llm_quota_for_model( tenant_id="tenant-id", provider="openai", - model_type=ModelType.TEXT_EMBEDDING, model="gpt-4o", - used_quota=42, + usage=usage, ) mock_deduct_credits.assert_called_once_with( @@ -96,10 +77,11 @@ def test_deduct_model_quota_uses_identity_based_trial_billing() -> None: ) -def test_deduct_llm_quota_for_model_delegates_with_llm_model_type_and_usage() -> None: +def test_deduct_llm_quota_for_model_reuses_resolved_provider_configuration_for_deduction() -> None: usage = LLMUsage.empty_usage() usage.total_tokens = 42 provider_configuration = SimpleNamespace( + using_provider_type=ProviderType.SYSTEM, system_configuration=SimpleNamespace( current_quota_type=ProviderQuotaType.TRIAL, quota_configurations=[ @@ -114,7 +96,7 @@ def test_deduct_llm_quota_for_model_delegates_with_llm_model_type_and_usage() -> with ( patch("core.app.llm.quota._get_provider_configuration", return_value=provider_configuration), - patch("core.app.llm.quota.deduct_model_quota") as mock_deduct, + patch("core.app.llm.quota._deduct_used_llm_quota") as mock_deduct, ): deduct_llm_quota_for_model( tenant_id="tenant-id", @@ -126,13 +108,33 @@ def test_deduct_llm_quota_for_model_delegates_with_llm_model_type_and_usage() -> mock_deduct.assert_called_once_with( tenant_id="tenant-id", provider="openai", - model_type=ModelType.LLM, - model="gpt-4o", + provider_configuration=provider_configuration, used_quota=42, ) -def test_ensure_llm_quota_available_wrapper_warns_and_delegates_with_model_type() -> None: +def test_ensure_llm_quota_available_wrapper_warns_and_delegates() -> None: + model_instance = SimpleNamespace( + provider="openai", + model_name="gpt-4o", + provider_model_bundle=SimpleNamespace(configuration=SimpleNamespace(tenant_id="tenant-id")), + model_type_instance=SimpleNamespace(model_type=ModelType.LLM), + ) + + with ( + pytest.deprecated_call(match="ensure_llm_quota_available\\(model_instance=.*deprecated"), + patch("core.app.llm.quota.ensure_llm_quota_available_for_model") as mock_ensure, + ): + ensure_llm_quota_available(model_instance=model_instance) + + mock_ensure.assert_called_once_with( + tenant_id="tenant-id", + provider="openai", + model="gpt-4o", + ) + + +def test_ensure_llm_quota_available_wrapper_rejects_non_llm_model_instances() -> None: model_instance = SimpleNamespace( provider="openai", model_name="gpt-4o", @@ -142,44 +144,24 @@ def test_ensure_llm_quota_available_wrapper_warns_and_delegates_with_model_type( with ( pytest.deprecated_call(match="ensure_llm_quota_available\\(model_instance=.*deprecated"), - patch("core.app.llm.quota.ensure_model_quota_available") as mock_ensure, + pytest.raises(ValueError, match="only support LLM model instances"), ): ensure_llm_quota_available(model_instance=model_instance) - mock_ensure.assert_called_once_with( - tenant_id="tenant-id", - provider="openai", - model_type=ModelType.TEXT_EMBEDDING, - model="gpt-4o", - ) - -def test_deduct_llm_quota_wrapper_warns_and_delegates_with_model_type() -> None: +def test_deduct_llm_quota_wrapper_warns_and_delegates() -> None: usage = LLMUsage.empty_usage() usage.total_tokens = 7 model_instance = SimpleNamespace( provider="openai", model_name="gpt-4o", model_type_instance=SimpleNamespace(model_type=ModelType.LLM), - provider_model_bundle=SimpleNamespace( - configuration=SimpleNamespace( - system_configuration=SimpleNamespace( - 
current_quota_type=ProviderQuotaType.TRIAL, - quota_configurations=[ - SimpleNamespace( - quota_type=ProviderQuotaType.TRIAL, - quota_unit=QuotaUnit.TOKENS, - quota_limit=100, - ) - ], - ) - ) - ), + provider_model_bundle=SimpleNamespace(configuration=SimpleNamespace()), ) with ( pytest.deprecated_call(match="deduct_llm_quota\\(tenant_id=.*deprecated"), - patch("core.app.llm.quota.deduct_model_quota") as mock_deduct, + patch("core.app.llm.quota.deduct_llm_quota_for_model") as mock_deduct, ): deduct_llm_quota( tenant_id="tenant-id", @@ -190,7 +172,26 @@ def test_deduct_llm_quota_wrapper_warns_and_delegates_with_model_type() -> None: mock_deduct.assert_called_once_with( tenant_id="tenant-id", provider="openai", - model_type=ModelType.LLM, model="gpt-4o", - used_quota=7, + usage=usage, ) + + +def test_deduct_llm_quota_wrapper_rejects_non_llm_model_instances() -> None: + usage = LLMUsage.empty_usage() + model_instance = SimpleNamespace( + provider="openai", + model_name="gpt-4o", + model_type_instance=SimpleNamespace(model_type=ModelType.TEXT_EMBEDDING), + provider_model_bundle=SimpleNamespace(configuration=SimpleNamespace()), + ) + + with ( + pytest.deprecated_call(match="deduct_llm_quota\\(tenant_id=.*deprecated"), + pytest.raises(ValueError, match="only support LLM model instances"), + ): + deduct_llm_quota( + tenant_id="tenant-id", + model_instance=model_instance, + usage=usage, + )
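
Reviewer note (not part of the patch): a minimal caller-side sketch of the
narrowed public helpers. The call site, the tenant/provider/model values, and
the LLMUsage import path are illustrative assumptions; the two helpers and
their keyword-only signatures are the ones defined in quota.py above.

    # Hypothetical call site for the LLM-only quota helpers after this refactor.
    from core.app.llm.quota import (
        deduct_llm_quota_for_model,
        ensure_llm_quota_available_for_model,
    )
    # Assumption: LLMUsage is exposed by the runtime entities package, next to the
    # ModelType import used in the tests; adjust to the actual project path.
    from graphon.model_runtime.entities.llm_entities import LLMUsage


    def run_llm_call(tenant_id: str) -> None:
        # Fail fast if the tenant's system-hosted quota for this LLM is already spent.
        ensure_llm_quota_available_for_model(tenant_id=tenant_id, provider="openai", model="gpt-4o")

        usage = LLMUsage.empty_usage()  # in real code this comes from the model invocation result
        usage.total_tokens = 128        # illustrative token count

        # Charge the tenant's current quota bucket for the resolved LLM identity;
        # there is no model_type argument anymore.
        deduct_llm_quota_for_model(tenant_id=tenant_id, provider="openai", model="gpt-4o", usage=usage)

Usage-to-quota resolution stays inside the module (_resolve_llm_used_quota), so
callers only hand over the raw LLMUsage from the invocation result.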
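
Reviewer note (not part of the patch): a migration sketch for call sites still
holding a ModelInstance. The helper name below is hypothetical; the attribute
access mirrors exactly what the deprecated wrappers read before delegating, so
moving to the identity-based helpers drops the DeprecationWarning without
changing behavior for LLM instances. Non-LLM instances now raise ValueError in
the wrappers and should not be routed through these helpers at all.

    from core.app.llm.quota import (
        deduct_llm_quota_for_model,
        ensure_llm_quota_available_for_model,
    )


    def migrate_quota_calls(model_instance, usage) -> None:
        """Hypothetical migration helper; arguments are the caller's existing objects."""
        tenant_id = model_instance.provider_model_bundle.configuration.tenant_id

        # Was: ensure_llm_quota_available(model_instance=model_instance)
        ensure_llm_quota_available_for_model(
            tenant_id=tenant_id,
            provider=model_instance.provider,
            model=model_instance.model_name,
        )

        # Was: deduct_llm_quota(tenant_id=tenant_id, model_instance=model_instance, usage=usage)
        deduct_llm_quota_for_model(
            tenant_id=tenant_id,
            provider=model_instance.provider,
            model=model_instance.model_name,
            usage=usage,
        )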