refactor(api): simplify llm quota helpers

Remove the temporary generic model-type quota helpers now that system-billed models are LLM-only.

Keep the deprecated ModelInstance wrappers as LLM-specific adapters with explicit non-LLM guards and update the quota tests to match the narrower invariant.
This commit is contained in:
-LAN- 2026-04-22 14:29:51 +08:00
parent 53892af1f9
commit 33b3197be7
No known key found for this signature in database
GPG Key ID: 6BA0D108DED011FF
3 changed files with 81 additions and 118 deletions

View File

@ -3,17 +3,13 @@
from .quota import (
deduct_llm_quota,
deduct_llm_quota_for_model,
deduct_model_quota,
ensure_llm_quota_available,
ensure_llm_quota_available_for_model,
ensure_model_quota_available,
)
__all__ = [
"deduct_llm_quota",
"deduct_llm_quota_for_model",
"deduct_model_quota",
"ensure_llm_quota_available",
"ensure_llm_quota_available_for_model",
"ensure_model_quota_available",
]

View File

@ -1,8 +1,9 @@
"""Tenant-scoped helpers for checking and deducting provider model quota.
"""Tenant-scoped helpers for checking and deducting LLM provider quota.
The public billing identity is ``tenant_id + provider + model_type + model``.
LLM callers still use thin adapters that compute quota usage from ``LLMUsage``
so the workflow layer does not need to know generic billing details.
System-hosted quota accounting is currently defined only for LLM models. Keep
the public helpers LLM-specific so callers do not carry unused model-type
plumbing, and fail loudly if the deprecated ``ModelInstance`` wrappers are used
with a non-LLM model.
"""
import warnings
@ -33,14 +34,14 @@ def _get_provider_configuration(*, tenant_id: str, provider: str):
return provider_configuration
def ensure_model_quota_available(*, tenant_id: str, provider: str, model_type: ModelType, model: str) -> None:
"""Raise when a tenant-bound system provider model is already out of quota."""
def ensure_llm_quota_available_for_model(*, tenant_id: str, provider: str, model: str) -> None:
"""Raise when a tenant-bound LLM model is already out of quota."""
provider_configuration = _get_provider_configuration(tenant_id=tenant_id, provider=provider)
if provider_configuration.using_provider_type != ProviderType.SYSTEM:
return
provider_model = provider_configuration.get_provider_model(
model_type=model_type,
model_type=ModelType.LLM,
model=model,
)
if provider_model and provider_model.status == ModelStatus.QUOTA_EXCEEDED:
@ -71,14 +72,8 @@ def _resolve_llm_used_quota(*, system_configuration, model: str, usage: LLMUsage
return used_quota
def _deduct_model_quota_with_configuration(
*,
tenant_id: str,
provider: str,
provider_configuration,
used_quota: int | None,
) -> None:
"""Apply a resolved quota charge against the current provider quota bucket."""
def _deduct_used_llm_quota(*, tenant_id: str, provider: str, provider_configuration, used_quota: int | None) -> None:
"""Apply a resolved LLM quota charge against the current provider quota bucket."""
if provider_configuration.using_provider_type != ProviderType.SYSTEM:
return
@ -120,36 +115,6 @@ def _deduct_model_quota_with_configuration(
session.execute(stmt)
def deduct_model_quota(
*,
tenant_id: str,
provider: str,
model_type: ModelType,
model: str,
used_quota: int | None,
) -> None:
"""Deduct quota for the resolved tenant/provider/model identity."""
_ = model_type
_ = model
provider_configuration = _get_provider_configuration(tenant_id=tenant_id, provider=provider)
_deduct_model_quota_with_configuration(
tenant_id=tenant_id,
provider=provider,
provider_configuration=provider_configuration,
used_quota=used_quota,
)
def ensure_llm_quota_available_for_model(*, tenant_id: str, provider: str, model: str) -> None:
"""Raise when a tenant-bound LLM model is already out of quota."""
ensure_model_quota_available(
tenant_id=tenant_id,
provider=provider,
model_type=ModelType.LLM,
model=model,
)
def deduct_llm_quota_for_model(*, tenant_id: str, provider: str, model: str, usage: LLMUsage) -> None:
"""Deduct tenant-bound quota for the resolved LLM model identity."""
provider_configuration = _get_provider_configuration(tenant_id=tenant_id, provider=provider)
@ -158,15 +123,20 @@ def deduct_llm_quota_for_model(*, tenant_id: str, provider: str, model: str, usa
model=model,
usage=usage,
)
deduct_model_quota(
_deduct_used_llm_quota(
tenant_id=tenant_id,
provider=provider,
model_type=ModelType.LLM,
model=model,
provider_configuration=provider_configuration,
used_quota=used_quota,
)
def _require_llm_model_instance(model_instance: ModelInstance) -> None:
    """Guard the deprecated ``ModelInstance`` wrappers against non-LLM models.

    Args:
        model_instance: The instance handed to a deprecated quota wrapper.

    Raises:
        ValueError: If the instance's model type is not ``ModelType.LLM``.
    """
    resolved_model_type = model_instance.model_type_instance.model_type
    if resolved_model_type != ModelType.LLM:
        # Fail loudly instead of silently charging or checking an LLM quota
        # bucket on behalf of a model of another type.
        raise ValueError("LLM quota helpers only support LLM model instances.")
def ensure_llm_quota_available(*, model_instance: ModelInstance) -> None:
"""Deprecated compatibility wrapper for callers that still pass ModelInstance."""
warnings.warn(
@ -175,10 +145,10 @@ def ensure_llm_quota_available(*, model_instance: ModelInstance) -> None:
DeprecationWarning,
stacklevel=2,
)
ensure_model_quota_available(
_require_llm_model_instance(model_instance)
ensure_llm_quota_available_for_model(
tenant_id=model_instance.provider_model_bundle.configuration.tenant_id,
provider=model_instance.provider,
model_type=model_instance.model_type_instance.model_type,
model=model_instance.model_name,
)
@ -191,14 +161,10 @@ def deduct_llm_quota(*, tenant_id: str, model_instance: ModelInstance, usage: LL
DeprecationWarning,
stacklevel=2,
)
deduct_model_quota(
_require_llm_model_instance(model_instance)
deduct_llm_quota_for_model(
tenant_id=tenant_id,
provider=model_instance.provider,
model_type=model_instance.model_type_instance.model_type,
model=model_instance.model_name,
used_quota=_resolve_llm_used_quota(
system_configuration=model_instance.provider_model_bundle.configuration.system_configuration,
model=model_instance.model_name,
usage=usage,
),
usage=usage,
)

View File

@ -6,10 +6,8 @@ import pytest
from core.app.llm.quota import (
deduct_llm_quota,
deduct_llm_quota_for_model,
deduct_model_quota,
ensure_llm_quota_available,
ensure_llm_quota_available_for_model,
ensure_model_quota_available,
)
from core.entities.model_entities import ModelStatus
from core.entities.provider_entities import ProviderQuotaType, QuotaUnit
@ -19,7 +17,7 @@ from graphon.model_runtime.entities.model_entities import ModelType
from models.provider import ProviderType
def test_ensure_model_quota_available_raises_when_system_model_is_exhausted() -> None:
def test_ensure_llm_quota_available_for_model_raises_when_system_model_is_exhausted() -> None:
provider_configuration = SimpleNamespace(
using_provider_type=ProviderType.SYSTEM,
get_provider_model=MagicMock(return_value=SimpleNamespace(status=ModelStatus.QUOTA_EXCEEDED)),
@ -31,37 +29,21 @@ def test_ensure_model_quota_available_raises_when_system_model_is_exhausted() ->
patch("core.app.llm.quota.create_plugin_provider_manager", return_value=provider_manager),
pytest.raises(QuotaExceededError, match="Model provider openai quota exceeded."),
):
ensure_model_quota_available(
tenant_id="tenant-id",
provider="openai",
model_type=ModelType.TEXT_EMBEDDING,
model="gpt-4o",
)
provider_configuration.get_provider_model.assert_called_once_with(
model_type=ModelType.TEXT_EMBEDDING,
model="gpt-4o",
)
def test_ensure_llm_quota_available_for_model_delegates_with_llm_model_type() -> None:
with patch("core.app.llm.quota.ensure_model_quota_available") as mock_ensure:
ensure_llm_quota_available_for_model(
tenant_id="tenant-id",
provider="openai",
model="gpt-4o",
)
mock_ensure.assert_called_once_with(
tenant_id="tenant-id",
provider="openai",
provider_configuration.get_provider_model.assert_called_once_with(
model_type=ModelType.LLM,
model="gpt-4o",
)
def test_deduct_model_quota_uses_identity_based_trial_billing() -> None:
def test_deduct_llm_quota_for_model_uses_identity_based_trial_billing() -> None:
usage = LLMUsage.empty_usage()
usage.total_tokens = 42
provider_configuration = SimpleNamespace(
using_provider_type=ProviderType.SYSTEM,
system_configuration=SimpleNamespace(
@ -82,12 +64,11 @@ def test_deduct_model_quota_uses_identity_based_trial_billing() -> None:
patch("core.app.llm.quota.create_plugin_provider_manager", return_value=provider_manager),
patch("services.credit_pool_service.CreditPoolService.check_and_deduct_credits") as mock_deduct_credits,
):
deduct_model_quota(
deduct_llm_quota_for_model(
tenant_id="tenant-id",
provider="openai",
model_type=ModelType.TEXT_EMBEDDING,
model="gpt-4o",
used_quota=42,
usage=usage,
)
mock_deduct_credits.assert_called_once_with(
@ -96,10 +77,11 @@ def test_deduct_model_quota_uses_identity_based_trial_billing() -> None:
)
def test_deduct_llm_quota_for_model_delegates_with_llm_model_type_and_usage() -> None:
def test_deduct_llm_quota_for_model_reuses_resolved_provider_configuration_for_deduction() -> None:
usage = LLMUsage.empty_usage()
usage.total_tokens = 42
provider_configuration = SimpleNamespace(
using_provider_type=ProviderType.SYSTEM,
system_configuration=SimpleNamespace(
current_quota_type=ProviderQuotaType.TRIAL,
quota_configurations=[
@ -114,7 +96,7 @@ def test_deduct_llm_quota_for_model_delegates_with_llm_model_type_and_usage() ->
with (
patch("core.app.llm.quota._get_provider_configuration", return_value=provider_configuration),
patch("core.app.llm.quota.deduct_model_quota") as mock_deduct,
patch("core.app.llm.quota._deduct_used_llm_quota") as mock_deduct,
):
deduct_llm_quota_for_model(
tenant_id="tenant-id",
@ -126,13 +108,33 @@ def test_deduct_llm_quota_for_model_delegates_with_llm_model_type_and_usage() ->
mock_deduct.assert_called_once_with(
tenant_id="tenant-id",
provider="openai",
model_type=ModelType.LLM,
model="gpt-4o",
provider_configuration=provider_configuration,
used_quota=42,
)
def test_ensure_llm_quota_available_wrapper_warns_and_delegates_with_model_type() -> None:
def test_ensure_llm_quota_available_wrapper_warns_and_delegates() -> None:
    """The deprecated wrapper warns, then forwards tenant/provider/model to the model-based helper."""
    bundle = SimpleNamespace(configuration=SimpleNamespace(tenant_id="tenant-id"))
    llm_instance = SimpleNamespace(
        provider="openai",
        model_name="gpt-4o",
        provider_model_bundle=bundle,
        model_type_instance=SimpleNamespace(model_type=ModelType.LLM),
    )
    expected_call = {
        "tenant_id": "tenant-id",
        "provider": "openai",
        "model": "gpt-4o",
    }

    # The deprecation check stays outermost so it observes the warning emitted
    # by the wrapper while the delegate is replaced by a mock.
    with (
        pytest.deprecated_call(match="ensure_llm_quota_available\\(model_instance=.*deprecated"),
        patch("core.app.llm.quota.ensure_llm_quota_available_for_model") as mock_ensure,
    ):
        ensure_llm_quota_available(model_instance=llm_instance)

    mock_ensure.assert_called_once_with(**expected_call)
def test_ensure_llm_quota_available_wrapper_rejects_non_llm_model_instances() -> None:
model_instance = SimpleNamespace(
provider="openai",
model_name="gpt-4o",
@ -142,44 +144,24 @@ def test_ensure_llm_quota_available_wrapper_warns_and_delegates_with_model_type(
with (
pytest.deprecated_call(match="ensure_llm_quota_available\\(model_instance=.*deprecated"),
patch("core.app.llm.quota.ensure_model_quota_available") as mock_ensure,
pytest.raises(ValueError, match="only support LLM model instances"),
):
ensure_llm_quota_available(model_instance=model_instance)
mock_ensure.assert_called_once_with(
tenant_id="tenant-id",
provider="openai",
model_type=ModelType.TEXT_EMBEDDING,
model="gpt-4o",
)
def test_deduct_llm_quota_wrapper_warns_and_delegates_with_model_type() -> None:
def test_deduct_llm_quota_wrapper_warns_and_delegates() -> None:
usage = LLMUsage.empty_usage()
usage.total_tokens = 7
model_instance = SimpleNamespace(
provider="openai",
model_name="gpt-4o",
model_type_instance=SimpleNamespace(model_type=ModelType.LLM),
provider_model_bundle=SimpleNamespace(
configuration=SimpleNamespace(
system_configuration=SimpleNamespace(
current_quota_type=ProviderQuotaType.TRIAL,
quota_configurations=[
SimpleNamespace(
quota_type=ProviderQuotaType.TRIAL,
quota_unit=QuotaUnit.TOKENS,
quota_limit=100,
)
],
)
)
),
provider_model_bundle=SimpleNamespace(configuration=SimpleNamespace()),
)
with (
pytest.deprecated_call(match="deduct_llm_quota\\(tenant_id=.*deprecated"),
patch("core.app.llm.quota.deduct_model_quota") as mock_deduct,
patch("core.app.llm.quota.deduct_llm_quota_for_model") as mock_deduct,
):
deduct_llm_quota(
tenant_id="tenant-id",
@ -190,7 +172,26 @@ def test_deduct_llm_quota_wrapper_warns_and_delegates_with_model_type() -> None:
mock_deduct.assert_called_once_with(
tenant_id="tenant-id",
provider="openai",
model_type=ModelType.LLM,
model="gpt-4o",
used_quota=7,
usage=usage,
)
def test_deduct_llm_quota_wrapper_rejects_non_llm_model_instances() -> None:
    """The deprecated deduct wrapper still warns, then raises ``ValueError`` for a non-LLM instance."""
    empty_usage = LLMUsage.empty_usage()
    embedding_instance = SimpleNamespace(
        provider="openai",
        model_name="gpt-4o",
        model_type_instance=SimpleNamespace(model_type=ModelType.TEXT_EMBEDDING),
        provider_model_bundle=SimpleNamespace(configuration=SimpleNamespace()),
    )
    wrapper_kwargs = {
        "tenant_id": "tenant-id",
        "model_instance": embedding_instance,
        "usage": empty_usage,
    }

    # ``pytest.raises`` is the inner manager: it absorbs the ValueError so the
    # outer ``deprecated_call`` can still verify the warning on exit.
    with (
        pytest.deprecated_call(match="deduct_llm_quota\\(tenant_id=.*deprecated"),
        pytest.raises(ValueError, match="only support LLM model instances"),
    ):
        deduct_llm_quota(**wrapper_kwargs)