Capability-driven AI model routing with automatic failover
The deactivation evaluator decides whether an active model should move to standby status. It is triggered after each request completes or on a state-change event such as quota exhaustion, an error-threshold breach, a budget-cap breach, or entry into a maintenance window. It inspects the current model snapshot and returns a boolean decision along with an optional reason code, which is recorded in the model’s state history.
Depends on: ModelState, RotationPolicyService
from __future__ import annotations
from typing import Any, Optional
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
class DeactivationReason(Enum):
    """Reason code recorded when a model is moved to standby.

    Each member's value is the string form of the reason code, so a member
    can be recovered from its stored code with ``DeactivationReason(code)``.
    """

    ERROR_THRESHOLD = "error_threshold"
    QUOTA_EXHAUSTED = "quota_exhausted"
    BUDGET_EXCEEDED = "budget_exceeded"
    TOKEN_LIMIT = "token_limit"
    REQUEST_LIMIT = "request_limit"
    MAINTENANCE_WINDOW = "maintenance_window"
    MANUAL = "manual"
@dataclass
class ModelSnapshot:
    """Point-in-time view of a model's runtime state used for evaluation.

    The first nine fields are required and are accepted positionally in
    declaration order; the remaining fields are optional and default to an
    empty list or ``None``.
    """

    model_id: str
    provider_id: str
    status: str
    failure_count: int
    error_rate: float  # presumably a 0.0-1.0 fraction -- TODO confirm
    cooldown_remaining: float
    quota_used: int
    tokens_used: int
    cost_accumulated: float
    # default_factory gives every snapshot its own list; a shared mutable
    # default is rejected by dataclasses.
    latency_history: list[float] = field(default_factory=list)
    last_request: Optional[datetime] = None
    last_failure: Optional[datetime] = None
    deactivation_reason: Optional[str] = None
class DeactivationEvaluator:
    """Evaluates whether an active model should move to standby.

    Triggered after each request or on state change (quota exhausted,
    error threshold reached, budget exceeded, maintenance window entered).

    This class only declares the interface. Both methods raise
    ``NotImplementedError`` so that an un-overridden call fails loudly
    instead of silently returning ``None``.
    """

    def should_deactivate(self, snapshot: ModelSnapshot) -> bool:
        """Return True if the model should move to standby.

        Checks all configured thresholds (error rate, consecutive failures,
        quota usage, token consumption, budget, and maintenance windows)
        against the current model snapshot.

        Args:
            snapshot: Current point-in-time state of the model.

        Returns:
            True if any deactivation condition is met.

        Raises:
            NotImplementedError: Always, until a concrete evaluator
                overrides this method.
        """
        raise NotImplementedError("Not implemented")

    def get_reason(self, snapshot: ModelSnapshot) -> Optional[DeactivationReason]:
        """Return the specific reason the model should be deactivated.

        When multiple conditions are met simultaneously, returns the
        highest-priority reason in the order: MAINTENANCE_WINDOW,
        BUDGET_EXCEEDED, QUOTA_EXHAUSTED, ERROR_THRESHOLD, TOKEN_LIMIT,
        REQUEST_LIMIT.

        Args:
            snapshot: Current point-in-time state of the model.

        Returns:
            The deactivation reason, or None if no condition is met.

        Raises:
            NotImplementedError: Always, until a concrete evaluator
                overrides this method.
        """
        raise NotImplementedError("Not implemented")
/**
 * Reason code recorded when a model is moved to standby.
 *
 * A string enum: each member's runtime value is its lower-case code.
 */
enum DeactivationReason {
  ERROR_THRESHOLD = "error_threshold",
  QUOTA_EXHAUSTED = "quota_exhausted",
  BUDGET_EXCEEDED = "budget_exceeded",
  TOKEN_LIMIT = "token_limit",
  REQUEST_LIMIT = "request_limit",
  MAINTENANCE_WINDOW = "maintenance_window",
  MANUAL = "manual",
}
/**
 * Point-in-time view of a model's runtime state used for evaluation.
 *
 * The three trailing members are optional; every other member is required.
 */
interface ModelSnapshot {
  model_id: string;
  provider_id: string;
  status: string;
  failure_count: number;
  error_rate: number;
  cooldown_remaining: number;
  quota_used: number;
  tokens_used: number;
  cost_accumulated: number;
  latency_history: number[];
  // Optional members: may be absent from a snapshot.
  last_request?: Date;
  last_failure?: Date;
  deactivation_reason?: string;
}
/**
 * Evaluates whether an active model should move to standby.
 *
 * Interface placeholder: both methods currently throw "Not implemented".
 */
class DeactivationEvaluator {
  /**
   * Return true if the model should move to standby.
   *
   * Intended to check all configured thresholds against the current
   * model snapshot.
   *
   * @param snapshot Current point-in-time state of the model.
   * @throws Error Always, until an implementation is provided.
   */
  shouldDeactivate(snapshot: ModelSnapshot): boolean {
    throw new Error("Not implemented");
  }

  /**
   * Return the specific reason the model should be deactivated.
   *
   * Intended to return null if no deactivation condition is met.
   *
   * @param snapshot Current point-in-time state of the model.
   * @throws Error Always, until an implementation is provided.
   */
  getReason(snapshot: ModelSnapshot): DeactivationReason | null {
    throw new Error("Not implemented");
  }
}
The following parameters are configured per pool under the `deactivation` key. See SystemConfiguration.md – Pools for the full YAML reference.
| Parameter | Type | Description |
|---|---|---|
| `deactivation.retry_limit` | integer | Consecutive failures before deactivation. |
| `deactivation.error_rate_threshold` | float | Error rate over sliding window (0.0–1.0) that triggers deactivation. |
| `deactivation.error_codes` | list | HTTP status codes that count toward deactivation (e.g., [429, 500, 503]). |
| `deactivation.request_limit` | integer | Maximum requests before deactivation (free-tier cap). |
| `deactivation.token_limit` | integer | Maximum tokens consumed before deactivation. |
| `deactivation.budget_limit` | number | Maximum spend in USD before deactivation. |
| `deactivation.quota_window` | string | Deactivate on quota period expiry: monthly, daily. |
| `deactivation.maintenance_window` | string | Scheduled deactivation expressed as a cron expression. |