Capability-driven AI model routing with automatic failover
Get productive with ModelMesh in 5 minutes. This guide covers everything a developer needs for day-to-day usage: making requests, handling errors, testing, debugging routing, and tracking costs. For the full YAML configuration reference, see the configuration docs.
import modelmesh

# Create a client bound to the "chat-completion" capability pool.
client = modelmesh.create("chat-completion")

# OpenAI-style call surface; "model" here is the virtual model name
# (a capability pool), not a concrete provider model.
response = client.chat.completions.create(
    model="chat-completion",
    messages=[{"role": "user", "content": "Hello!"}],
)

print(response.choices[0].message.content)
import { create } from '@nistrapa/modelmesh-core';

// Create a client bound to the "chat-completion" capability pool.
const client = create('chat-completion');

// "model" is the virtual model name (a capability pool), not a
// concrete provider model.
const response = await client.chat.completions.create({
  model: 'chat-completion',
  messages: [{ role: 'user', content: 'Hello!' }],
});

console.log(response.choices[0].message?.content);
The model parameter is a virtual model name — it maps to a capability pool, not a specific provider model. ModelMesh routes it to the best available provider automatically.
Always use a context manager for production code to ensure clean shutdown:
import modelmesh
# Sync
with modelmesh.create("chat-completion") as client:
response = client.chat.completions.create(
model="chat-completion",
messages=[{"role": "user", "content": "Hello!"}],
)
# shutdown() called automatically
# Async
async with modelmesh.create("chat-completion") as client:
response = client.chat.completions.create(
model="chat-completion",
messages=[{"role": "user", "content": "Hello!"}],
)
// No context-manager syntax here — pair the client with try/finally
// so close() always runs, mirroring Python's `with` block.
const client = create('chat-completion');
try {
  const response = await client.chat.completions.create({ ... });
} finally {
  client.close();
}
ModelMesh provides structured exceptions so you can handle specific failure modes:
from modelmesh.exceptions import (
    ModelMeshError,              # base class for all ModelMesh errors
    NoActiveModelError,
    RateLimitError,
    AllProvidersExhaustedError,
    BudgetExceededError,
)

try:
    response = client.chat.completions.create(
        model="chat-completion",
        messages=[{"role": "user", "content": "Hello"}],
    )
except RateLimitError as e:
    # Provider throttled us; e.retry_after says how long to back off.
    print(f"Rate limited by {e.provider_id}, retry in {e.retry_after}s")
except NoActiveModelError:
    print("No models available right now")
except AllProvidersExhaustedError as e:
    # Failover ran out of candidates; e.last_error is the final failure.
    print(f"All {e.attempts} attempts failed: {e.last_error}")
except BudgetExceededError as e:
    print(f"Over budget: {e.limit_type} limit ${e.limit_value}")
except ModelMeshError as e:
    # Catch-all must come last: the specific subclasses above would
    # otherwise be shadowed by the base class.
    if e.retryable:
        print("Transient error, safe to retry")
    else:
        print(f"Permanent error: {e}")
import {
  ModelMeshError,
  RateLimitError,
  NoActiveModelError,
} from '@nistrapa/modelmesh-core';

try {
  const response = await client.chat.completions.create({ ... });
} catch (e) {
  // Test specific subclasses before the ModelMeshError base class,
  // otherwise the base check would match everything.
  if (e instanceof RateLimitError) {
    console.log(`Rate limited, retry in ${e.retryAfter}s`);
  } else if (e instanceof NoActiveModelError) {
    console.log('No models available');
  } else if (e instanceof ModelMeshError) {
    console.log(`Error (retryable: ${e.retryable}): ${e.message}`);
  }
}
See Error Handling Guide for the full exception hierarchy.
Add logging, transforms, or caching without modifying library internals:
from modelmesh import Middleware

class LoggingMiddleware(Middleware):
    """Log each request/response pair around the provider call."""

    async def before_request(self, request, context):
        # context carries the routing decision (resolved model + provider).
        print(f"→ {context.model_id} via {context.provider_id}")
        return request  # must return the (possibly transformed) request

    async def after_response(self, response, context):
        print(f"← {response.usage.total_tokens} tokens")
        return response  # must return the (possibly transformed) response

client = modelmesh.create("chat", middleware=[LoggingMiddleware()])
import { Middleware, create } from '@nistrapa/modelmesh-core';

// Logs each request/response pair around the provider call.
class LoggingMiddleware extends Middleware {
  async beforeRequest(request, context) {
    // context carries the routing decision (resolved model + provider).
    console.log(`→ ${context.modelId} via ${context.providerId}`);
    return request; // must return the (possibly transformed) request
  }
  async afterResponse(response, context) {
    console.log(`← ${response.usage?.totalTokens} tokens`);
    return response; // must return the (possibly transformed) response
  }
}

const client = create('chat', { middleware: [new LoggingMiddleware()] });
See Middleware Guide for advanced patterns.
Use the built-in mock client for unit tests — no API keys needed:
from modelmesh.testing import mock_client, MockResponse

# Queue canned responses — no network, no API keys.
client = mock_client(responses=[
    MockResponse(content="Hello!", model="gpt-4o", tokens=10),
])

response = client.chat.completions.create(
    model="chat-completion",
    messages=[{"role": "user", "content": "Hi"}],
)

assert response.choices[0].message.content == "Hello!"
# The mock records every call for later inspection.
assert len(client.calls) == 1
assert client.calls[0].messages[0]["content"] == "Hi"
import { mockClient } from '@nistrapa/modelmesh-core/testing';

// Queue canned responses — no network, no API keys.
const client = mockClient({
  responses: [{ content: 'Hello!', model: 'gpt-4o', tokens: 10 }],
});

const response = await client.chat.completions.create({
  model: 'chat-completion',
  messages: [{ role: 'user', content: 'Hi' }],
});

expect(response.choices[0].message?.content).toBe('Hello!');
// The mock records every call for later inspection.
expect(client.calls.length).toBe(1);
See Testing Guide for full mock client API.
Debug routing decisions without making actual API calls:
# Dry-run the router: explain() reports the decision without making a
# provider API call.
explanation = client.explain(model="chat-completion")

print(explanation["pool_name"])       # "chat-completion"
print(explanation["strategy"])        # "stick-until-failure"
print(explanation["selected_model"])  # "gpt-4o"
print(explanation["candidates"])      # [CandidateInfo(...), ...]
print(explanation["reason"])          # Why this model was selected
// Dry-run the router: reports the decision without a provider API call.
const explanation = client.explain({ model: 'chat-completion' });

console.log(explanation.poolName);      // "chat-completion"
console.log(explanation.strategy);      // "stick-until-failure"
console.log(explanation.selectedModel); // "gpt-4o"
Don’t memorize capability paths — use the discovery API:
import modelmesh

# List all capabilities (short aliases)
caps = modelmesh.capabilities.list_all()
# ['chat-completion', 'code-generation', 'image-to-text', ...]

# Resolve an alias to its full dotted path
path = modelmesh.capabilities.resolve("chat-completion")
# 'generation.text-generation.chat-completion'

# Search by keyword
matches = modelmesh.capabilities.search("text")
# ['text-embeddings', 'text-generation', 'text-to-image', 'text-to-speech']

# View the full hierarchy as nested dicts
tree = modelmesh.capabilities.tree()
# {'generation': {'text-generation': {'chat-completion': {}, ...}, ...}}
import * as capabilities from '@nistrapa/modelmesh-core/capabilities';

const caps = capabilities.listAll();                 // all capability aliases
const path = capabilities.resolve('chat-completion'); // alias → full path
const matches = capabilities.search('text');          // keyword search
const tree = capabilities.tree();                     // nested hierarchy
Monitor costs and tokens in real time:
client = modelmesh.create("chat")

# ... after some requests ...
print(f"Total cost: ${client.usage.total_cost:.4f}")
print(f"Total tokens: {client.usage.total_tokens}")

# Breakdown by model
for model_id, usage in client.usage.by_model.items():
    print(f" {model_id}: ${usage.total_cost:.4f}")

# Check budget (budget_status may be None when no budget is configured,
# hence the truthiness guard)
status = client.usage.budget_status
if status and status.exceeded:
    print("Budget exceeded!")
// Running totals accumulated across all requests on this client.
console.log(`Total cost: $${client.usage.totalCost.toFixed(4)}`);
console.log(`Total tokens: ${client.usage.totalTokens}`);
See what’s behind the virtual model:
# Human-readable summary of the pool behind the virtual model
print(client.describe())
# Pool "chat-completion" (strategy: stick-until-failure)
#   → gpt-4o [openai.llm.v1] (active)
#     claude-sonnet-4 [anthropic.claude.v1] (active)

# Structured pool status (counts per pool)
status = client.pool_status()
# {'chat-completion': {'active': 2, 'standby': 0, 'total': 2, ...}}

# Provider list
providers = client.active_providers()
# ['openai.llm.v1', 'anthropic.claude.v1']
| Task | Python | TypeScript |
|---|---|---|
| Create client | modelmesh.create("chat") | create('chat') |
| Chat completion | client.chat.completions.create(...) | client.chat.completions.create(...) |
| Streaming | stream=True | stream: true |
| Context manager | with client: / async with client: | client.close() |
| Error handling | except ModelMeshError | catch (e) { if (e instanceof ModelMeshError) } |
| Middleware | middleware=[MyMiddleware()] | middleware: [new MyMiddleware()] |
| Mock testing | mock_client(responses=[...]) | mockClient({ responses: [...] }) |
| Explain routing | client.explain(model="...") | client.explain({ model: '...' }) |
| Capabilities | modelmesh.capabilities.list_all() | capabilities.listAll() |
| Usage tracking | client.usage.total_cost | client.usage.totalCost |
| Pool status | client.pool_status() | client.poolStatus() |
| Describe | client.describe() | client.describe() |