Configuration guide for LiteLLM proxy server.

# litellm_config.yaml
model_list:
  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY
model_list:
  # OpenAI
  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY
      rpm: 100  # Rate limit: requests per minute
  # Anthropic
  - model_name: claude-sonnet
    litellm_params:
      model: anthropic/claude-sonnet-4-20250514
      api_key: os.environ/ANTHROPIC_API_KEY
      rpm: 60
  # Azure OpenAI
  - model_name: azure-gpt-4o
    litellm_params:
      model: azure/gpt-4o
      api_base: https://your-endpoint.openai.azure.com/
      api_key: os.environ/AZURE_API_KEY
      api_version: "2025-01-01-preview"
      rpm: 50
  # Google Vertex AI
  - model_name: gemini-pro
    litellm_params:
      model: vertex_ai/gemini-pro
      vertex_project: your-project-id
      vertex_location: us-central1
# General settings
general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY
  database_url: os.environ/DATABASE_URL

# LiteLLM settings
litellm_settings:
  set_verbose: true
  drop_params: true

# Router settings (for load balancing)
router_settings:
  routing_strategy: simple-shuffle
  redis_host: os.environ/REDIS_HOST
  redis_port: 6379
  redis_password: os.environ/REDIS_PASSWORD
# API Keys
export OPENAI_API_KEY="sk-..."
export ANTHROPIC_API_KEY="sk-ant-..."
export AZURE_API_KEY="..."
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/credentials.json"
# LiteLLM Settings
export LITELLM_MASTER_KEY="sk-1234"
export LITELLM_SALT_KEY="sk-5678"
export DATABASE_URL="postgresql://user:pass@host:5432/dbname"
# Redis (for caching and load balancing)
export REDIS_HOST="localhost"
export REDIS_PASSWORD="your-password"
# Optional
export LITELLM_LOCAL_MODEL_COST_MAP="True" # Faster cold start
export KEEPALIVE_TIMEOUT="75"
export MAX_REQUESTS_BEFORE_RESTART="10000"
model_list:
  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY
      rpm: 100     # Requests per minute
      tpm: 100000  # Tokens per minute
  - model_name: claude-sonnet
    litellm_params:
      model: anthropic/claude-sonnet-4-20250514
      api_key: os.environ/ANTHROPIC_API_KEY
      rpm: 60
      tpm: 50000

litellm_settings:
  cache: true
  cache_params:
    type: redis
    host: your-redis-host
    port: 6379
    password: os.environ/REDIS_PASSWORD

router_settings:
  redis_host: your-redis-host
  redis_password: os.environ/REDIS_PASSWORD
  redis_port: 6379

# Enable transaction buffer for high throughput
general_settings:
  use_redis_transaction_buffer: true
model_list:
  # Multiple deployments of the same model
  - model_name: gpt-4o
    litellm_params:
      model: azure/gpt-4o-ca
      api_base: https://canada-endpoint.openai.azure.com/
      api_key: os.environ/CA_AZURE_KEY
      rpm: 100
  - model_name: gpt-4o
    litellm_params:
      model: azure/gpt-4o-us
      api_base: https://us-endpoint.openai.azure.com/
      api_key: os.environ/US_AZURE_KEY
      rpm: 100

router_settings:
  routing_strategy: simple-shuffle  # or: least-busy, latency-based-routing, usage-based-routing
  redis_host: your-redis-host
model_list:
  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY
  - model_name: gpt-4o-fallback
    litellm_params:
      model: anthropic/claude-sonnet-4-20250514
      api_key: os.environ/ANTHROPIC_API_KEY

litellm_settings:
  fallbacks:
    - "gpt-4o": ["gpt-4o-fallback"]
  allowed_fallbacks:
    - "gpt-4o": ["gpt-4o-fallback"]
# Generate a self-signed cert (for testing)
openssl req -x509 -newkey rsa:4096 -keyout keyfile.key -out certfile.crt -days 365 -nodes

# Start with SSL
litellm --config litellm_config.yaml \
  --ssl_keyfile_path ssl_test/keyfile.key \
  --ssl_certfile_path ssl_test/certfile.crt
# Set number of workers
litellm --config litellm_config.yaml --num_workers 8
# Via CLI
litellm --config litellm_config.yaml --keepalive_timeout 75
# Via environment
export KEEPALIVE_TIMEOUT=75
# Enable verbose logging
litellm --config litellm_config.yaml --detailed_debug