Complete Docker deployment guide for LiteLLM proxy server.
| Image | Tag | Use Case |
|---|---|---|
| docker.litellm.ai/berriai/litellm | main-stable | Recommended - Stable release |
| docker.litellm.ai/berriai/litellm | main-latest | Latest build |
| docker.litellm.ai/berriai/litellm-database | main-stable | With database support |
| docker.litellm.ai/berriai/litellm-non_root | main-stable | Non-root user |
Create litellm_config.yaml:
# Minimal proxy config: one public model alias backed by OpenAI.
# (Indentation restored — the flattened form is not valid YAML.)
model_list:
  - model_name: gpt-4o  # alias clients send in their requests
    litellm_params:
      model: openai/gpt-4o  # provider/model the proxy routes to
      api_key: os.environ/OPENAI_API_KEY  # resolved from the environment at startup
# Run the proxy detached: mount the config, pass the provider key,
# and expose the default proxy port 4000.
docker run -d \
--name litellm-proxy \
-v $(pwd)/litellm_config.yaml:/app/config.yaml \
-e OPENAI_API_KEY="sk-your-openai-api-key" \
-p 4000:4000 \
docker.litellm.ai/berriai/litellm:main-stable \
--config /app/config.yaml
# Smoke-test the proxy through its OpenAI-compatible chat endpoint.
# 'sk-1234' stands in for your proxy master/virtual key.
curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{
"model": "gpt-4o",
"messages": [{"role": "user", "content": "Hello!"}]
}'
# Follow the proxy's logs.
docker logs -f litellm-proxy
# Download the reference docker-compose and Prometheus configs.
curl -O https://raw.githubusercontent.com/BerriAI/litellm/main/docker-compose.yml
curl -O https://raw.githubusercontent.com/BerriAI/litellm/main/prometheus.yml
# Write the secrets docker compose reads into a .env file.
cat > .env << EOF
LITELLM_MASTER_KEY="sk-1234"
LITELLM_SALT_KEY="sk-5678"
OPENAI_API_KEY="sk-your-openai-api-key"
EOF
⚠️ Important: Use strong, randomly generated values for both
LITELLM_MASTER_KEY and LITELLM_SALT_KEY (e.g. from a password generator) —
never the placeholder values shown above. The salt key is used to encrypt
stored provider credentials and should not be changed once in use.
Create litellm_config.yaml:
# Proxy config used with docker compose; the master key gates authenticated
# endpoints. (Indentation restored — the flattened form is not valid YAML.)
model_list:
  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY

general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY  # set via the .env file above
# Start the stack in the background.
docker compose up -d
# Confirm the containers are running.
docker compose ps
# Hit the liveness endpoint (note: LiteLLM spells it 'liveliness').
curl http://localhost:4000/health/liveliness
# docker-compose.yml — LiteLLM proxy with optional Redis and PostgreSQL.
# Indentation restored (the flattened form is not valid YAML). The top-level
# `version:` key was dropped: it is obsolete in the Compose specification
# and Compose v2 ignores it with a warning.
services:
  litellm:
    image: docker.litellm.ai/berriai/litellm:main-stable
    ports:
      - "4000:4000"
    volumes:
      - ./litellm_config.yaml:/app/config.yaml
    environment:
      # Values come from the .env file created above.
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
      - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY}
      - DATABASE_URL=${DATABASE_URL}
    command:
      - "--config"
      - "/app/config.yaml"
      - "--port"
      - "4000"
      - "--num_workers"
      - "8"  # tune to the host's CPU core count
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:4000/health/liveliness"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s  # grace period before failures count
    restart: unless-stopped

  # Optional: Redis for caching and rate limiting
  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis-data:/data
    command: redis-server --appendonly yes  # persist the dataset
    restart: unless-stopped

  # Optional: PostgreSQL for analytics
  postgres:
    image: postgres:15-alpine
    environment:
      POSTGRES_USER: litellm
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: litellm
    volumes:
      - postgres-data:/var/lib/postgresql/data
    ports:
      - "5432:5432"
    restart: unless-stopped

volumes:
  redis-data:
  postgres-data:
Create litellm_config.yaml:
# Multi-provider proxy config with Redis-backed routing and caching.
# (Indentation restored — the flattened form is not valid YAML.)
model_list:
  # OpenAI
  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY
  # Anthropic
  - model_name: claude-sonnet
    litellm_params:
      model: anthropic/claude-sonnet-4-20250514
      api_key: os.environ/ANTHROPIC_API_KEY
  # Azure OpenAI
  - model_name: azure-gpt-4o
    litellm_params:
      model: azure/gpt-4o
      api_base: https://your-endpoint.openai.azure.com/
      api_key: os.environ/AZURE_API_KEY
      api_version: "2025-01-01-preview"
  # Google Vertex AI (no api_key entry — presumably uses application
  # default credentials; verify for your environment)
  - model_name: gemini-pro
    litellm_params:
      model: vertex_ai/gemini-pro
      vertex_project: your-project
      vertex_location: us-central1

# Redis configuration (for caching and load balancing)
router_settings:
  redis_host: redis  # compose service name
  # FIX: LiteLLM config files are not shell-expanded, so ${REDIS_PASSWORD}
  # would be passed through literally. Use the os.environ/ reference syntax,
  # consistent with every other secret in this config.
  redis_password: os.environ/REDIS_PASSWORD
  redis_port: 6379

# Enable caching
litellm_settings:
  cache: true
  cache_params:
    type: redis
    host: redis
    port: 6379

general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY
For production with analytics and key management:
Use a managed PostgreSQL service (Supabase, Neon, RDS, etc.):
DATABASE_URL="postgresql://user:password@host:port/dbname"
# Run the database-enabled image: DATABASE_URL unlocks key management
# and spend tracking; the master key is required for the admin API.
docker run -d \
--name litellm-db \
-v $(pwd)/litellm_config.yaml:/app/config.yaml \
-e LITELLM_MASTER_KEY="sk-1234" \
-e DATABASE_URL="postgresql://user:pass@host:5432/dbname" \
-e OPENAI_API_KEY="sk-..." \
-p 4000:4000 \
docker.litellm.ai/berriai/litellm-database:main-stable \
--config /app/config.yaml
# Terminate TLS in the proxy itself: mount the cert/key directory and map
# host port 443 onto the container's plain-HTTP port 4000.
docker run -d \
-v $(pwd)/litellm_config.yaml:/app/config.yaml \
-v $(pwd)/ssl:/app/ssl \
-e OPENAI_API_KEY="sk-..." \
-p 443:4000 \
docker.litellm.ai/berriai/litellm:main-stable \
--config /app/config.yaml \
--ssl_keyfile_path /app/ssl/keyfile.key \
--ssl_certfile_path /app/ssl/certfile.crt
Create custom Dockerfile:
# Extend the official image: bake the config into the image and serve
# with hypercorn.
FROM docker.litellm.ai/berriai/litellm:main-stable
WORKDIR /app
# Copied into /app, matching the relative path used in CMD below.
COPY config.yaml .
# Install hypercorn for HTTP/2
RUN pip install hypercorn
EXPOSE 4000/tcp
# These args are appended to the base image's litellm ENTRYPOINT.
CMD ["--port", "4000", "--config", "config.yaml", "--run_hypercorn"]
# Recycle workers after N requests to bound memory growth.
# FIX: the original omitted the config volume mount (and port mapping)
# while still passing --config /app/config.yaml, so the container would
# fail to find its config file.
docker run -d \
-v $(pwd)/litellm_config.yaml:/app/config.yaml \
-e MAX_REQUESTS_BEFORE_RESTART=10000 \
-p 4000:4000 \
docker.litellm.ai/berriai/litellm:main-stable \
--config /app/config.yaml
# GCS Bucket
# Load the proxy config from a GCS object instead of a mounted file.
# NOTE(review): assumes Google credentials are available to the container
# (e.g. GOOGLE_APPLICATION_CREDENTIALS) — confirm for your environment.
docker run -d \
-e DATABASE_URL="postgresql://..." \
-e LITELLM_CONFIG_BUCKET_NAME="litellm-proxy" \
-e LITELLM_CONFIG_BUCKET_OBJECT_KEY="proxy_config.yaml" \
-e LITELLM_CONFIG_BUCKET_TYPE="gcs" \
-p 4000:4000 \
docker.litellm.ai/berriai/litellm-database:main-stable
# S3 Bucket
# No LITELLM_CONFIG_BUCKET_TYPE here — presumably S3 is the default, and
# AWS credential env vars are required; verify against the LiteLLM docs.
docker run -d \
-e DATABASE_URL="postgresql://..." \
-e LITELLM_CONFIG_BUCKET_NAME="litellm-proxy" \
-e LITELLM_CONFIG_BUCKET_OBJECT_KEY="proxy_config.yaml" \
-p 4000:4000 \
docker.litellm.ai/berriai/litellm-database:main-stable
# deployment.yaml — 3-replica LiteLLM Deployment.
# (Indentation restored — the flattened form is not valid YAML.)
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-deployment
spec:
  replicas: 3
  selector:
    matchLabels:
      app: litellm
  template:
    metadata:
      labels:
        app: litellm
    spec:
      containers:
        - name: litellm
          image: docker.litellm.ai/berriai/litellm:main-stable
          imagePullPolicy: Always
          env:
            # Secrets are referenced from a Secret, never inlined here.
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: litellm-secrets
                  key: openai-api-key
            - name: LITELLM_MASTER_KEY
              valueFrom:
                secretKeyRef:
                  name: litellm-secrets
                  key: master-key
          args:
            - "--config"
            - "/app/config.yaml"
          volumeMounts:
            - name: config-volume
              mountPath: /app/config.yaml
              subPath: config.yaml  # mount the single file, not a directory
          livenessProbe:
            httpGet:
              path: /health/liveliness  # LiteLLM's endpoint spelling
              port: 4000
            initialDelaySeconds: 120
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health/readiness
              port: 4000
            initialDelaySeconds: 120
            periodSeconds: 15
      volumes:
        - name: config-volume
          configMap:
            name: litellm-config  # ConfigMap holding litellm_config.yaml
# service.yaml — LoadBalancer Service fronting the Deployment's pods.
# (Indentation restored — the flattened form is not valid YAML.)
apiVersion: v1
kind: Service
metadata:
  name: litellm-service
spec:
  selector:
    app: litellm  # matches the Deployment's pod template labels
  ports:
    - protocol: TCP
      port: 4000
      targetPort: 4000
  type: LoadBalancer
# Apply the Deployment and Service manifests.
kubectl apply -f deployment.yaml
kubectl apply -f service.yaml
# Pull the chart
helm pull oci://docker.litellm.ai/berriai/litellm-helm
# Extract
# NOTE(review): the tarball name embeds the chart version (0.1.2 here) —
# adjust to whatever version `helm pull` actually downloaded.
tar -zxvf litellm-helm-0.1.2.tgz
# Install
helm install lite-helm ./litellm-helm
# Install with custom master key
helm install --set masterkey=sk-1234 mydeploy ./litellm-helm
# Expose
kubectl port-forward service/mydeploy-litellm-helm 4000:4000
| Endpoint | Purpose |
|---|---|
| GET /health/liveliness | Liveness probe |
| GET /health/readiness | Readiness probe |
Example:
curl http://localhost:4000/health/liveliness
curl http://localhost:4000/health/readiness
| Setting | Recommendation |
|---|---|
| CPU | Minimum 4 cores |
| RAM | Minimum 8 GB |
| Image Tag | Use versioned tags instead of main-stable |
| Database | PostgreSQL required for production |
| Redis | Required for 1000+ RPS |
| Workers | Set --num_workers based on CPU cores |
| Debug Mode | Disable --detailed_debug in production |
| Replicas | Run 3+ replicas for high availability |
# Check logs
docker logs litellm-proxy
# Verify config file
docker exec litellm-proxy cat /app/config.yaml
# Check if port is in use
netstat -tlnp | grep 4000
# Check container is running
docker ps | grep litellm
# Enable detailed debug
docker run ... docker.litellm.ai/berriai/litellm:main-stable --detailed_debug
# Check Redis connection
# FIX: run redis-cli inside the Redis container — the LiteLLM proxy image
# does not ship the redis-cli binary, so the original
# `docker exec litellm-proxy redis-cli ...` would fail.
docker compose exec redis redis-cli ping