This guide covers monitoring dnsdist with Prometheus, Grafana, the built-in web interface, and other tools.
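dnsdist provides multiple monitoring options: the built-in web interface and REST API, a Prometheus metrics endpoint, console commands, Grafana dashboards, Prometheus/Alertmanager alerting, and log collection. Each is covered below.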
Lua:
-- Basic web interface
webserver("127.0.0.1:8083", "secure_password")
-- With ACL
setACL({"127.0.0.0/8", "::1/128"})
YAML:
webserver:
address: "127.0.0.1:8083"
password: "secure_password"
acl:
- "127.0.0.0/8"
- "::1/128"
http://localhost:8083/
Default credentials: the username is dnsdist and the password is the one set in webserver().
# Get server statistics
curl -u dnsdist:password \
http://localhost:8083/api/v1/servers/localhost/statistics
# Get backend status
curl -u dnsdist:password \
http://localhost:8083/api/v1/servers/localhost/servers
# Get active rules
curl -u dnsdist:password \
http://localhost:8083/api/v1/servers/localhost/rules
Metrics are automatically available via the web interface at /metrics.
Lua:
webserver("0.0.0.0:8083", "password")
YAML:
webserver:
address: "0.0.0.0:8083"
password: "password"
# Access metrics endpoint
curl http://localhost:8083/metrics
# View specific metrics
curl http://localhost:8083/metrics | grep dnsdist_queries
# Continuous monitoring
watch -n 1 'curl -s http://localhost:8083/metrics | grep -E "^dnsdist_(queries|responses)_total"'
| Metric | Type | Description |
|---|---|---|
| `dnsdist_queries_total` | Counter | Total queries received |
| `dnsdist_queries_udp` | Counter | Queries over UDP |
| `dnsdist_queries_tcp` | Counter | Queries over TCP |
| `dnsdist_queries_dot` | Counter | Queries over DoT |
| `dnsdist_queries_doh` | Counter | Queries over DoH |
| Metric | Type | Description |
|---|---|---|
| `dnsdist_responses_total` | Counter | Total responses sent |
| `dnsdist_responses_udp` | Counter | Responses over UDP |
| `dnsdist_responses_tcp` | Counter | Responses over TCP |
| `dnsdist_servfail_total` | Counter | SERVFAIL responses |
| `dnsdist_nxdomain_total` | Counter | NXDOMAIN responses |
| Metric | Type | Description |
|---|---|---|
| `dnsdist_cache_hits_total` | Counter | Cache hits |
| `dnsdist_cache_misses_total` | Counter | Cache misses |
| `dnsdist_cache_size` | Gauge | Current cache size |
| `dnsdist_cache_entries` | Gauge | Number of cache entries |
| Metric | Type | Description |
|---|---|---|
| `dnsdist_downstream_servers` | Gauge | Backend server status |
| `dnsdist_downstream_queries_total` | Counter | Queries per backend |
| `dnsdist_downstream_latency` | Gauge | Backend latency |
| Metric | Type | Description |
|---|---|---|
| `dnsdist_rate_drops_total` | Counter | Queries dropped by rate limiting |
| `dnsdist_dynamic_blocks` | Gauge | Active dynamic blocks |
# prometheus.yml
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
- job_name: 'dnsdist'
static_configs:
- targets: ['dns1.example.com:8083', 'dns2.example.com:8083']
metrics_path: /metrics
scrape_timeout: 10s
# Connect to console
sudo dnsdist
# Show all statistics
showStats()
# Show backend status
showServers()
# Show active rules
showRules()
# Show dynamic blocks
showDynamicBlocks()
# Show client statistics
showClients()
# Show BPF filters (if enabled)
showBPFStats()
# One-time statistics
dnsdist -e "showStats()"
# Backend status
dnsdist -e "showServers()"
# Rules
dnsdist -e "showRules()"
# Dynamic blocks
dnsdist -e "showDynamicBlocks()"
# Get all statistics
curl -u dnsdist:password \
http://localhost:8083/api/v1/servers/localhost/statistics
# Get specific statistic
curl -u dnsdist:password \
http://localhost:8083/api/v1/servers/localhost/statistics/queries
Prometheus data source URL: http://prometheus:9090
Save as dnsdist-dashboard.json:
{
"dashboard": {
"title": "dnsdist",
"panels": [
{
"title": "Queries per Second",
"type": "graph",
"targets": [
{
"expr": "rate(dnsdist_queries_total[5m])",
"legendFormat": "QPS"
}
]
},
{
"title": "Cache Hit Rate",
"type": "graph",
"targets": [
{
"expr": "rate(dnsdist_cache_hits_total[5m]) / rate(dnsdist_queries_total[5m]) * 100",
"legendFormat": "Hit Rate %"
}
]
},
{
"title": "Response Codes",
"type": "graph",
"targets": [
{
"expr": "rate(dnsdist_responses_total[5m])",
"legendFormat": "Responses"
},
{
"expr": "rate(dnsdist_servfail_total[5m])",
"legendFormat": "SERVFAIL"
},
{
"expr": "rate(dnsdist_nxdomain_total[5m])",
"legendFormat": "NXDOMAIN"
}
]
},
{
"title": "Backend Servers",
"type": "graph",
"targets": [
{
"expr": "dnsdist_downstream_servers",
"legendFormat": "{{server}}"
}
]
},
{
"title": "Rate Limiting",
"type": "graph",
"targets": [
{
"expr": "rate(dnsdist_rate_drops_total[5m])",
"legendFormat": "Dropped QPS"
},
{
"expr": "dnsdist_dynamic_blocks",
"legendFormat": "Active Blocks"
}
]
},
{
"title": "Protocol Distribution",
"type": "piechart",
"targets": [
{
"expr": "rate(dnsdist_queries_udp[5m])",
"legendFormat": "UDP"
},
{
"expr": "rate(dnsdist_queries_tcp[5m])",
"legendFormat": "TCP"
},
{
"expr": "rate(dnsdist_queries_dot[5m])",
"legendFormat": "DoT"
},
{
"expr": "rate(dnsdist_queries_doh[5m])",
"legendFormat": "DoH"
}
]
}
]
}
}
# Using Grafana API
curl -X POST \
-H "Content-Type: application/json" \
-u admin:admin \
-d @dnsdist-dashboard.json \
http://localhost:3000/api/dashboards/db
# dnsdist-alerts.yml
groups:
- name: dnsdist
rules:
# Service Down
- alert: DNSdistDown
expr: up{job="dnsdist"} == 0
for: 1m
labels:
severity: critical
annotations:
summary: "dnsdist instance is down"
description: "{{ $labels.instance }} has been down for more than 1 minute."
# High Query Rate
- alert: DNSdistHighQueryRate
expr: rate(dnsdist_queries_total[1m]) > 100000
for: 5m
labels:
severity: warning
annotations:
summary: "High DNS query rate"
description: "Query rate is {{ $value }} queries/sec"
# High Error Rate
- alert: DNSdistHighErrorRate
expr: rate(dnsdist_servfail_total[5m]) / rate(dnsdist_queries_total[5m]) > 0.05
for: 5m
labels:
severity: warning
annotations:
summary: "High DNS error rate"
description: "Error rate is {{ $value | humanizePercentage }}"
# Backend Down
- alert: DNSdistBackendDown
expr: dnsdist_downstream_servers{state="down"} > 0
for: 1m
labels:
severity: critical
annotations:
summary: "DNS backend server is down"
description: "Backend {{ $labels.server }} is down"
# Rate Limiting Active
- alert: DNSdistRateLimitingActive
expr: rate(dnsdist_rate_drops_total[1m]) > 1000
for: 5m
labels:
severity: warning
annotations:
summary: "Rate limiting is active"
description: "{{ $value }} queries being dropped per second"
# High Dynamic Blocks
- alert: DNSdistHighDynamicBlocks
expr: dnsdist_dynamic_blocks > 100
for: 5m
labels:
severity: warning
annotations:
summary: "Many dynamic blocks active"
description: "{{ $value }} clients currently blocked"
# Low Cache Hit Rate
- alert: DNSdistLowCacheHitRate
expr: rate(dnsdist_cache_hits_total[5m]) / rate(dnsdist_queries_total[5m]) < 0.3
for: 10m
labels:
severity: warning
annotations:
summary: "Low cache hit rate"
description: "Cache hit rate is {{ $value | humanizePercentage }}"
# alertmanager.yml
global:
smtp_smarthost: 'smtp.example.com:587'
smtp_from: 'alertmanager@example.com'
route:
group_by: ['alertname']
group_wait: 10s
group_interval: 10s
repeat_interval: 1h
receiver: 'email-notifications'
routes:
- match:
severity: critical
receiver: 'pagerduty'
- match:
severity: warning
receiver: 'email-notifications'
receivers:
- name: 'email-notifications'
email_configs:
- to: 'dns-team@example.com'
- name: 'pagerduty'
pagerduty_configs:
- service_key: 'your-pagerduty-key'
Lua:
-- Enable query logging (use with caution in production)
addRuleLogging("/var/log/dnsdist/queries.log", true)
-- Log specific queries
addAction(
SuffixRule("log.example.com"),
LogAction("/var/log/dnsdist/specific.log", true, true, true)
)
# View systemd logs
sudo journalctl -u dnsdist -f
# Filter by priority
sudo journalctl -u dnsdist -p err -f
# View recent errors
sudo journalctl -u dnsdist -p err --since "1 hour ago"
Promtail Configuration:
# promtail.yml
positions:
filename: /tmp/positions.yaml
server:
http_listen_port: 9080
scrape_configs:
- job_name: dnsdist
static_configs:
- targets:
- localhost
labels:
job: dnsdist
__path__: /var/log/journal/*/dnsdist.service.log
| Metric | Target | Warning | Critical |
|---|---|---|---|
| Query Latency | < 50ms | > 100ms | > 200ms |
| Cache Hit Rate | > 40% | < 30% | < 10% |
| Error Rate | < 1% | > 5% | > 10% |
| Backend Availability | > 99.9% | < 99% | < 95% |
| Service Uptime | > 99.9% | < 99% | < 95% |
{
"panels": [
{
"title": "Query Latency",
"targets": [
{
"expr": "dnsdist_downstream_latency",
"legendFormat": "{{server}}"
}
]
},
{
"title": "Throughput",
"targets": [
{
"expr": "rate(dnsdist_queries_total[1m])",
"legendFormat": "Queries/sec"
}
]
},
{
"title": "Cache Efficiency",
"targets": [
{
"expr": "rate(dnsdist_cache_hits_total[1m]) / rate(dnsdist_queries_total[1m]) * 100",
"legendFormat": "Hit Rate %"
}
]
}
]
}
# View statistics
dnsdist -e "showStats()"
# Check backend status
dnsdist -e "showServers()"
# View active rules
dnsdist -e "showRules()"
# Test connectivity
curl -v http://localhost:8083/metrics
# Check for specific metrics
curl http://localhost:8083/metrics | grep -E "^dnsdist_queries_total"
# Verify Prometheus scraping
curl http://prometheus:9090/api/v1/targets | jq '.data.activeTargets[] | select(.labels.job=="dnsdist")'
# Check if web interface is running
sudo systemctl status dnsdist
# Check if port is listening
ss -tlnp | grep 8083
# Check firewall
sudo ufw status | grep 8083
No Metrics Available:
# Check if web interface is enabled
grep -r "webserver" /etc/dnsdist/
# Check if port is listening
ss -tlnp | grep 8083
# Check firewall
sudo ufw status
High Latency:
# Check backend latency
dnsdist -e "showServers()"
# Check cache performance
dnsdist -e "showStats()" | grep -i cache
Backend Not Responding:
# Check backend status
dnsdist -e "showServers()"
# View backend statistics
curl -u dnsdist:password \
http://localhost:8083/api/v1/servers/localhost/servers
Questions? Find all contact information on our contact page.