This guide covers monitoring Knot Resolver with Prometheus, Grafana, and other tools.
Knot Resolver provides multiple monitoring options:
Version 6.x:
monitoring:
metrics: true
port: 8453
namespace: "resolver_"
Version 5.x:
modules = { 'stats', 'http' }
http.prometheus.namespace = 'resolver_'
# Access metrics endpoint
curl http://localhost:8453/metrics
# View specific metrics
curl http://localhost:8453/metrics | grep resolver_request
# Continuous monitoring
watch -n 1 'curl -s http://localhost:8453/metrics | grep -E "^resolver_(request|answer)_total"'
| Metric | Type | Description |
|---|---|---|
| resolver_request_total | Counter | Total DNS requests |
| resolver_request_udp | Counter | Requests over UDP |
| resolver_request_tcp | Counter | Requests over TCP |
| resolver_request_dot | Counter | Requests over DNS-over-TLS |
| resolver_request_doh | Counter | Requests over DNS-over-HTTPS |
| resolver_request_internal | Counter | Internal requests (e.g., DNSSEC updates) |
| Metric | Type | Description |
|---|---|---|
| resolver_answer_total | Counter | Total answered queries |
| resolver_answer_cached | Counter | Answers from cache |
| resolver_answer_noerror | Counter | NOERROR responses |
| resolver_answer_nxdomain | Counter | NXDOMAIN responses |
| resolver_answer_servfail | Counter | SERVFAIL responses |
| resolver_answer_nodata | Counter | NOERROR with no data |
| Metric | Type | Description |
|---|---|---|
| resolver_latency_bucket | Histogram | Query latency distribution |
| resolver_latency_count | Counter | Total latency measurements |
| resolver_latency_sum_ms | Counter | Sum of all latencies (ms) |
| Metric | Type | Description |
|---|---|---|
| resolver_dnssec_validated | Counter | DNSSEC validated responses |
| resolver_dnssec_invalid | Counter | DNSSEC validation failures |
| resolver_dnssec_bogus | Counter | Bogus DNSSEC responses |
| Metric | Type | Description |
|---|---|---|
| resolver_cache_size | Gauge | Current cache size (records) |
| resolver_cache_bytes | Gauge | Cache memory usage (bytes) |
| resolver_cache_inserts | Counter | Cache insertions |
| resolver_cache_lookups | Counter | Cache lookups |
| resolver_cache_hits | Counter | Cache hits |
| resolver_cache_misses | Counter | Cache misses |
| Metric | Type | Description |
|---|---|---|
| resolver_workers | Gauge | Number of worker threads |
| resolver_uptime | Counter | Service uptime (seconds) |
| resolver_reload_total | Counter | Configuration reloads |
# prometheus.yml
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
- job_name: 'knot-resolver'
static_configs:
- targets: ['dns1.example.com:8453', 'dns2.example.com:8453']
metrics_path: /metrics
scrape_timeout: 10s
Version 6.x:
# All statistics
kresctl stats
# Specific statistics
kresctl stats answer
kresctl stats request
kresctl stats cache
# Most frequent queries
kresctl stats frequent
# Recently contacted upstream servers
kresctl stats upstreams
Version 5.x:
# Connect to console
sudo kresctl
# Statistics commands
> stats() -- All statistics
> stats.list() -- List all metrics as JSON
> stats.list('answer') -- Filter by prefix
> stats.frequent() -- Most frequent queries
> stats.upstreams() -- Upstream server stats
# Get statistics via HTTP API
curl --unix-socket /run/knot-resolver/kres-api.sock \
http://localhost/stats
# Get specific statistics
curl --unix-socket /run/knot-resolver/kres-api.sock \
http://localhost/stats/answer
modules = {
graphite = {
prefix = hostname() .. worker.id,
host = '127.0.0.1',
port = 2003,
interval = 5 * sec,
tcp = false
}
}
InfluxDB Line Protocol:
modules = {
influxdb = {
host = '127.0.0.1',
port = 8086,
database = 'knot_resolver',
interval = 5 * sec
}
}
In Grafana, add a Prometheus data source pointing at http://prometheus:9090.
Save the following dashboard definition as knot-resolver-dashboard.json:
{
"dashboard": {
"title": "Knot Resolver",
"panels": [
{
"title": "Total Queries",
"type": "graph",
"targets": [
{
"expr": "rate(resolver_request_total[5m])",
"legendFormat": "Queries/sec"
}
]
},
{
"title": "Cache Hit Rate",
"type": "graph",
"targets": [
{
"expr": "rate(resolver_cache_hits[5m]) / rate(resolver_cache_lookups[5m]) * 100",
"legendFormat": "Hit Rate %"
}
]
},
{
"title": "Response Codes",
"type": "graph",
"targets": [
{
"expr": "rate(resolver_answer_noerror[5m])",
"legendFormat": "NOERROR"
},
{
"expr": "rate(resolver_answer_nxdomain[5m])",
"legendFormat": "NXDOMAIN"
},
{
"expr": "rate(resolver_answer_servfail[5m])",
"legendFormat": "SERVFAIL"
}
]
},
{
"title": "Query Latency",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.50, rate(resolver_latency_bucket[5m]))",
"legendFormat": "p50"
},
{
"expr": "histogram_quantile(0.95, rate(resolver_latency_bucket[5m]))",
"legendFormat": "p95"
},
{
"expr": "histogram_quantile(0.99, rate(resolver_latency_bucket[5m]))",
"legendFormat": "p99"
}
]
},
{
"title": "DNSSEC Validation",
"type": "graph",
"targets": [
{
"expr": "rate(resolver_dnssec_validated[5m])",
"legendFormat": "Validated"
},
{
"expr": "rate(resolver_dnssec_invalid[5m])",
"legendFormat": "Invalid"
}
]
},
{
"title": "Cache Size",
"type": "graph",
"targets": [
{
"expr": "resolver_cache_size",
"legendFormat": "Records"
}
]
}
]
}
}
# Using Grafana API
curl -X POST \
-H "Content-Type: application/json" \
-u admin:admin \
-d @knot-resolver-dashboard.json \
http://localhost:3000/api/dashboards/db
# knot-resolver-alerts.yml
groups:
- name: knot-resolver
rules:
# Service Down
- alert: KnotResolverDown
expr: up{job="knot-resolver"} == 0
for: 1m
labels:
severity: critical
annotations:
summary: "Knot Resolver instance is down"
description: "{{ $labels.instance }} has been down for more than 1 minute."
# High Query Rate
- alert: KnotResolverHighQueryRate
expr: rate(resolver_request_total[1m]) > 1000
for: 5m
labels:
severity: warning
annotations:
summary: "High DNS query rate"
description: "Query rate is {{ $value }} queries/sec"
# High Error Rate
- alert: KnotResolverHighErrorRate
expr: rate(resolver_answer_servfail[5m]) / rate(resolver_request_total[5m]) > 0.05
for: 5m
labels:
severity: warning
annotations:
summary: "High DNS error rate"
description: "Error rate is {{ $value | humanizePercentage }}"
# DNSSEC Validation Failures
- alert: KnotResolverDNSSECFailures
expr: rate(resolver_dnssec_invalid[5m]) > 10
for: 5m
labels:
severity: warning
annotations:
summary: "DNSSEC validation failures detected"
description: "{{ $value }} DNSSEC failures per second"
# Low Cache Hit Rate
- alert: KnotResolverLowCacheHitRate
expr: rate(resolver_cache_hits[5m]) / rate(resolver_cache_lookups[5m]) < 0.3
for: 10m
labels:
severity: warning
annotations:
summary: "Low cache hit rate"
description: "Cache hit rate is {{ $value | humanizePercentage }}"
# High Latency
- alert: KnotResolverHighLatency
expr: histogram_quantile(0.95, rate(resolver_latency_bucket[5m])) > 500
for: 5m
labels:
severity: warning
annotations:
summary: "High query latency"
description: "95th percentile latency is {{ $value }}ms"
# Rate Limiting Active
- alert: KnotResolverRateLimiting
expr: rate(resolver_rate_limited_total[1m]) > 100
for: 5m
labels:
severity: info
annotations:
summary: "Rate limiting is active"
description: "{{ $value }} queries being rate limited per second"
# Cache Near Capacity
- alert: KnotResolverCacheNearFull
expr: resolver_cache_size / resolver_cache_max_size > 0.9
for: 10m
labels:
severity: warning
annotations:
summary: "Cache is near capacity"
description: "Cache is {{ $value | humanizePercentage }} full"
# alertmanager.yml
global:
smtp_smarthost: 'smtp.example.com:587'
smtp_from: 'alertmanager@example.com'
route:
group_by: ['alertname']
group_wait: 10s
group_interval: 10s
repeat_interval: 1h
receiver: 'email-notifications'
routes:
- match:
severity: critical
receiver: 'pagerduty'
- match:
severity: warning
receiver: 'email-notifications'
receivers:
- name: 'email-notifications'
email_configs:
- to: 'dns-team@example.com'
- name: 'pagerduty'
pagerduty_configs:
- service_key: 'your-pagerduty-key'
Version 6.x:
logging:
level: "notice" # crit, err, warning, notice, info, debug
Version 5.x:
log_level = 'notice'
# View logs in real-time
sudo journalctl -u knot-resolver -f
# Filter by priority
sudo journalctl -u knot-resolver -p err -f
# View recent errors
sudo journalctl -u knot-resolver -p err --since "1 hour ago"
# Export logs for analysis
sudo journalctl -u knot-resolver --since "2026-01-01" > knot-resolver.log
Promtail Configuration (for Loki):
# promtail.yml
positions:
filename: /tmp/positions.yaml
server:
http_listen_port: 9080
scrape_configs:
- job_name: knot-resolver
static_configs:
- targets:
- localhost
labels:
job: knot-resolver
__path__: /var/log/journal/*/knot-resolver.service.log
pipeline_stages:
- regex:
expression: '^(?P<timestamp>.*?) (?P<level>\w+): (?P<message>.*)'
- labels:
- level
# Count errors per hour
sum(rate({job="knot-resolver", level="err"}[1h])) by (level)
# Find DNSSEC validation failures
{job="knot-resolver"} |= "DNSSEC validation failed"
# Track configuration reloads
{job="knot-resolver"} |= "configuration reloaded"
| Metric | Target | Warning | Critical |
|---|---|---|---|
| Query Latency (p95) | < 100ms | > 200ms | > 500ms |
| Cache Hit Rate | > 50% | < 30% | < 10% |
| Error Rate | < 1% | > 5% | > 10% |
| DNSSEC Validation Rate | > 99% | < 95% | < 90% |
| Service Uptime | > 99.9% | < 99% | < 95% |
{
"panels": [
{
"title": "Query Performance",
"targets": [
{
"expr": "histogram_quantile(0.50, rate(resolver_latency_bucket[5m]))",
"legendFormat": "p50"
},
{
"expr": "histogram_quantile(0.95, rate(resolver_latency_bucket[5m]))",
"legendFormat": "p95"
},
{
"expr": "histogram_quantile(0.99, rate(resolver_latency_bucket[5m]))",
"legendFormat": "p99"
}
]
},
{
"title": "Throughput",
"targets": [
{
"expr": "rate(resolver_request_total[1m])",
"legendFormat": "Queries/sec"
}
]
},
{
"title": "Cache Efficiency",
"targets": [
{
"expr": "rate(resolver_cache_hits[1m]) / rate(resolver_cache_lookups[1m]) * 100",
"legendFormat": "Hit Rate %"
}
]
}
]
}
# Monitor memory usage
kresctl stats | grep cache
# Monitor with system tools
top -p $(pgrep -d',' -f knot-resolver)
# Docker container stats
docker stats knot-resolver
# Version 6.x
kresctl status
kresctl stats
# Version 5.x
sudo kresctl
> stats()
# Test connectivity
curl -v http://localhost:8453/metrics
# Check for specific metrics
curl http://localhost:8453/metrics | grep -E "^resolver_request_total"
# Verify Prometheus scraping
curl http://prometheus:9090/api/v1/targets | jq '.data.activeTargets[] | select(.labels.job=="knot-resolver")'
# Check if stats module is loaded
kresctl stats # Should return statistics
# Version 5.x - check modules
sudo kresctl
> modules.list()
# Restart service if needed
sudo systemctl restart knot-resolver
No Metrics Available:
# Check if monitoring is enabled
grep -A 3 "monitoring:" /etc/knot-resolver/config.yaml
# Check if port is listening
ss -tlnp | grep 8453
# Check firewall
sudo ufw status | grep 8453
High Latency:
# Check upstream latency
kresctl stats upstreams
# Check cache performance
kresctl cache stats
# Review query patterns
kresctl stats frequent
Cache Issues:
# Check cache size
kresctl cache stats
# Clear cache if needed
kresctl cache clear
# Monitor cache hit rate
curl http://localhost:8453/metrics | grep resolver_cache
Questions? Find all contact information on our contact page.