Netdata is a distributed, real-time performance and health monitoring system for servers, containers, and IoT devices. It provides per-second granularity data with minimal overhead.
| File/Directory | Path | Purpose |
|---|---|---|
| Main config | /etc/netdata/netdata.conf | Core Netdata configuration |
| Health config | /etc/netdata/health/health.conf | Alerting configuration |
| Health templates | /etc/netdata/health/ | Health alert templates |
| Collectors | /etc/netdata/go.d/ | Go.d plugin configurations |
| Python.d | /etc/netdata/python.d/ | Python plugin configurations |
| Charts.d | /etc/netdata/charts.d/ | Shell plugin configurations |
| Stream config | /etc/netdata/stream.conf | Parent/child streaming |
| Alarm templates | /var/lib/netdata/health.d/ | Alarm notification templates |
| Custom plugins | /usr/libexec/netdata/python.d/ | Custom Python plugins |
| Custom go.d | /usr/libexec/netdata/go.d/ | Custom Go plugins |
| Logs | /var/log/netdata/ | Netdata log files |
| Cache | /var/cache/netdata/ | RRD and cache files |
| Registry | /var/lib/netdata/registry/ | Agent registry data |
# /etc/netdata/netdata.conf
# [global]: identity of this agent, the account it runs as, and where it logs.
[global]
# Hostname (defaults to system hostname)
hostname = my-server.example.com
# Run as user
run as user = netdata
# Debug mode (disable in production)
debug = no
# Error logs are sent to syslog rather than to a file.
# NOTE(review): other commands in this guide tail /var/log/netdata/error.log;
# with this setting that file may stay empty - confirm which is intended.
error log = syslog
# Access logs are written to a file on disk
access log = /var/log/netdata/access.log
# Log level
log level = info
# Log facility (for syslog)
log facility = daemon
[web]
# Web server configuration: listen address, TLS material, static-file
# ownership and HTTP response behaviour for the built-in dashboard/API.
enabled = yes
# 0.0.0.0 listens on every IPv4 interface, so the dashboard is reachable
# from any network the host is attached to unless a firewall restricts it.
bind to = 0.0.0.0
port = 19999
# NOTE(review): this repeats the listen address already given by `bind to`;
# confirm which of the two keys the installed version actually reads.
bind sockets to IP = 0.0.0.0
# SSL/TLS configuration (private key and certificate, PEM files)
ssl key = /etc/netdata/ssl/netdata-key.pem
ssl certificate = /etc/netdata/ssl/netdata.pem
# Web files
web files owner = root
web files group = netdata
# Default web refresh time
# NOTE(review): key name reads like a typo ("timed ms") - confirm spelling.
default refresh timed ms = 1000
# Web server threads
web server threads = 5
# Compression
web compression = gzip
# Cache
# NOTE(review): unit is not stated here (presumably seconds) - confirm.
web cache duration = 60
# Authentication
web auth type = basic
web auth realm = NetData Server Access
# Allow specific origins (CORS); `*` places no restriction on the origin.
web allow origins from = *
# X-Frame-Options
web x-frame-options = DENY
[health]
# Health monitoring: turns alerting on and points it at its config
# directory, variable cache, alarm log and notification script.
enabled = yes
# Health configuration directory
health configuration path = /etc/netdata/health
# Health variables path
health variables path = /var/cache/netdata/health
# Alarm log
alarm log = /var/log/netdata/alarm.log
# Alarm notification scripts
# NOTE(review): verify this path on the target host; packaged installs
# commonly place alarm-notify.sh under a `plugins.d` directory.
alarm notification script = /usr/libexec/netdata/plugins/alarm-notify.sh
# Silence notifications (for maintenance); left commented out, so
# notifications are not silenced by this file.
# silent = yes
[ml]
# Machine Learning configuration (anomaly detection settings)
enabled = yes
# Training duration
# NOTE(review): unit not stated; presumably seconds (3600 = 1 hour) - confirm.
train duration = 3600
# Predictions per training
predictions per training = 3600
# Detection ratio
detection ratio = 0.5
# Anomaly rate
anomaly rate = 0.01
[plugins]
# Enable/disable plugin categories; every category listed here is set to yes.
python.d = yes
charts.d = yes
go.d = yes
tc-qos-helper = yes
apps = yes
cgroups = yes
proc = yes
idlejitter = yes
slabinfo = yes
checks = yes
# Plugin paths: per-orchestrator configuration directories under /etc/netdata
python.d config directory = /etc/netdata/python.d
charts.d config directory = /etc/netdata/charts.d
go.d config directory = /etc/netdata/go.d
[collectors]
# Collection frequency in seconds (1 = collect once per second, matching
# the per-second granularity described in the introduction) - not milliseconds.
update every = 1
# Collectors priority (all three orchestrators share the same value here)
python.d priority = 500
charts.d priority = 500
go.d priority = 500
[db]
# Database configuration: storage engine, its disk/cache budget and retention.
mode = dbengine
# NOTE(review): unit not stated; presumably MB like the page cache - confirm.
dbengine multihost disk space = 256
# Page cache size (in MB)
page cache size = 64
# Extent size (in pages)
dbengine extent size = 1024
# Retention (in seconds)
# 86400 = 1 day, 604800 = 1 week
retention = 86400
# DB files path
db files path = /var/cache/netdata/dbengine
# Memory mode
# NOTE(review): this selects the same engine as `mode` above; it looks like
# the older spelling of that key - confirm only one of the two is needed.
memory mode = dbengine
[backend]
# Backend data export (Prometheus, InfluxDB, etc.)
# Export is switched off here; the remaining keys only take effect once
# `enabled` is set to yes.
# NOTE(review): recent Netdata releases configure exporting in a separate
# exporting.conf - confirm this section is honoured by the installed version.
enabled = no
type = prometheus_remote_write
destination = http://prometheus:9090/api/v1/write
prefix = netdata
buffer on failures = 10
timeout ms = 10000
# InfluxDB backend (alternative; uncomment instead of the pair above)
# type = influxdb
# destination = http://influxdb:8086/write?db=netdata
# Graphite backend (alternative; uncomment instead of the pair above)
# type = graphite
# destination = localhost:2003
[stream]
# Streaming configuration (parent/child)
# NOTE(review): no `destination` is set here, while the child-node example in
# stream.conf sets one - confirm whether streaming belongs in netdata.conf.
enabled = yes
# API key for authentication (placeholder - replace before use)
api key = YOUR_STREAM_API_KEY
# Buffer size in bytes (10485760 = 10 MiB)
buffer size bytes = 10485760
# Connect timeout
connect timeout seconds = 60
# Retry timeout
retry timeout seconds = 60
[registry]
# Agent registry: enables the registry and names the registry URL and the
# hostname this agent announces (same value as `hostname` under [global]).
enabled = yes
registry to announce = yes
registry domain = https://registry.my-netdata.io
registry hostname = my-server.example.com
[cloud]
# Netdata Cloud connection
enabled = yes
# The token line is commented out, so this file supplies no token.
# token = YOUR_CLOUD_TOKEN
[logs]
# Log management: how long logs are kept and how large they may grow.
preserve log days = 7
# 104857600 bytes = 100 MiB
max log size = 104857600
# /etc/netdata/health/health.conf
[global]
# Global health settings. The paths below repeat the values given in the
# [health] section of netdata.conf.
enabled = yes
# NOTE(review): meaning of this key is not evident from this file - confirm.
run on health = yes
health configuration path = /etc/netdata/health
health variables path = /var/cache/netdata/health
alarm log = /var/log/netdata/alarm.log
alarm notification script = /usr/libexec/netdata/plugins/alarm-notify.sh
# Email notifications: alerts are handed to alarm-notify.sh in "email" mode
# and relayed through the SMTP server below.
[notification.email]
command = "/usr/libexec/netdata/plugins/alarm-notify.sh email"
recipient = admin@example.com
sender = netdata@example.com
smtp_server = smtp.example.com
# 587 is the mail submission port; TLS is enabled via smtp_tls below.
smtp_port = 587
smtp_user = netdata@example.com
# Placeholder - replace with the real SMTP password. Never commit a real
# secret; keep this file readable only by the netdata user.
smtp_pass = YOUR_SMTP_PASSWORD
smtp_tls = yes
# Slack notifications: alerts are handed to alarm-notify.sh in "slack" mode.
[notification.slack]
command = "/usr/libexec/netdata/plugins/alarm-notify.sh slack"
# Placeholder webhook URL - replace XXX/YYY/ZZZ with the real webhook path.
webhook_url = https://hooks.slack.com/services/XXX/YYY/ZZZ
# NOTE(review): the value starts with `#`, which is also the comment
# character in this file - confirm the parser keeps it as part of the value.
channel = #alerts
username = Netdata
# PagerDuty notifications (service key is a placeholder)
[notification.pagerduty]
command = "/usr/libexec/netdata/plugins/alarm-notify.sh pagerduty"
service_key = YOUR_PAGERDUTY_KEY
# Microsoft Teams notifications (webhook URL is a placeholder)
[notification.teams]
command = "/usr/libexec/netdata/plugins/alarm-notify.sh teams"
webhook_url = https://outlook.office.com/webhook/XXX/YYY/ZZZ
# Custom webhook: runs a site-local script instead of alarm-notify.sh and
# describes the HTTP request it should make.
[notification.webhook]
command = "/usr/local/bin/netdata-webhook.sh"
url = https://api.example.com/alerts
method = POST
content_type = application/json
# /etc/netdata/health/cpu.conf
# CPU usage alerts
#
# All three alarms evaluate every 10 seconds against the 1-minute average of
# a single dimension of the system.cpu chart.
# NOTE(review): `to:` usually names a notification role rather than an
# address - confirm that an email address is accepted by the notifier in use.

# User CPU: warning above 70, critical above 85.
alarm: cpu_usage_warning
    on: system.cpu
    lookup: average -1m unaligned of user
    every: 10s
    warn: $this > 70
    crit: $this > 85
    info: CPU user utilization is above 70%
    to: admin@example.com

# User CPU: critical-only alarm above 90, with an explicit notifier script.
alarm: cpu_usage_critical
    on: system.cpu
    lookup: average -1m unaligned of user
    every: 10s
    crit: $this > 90
    info: CPU user utilization is above 90%
    to: admin@example.com
    exec: /usr/libexec/netdata/plugins/alarm-notify.sh

# CPU iowait alerts: warning above 20, critical above 40.
alarm: cpu_iowait_warning
    on: system.cpu
    lookup: average -1m unaligned of iowait
    every: 10s
    warn: $this > 20
    crit: $this > 40
    info: CPU iowait is elevated
    to: admin@example.com
# /etc/netdata/health/memory.conf
# Memory usage alerts
#
# The thresholds and info texts below are percentages, but the RAM and swap
# charts report absolute sizes. Each lookup therefore uses the `percentage`
# option, which returns the selected dimension as a share of the sum of all
# dimensions on the chart, so `$this` is a value between 0 and 100.

# RAM: warning above 75%, critical above 90% (1-minute average).
alarm: ram_usage_warning
    on: system.ram
    lookup: average -1m unaligned percentage of used
    units: %
    every: 10s
    warn: $this > 75
    crit: $this > 90
    info: RAM utilization is above 75%
    to: admin@example.com

# RAM: critical-only alarm above 95%, with an explicit notifier script.
alarm: ram_usage_critical
    on: system.ram
    lookup: average -1m unaligned percentage of used
    units: %
    every: 10s
    crit: $this > 95
    info: RAM utilization is above 95%
    to: admin@example.com
    exec: /usr/libexec/netdata/plugins/alarm-notify.sh

# Swap usage alerts: warning above 50%, critical above 80%.
alarm: swap_usage_warning
    on: mem.swap
    lookup: average -1m unaligned percentage of used
    units: %
    every: 10s
    warn: $this > 50
    crit: $this > 80
    info: Swap usage is above 50%
    to: admin@example.com
# /etc/netdata/health/disk.conf
# Disk space alerts
#
# The space thresholds are percentages, while the disk space chart reports
# absolute sizes. The `percentage` lookup option converts the `used`
# dimension into its share of all dimensions on the chart.

# Disk space: warning above 80%, critical above 90% (checked every 30s).
alarm: disk_space_warning
    on: disk_space._
    lookup: average -1m unaligned percentage of used
    units: %
    every: 30s
    warn: $this > 80
    crit: $this > 90
    info: Disk space is above 80%
    to: admin@example.com

# Disk space: critical-only alarm above 95%, with an explicit notifier script.
alarm: disk_space_critical
    on: disk_space._
    lookup: average -1m unaligned percentage of used
    units: %
    every: 30s
    crit: $this > 95
    info: Disk space is above 95%
    to: admin@example.com
    exec: /usr/libexec/netdata/plugins/alarm-notify.sh

# Disk I/O alerts: absolute read-rate threshold, so no `percentage` here.
# NOTE(review): confirm that a chart with the id `disk_io._` exists on the
# target host and which unit its `reads` dimension uses.
alarm: disk_io_warning
    on: disk_io._
    lookup: average -1m unaligned of reads
    every: 10s
    warn: $this > 100000
    info: High disk read activity
    to: admin@example.com
# /etc/netdata/health/network.conf
# Network bandwidth alerts
#
# Both alarms are bound to the eth0 interface by chart id; adjust the chart
# names for hosts whose interface is named differently.

# Receive bandwidth: warning when the 1-minute average exceeds 100000000.
# NOTE(review): the threshold is in the chart's own unit - confirm that unit.
alarm: net_bandwidth_warning
    on: net.eth0
    lookup: average -1m unaligned of received
    every: 10s
    warn: $this > 100000000
    info: High network receive bandwidth
    to: admin@example.com

# Network errors alerts: warning above 10, critical above 100.
alarm: net_errors_warning
    on: net_errors.eth0
    lookup: average -1m unaligned of in
    every: 10s
    warn: $this > 10
    crit: $this > 100
    info: Network receive errors detected
    to: admin@example.com
# /etc/netdata/go.d/go.d.conf
# Orchestrator-wide defaults for the go.d collectors.
update_every: 1
autodetection_retry: 0

# Modules to load. Every module uses the same chart priority.
modules:
  - {name: weblog, priority: 60000}
  - {name: nginx, priority: 60000}
  - {name: mysql, priority: 60000}
  - {name: postgres, priority: 60000}
  - {name: redis, priority: 60000}
  - {name: mongodb, priority: 60000}
  - {name: docker, priority: 60000}
  - {name: k8s_state, priority: 60000}
  - {name: prometheus, priority: 60000}
  - {name: httpcheck, priority: 60000}
  - {name: ping, priority: 60000}
  - {name: sshcheck, priority: 60000}
  - {name: tcpcheck, priority: 60000}
# /etc/netdata/go.d/nginx.conf
# Two stub_status endpoints, each polled every 5 seconds with a 2-second
# request timeout.
jobs:
  - name: local_nginx
    url: http://localhost/stub_status
    timeout: 2
    update_every: 5

  - name: remote_nginx
    url: http://192.168.1.10/stub_status
    timeout: 2
    update_every: 5
# /etc/netdata/go.d/mysql.conf
jobs:
  # Local server reached over the UNIX socket; the DSN carries no password.
  - name: local_mysql
    dsn: netdata@unix(/var/run/mysqld/mysqld.sock)/
    update_every: 5

  # Remote server reached over TCP. YOUR_MYSQL_PASSWORD is a placeholder:
  # substitute the monitoring account's password and keep this file readable
  # only by the netdata user, since the DSN embeds the credential.
  - name: remote_mysql
    dsn: monitoring:YOUR_MYSQL_PASSWORD@tcp(192.168.1.20:3306)/
    update_every: 5
    # NOTE(review): this list directly follows the remote job, so it is
    # nested under it here - confirm it is not meant to apply to all jobs.
    collect:
      - global_status
      - global_variables
      - slave_status
      - processlist
      - innodb_metrics
# /etc/netdata/go.d/redis.conf
jobs:
  # Local instance over the UNIX socket; no password configured.
  - name: local_redis
    address: unix:///var/run/redis/redis.sock
    update_every: 5

  # Remote instance over TCP. YOUR_REDIS_PASSWORD is a placeholder: set the
  # real password and keep this file readable only by the netdata user.
  - name: remote_redis
    address: 192.168.1.30:6379
    password: YOUR_REDIS_PASSWORD
    update_every: 5
# /etc/netdata/go.d/docker.conf
# Single job against the local Docker socket, polled every 5 seconds, with
# container, volume and network collection all switched on.
jobs:
  - name: local_docker
    socket: unix:///var/run/docker.sock
    update_every: 5
    collect_containers: true
    collect_volumes: true
    collect_networks: true
# /etc/netdata/go.d/httpcheck.conf
jobs:
  # Main site: any of the listed success/redirect codes counts as healthy.
  - name: website_main
    url: https://example.com
    timeout: 5
    update_every: 30
    status_accepted: [200, 201, 204, 301, 302]

  # API health endpoint: must answer 200 and contain the expected fragment.
  - name: api_health
    url: https://api.example.com/health
    timeout: 5
    update_every: 30
    status_accepted: [200]
    response_expected:
      - '"status":"ok"'

  # Certificate expiry: checked hourly; warn at 30 days, critical at 7 days.
  - name: ssl_expiry
    url: https://example.com
    timeout: 5
    update_every: 3600
    check_ssl: true
    ssl_expire_days_warn: 30
    ssl_expire_days_crit: 7
# /etc/netdata/go.d/prometheus.conf
# Scrapes three local /metrics endpoints with a 5-second request timeout.
jobs:
  # Prometheus server itself, every 15 seconds.
  - name: prometheus_local
    url: http://localhost:9090/metrics
    timeout: 5
    update_every: 15

  # Exporter on port 9100, every 5 seconds.
  - name: node_exporter
    url: http://localhost:9100/metrics
    timeout: 5
    update_every: 5

  # Exporter on port 8080, every 5 seconds.
  - name: cadvisor
    url: http://localhost:8080/metrics
    timeout: 5
    update_every: 5
# /etc/netdata/python.d/python.d.conf
# Orchestrator-wide defaults for the python.d collectors.
update_every: 1
autodetection_retry: 0

# Modules. Every module uses the same chart priority.
python_modules:
  - {name: web_log, priority: 60000}
  - {name: nginx, priority: 60000}
  - {name: apache, priority: 60000}
  - {name: mysql, priority: 60000}
  - {name: postgres, priority: 60000}
  - {name: redis, priority: 60000}
  - {name: mongodb, priority: 60000}
  - {name: docker, priority: 60000}
  - {name: rabbitmq, priority: 60000}
  - {name: elasticsearch, priority: 60000}
  - {name: haproxy, priority: 60000}
  - {name: squidlog, priority: 60000}
  - {name: fail2ban, priority: 60000}
# /etc/netdata/python.d/web_log.conf
# One job per log file; `type` selects how the file is parsed.

local_nginx_access:
  name: local_nginx
  path: /var/log/nginx/access.log
  type: nginx

local_nginx_error:
  name: local_nginx_error
  path: /var/log/nginx/error.log
  type: nginx_error

local_apache_access:
  name: local_apache
  path: /var/log/apache2/access.log
  type: apache

# NOTE(review): the job is called "remote" but reads a local filesystem path.
remote_apache:
  name: remote_apache
  path: /var/log/httpd/access_log
  type: apache
# /etc/netdata/python.d/fail2ban.conf
# Single job named `local`: talks to fail2ban over its UNIX socket and
# tracks the three jails listed.
local:
  socket: /var/run/fail2ban/fail2ban.sock
  jails:
    - sshd
    - nginx-http-auth
    - apache-auth
# /etc/netdata/stream.conf
# Parent side: the section name is the API key that child nodes present
# (the child example uses this same value as its `api key`).
[11111111-2222-3333-4444-555555555555]
# Child API key
enabled = yes
default history = 3600
default memory mode = dbengine
health enabled by default = auto
# `*` accepts streaming connections from any source address.
allow streaming from = *
# /etc/netdata/stream.conf (on child node)
# Child side: streams this node's metrics to the parent at `destination`,
# authenticating with the API key defined as a section name on the parent.
[stream]
enabled = yes
# Parent address; 19999 is the web port configured under [web].
destination = parent.example.com:19999
api key = 11111111-2222-3333-4444-555555555555
# 10485760 bytes = 10 MiB
buffer size bytes = 10485760
connect timeout seconds = 60
retry timeout seconds = 60
#!/bin/bash
# /usr/local/bin/netdata-custom-alert.sh
# Custom alert handler for Netdata: posts one alert to a Slack incoming
# webhook as a coloured attachment.
#
# Positional arguments:
#   $1 status   $2 name     $3 info    $4 value   $5 unit
#   $6 family   $7 context  $8 chart   $9 host
#
# Exit status is curl's: non-zero when the request fails, times out, or the
# webhook answers with an HTTP error.

ALERT_STATUS="$1"
ALERT_NAME="$2"
ALERT_INFO="$3"
ALERT_VALUE="$4"
ALERT_UNIT="$5"
# Family and context are accepted for interface compatibility; the payload
# below does not include them.
ALERT_FAMILY="$6"
ALERT_CONTEXT="$7"
ALERT_CHART="$8"
ALERT_HOST="$9"
WEBHOOK_URL="https://hooks.slack.com/services/XXX/YYY/ZZZ"

# json_escape STRING
# Prints STRING escaped for use inside a JSON string literal. Backslash is
# handled first so the escapes added afterwards are not escaped again.
json_escape() {
    local s=$1
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
}

# Determine color based on status
case "$ALERT_STATUS" in
    "CRITICAL")
        COLOR="danger"
        EMOJI=":rotating_light:"
        ;;
    "WARNING")
        COLOR="warning"
        EMOJI=":warning:"
        ;;
    "CLEAR")
        COLOR="good"
        EMOJI=":white_check_mark:"
        ;;
    *)
        COLOR="#808080"
        EMOJI=":information_source:"
        ;;
esac

# Alert fields come from alert definitions and chart metadata and may contain
# quotes, backslashes or newlines; escape every one before embedding it.
J_STATUS=$(json_escape "$ALERT_STATUS")
J_NAME=$(json_escape "$ALERT_NAME")
J_INFO=$(json_escape "$ALERT_INFO")
J_VALUE=$(json_escape "$ALERT_VALUE $ALERT_UNIT")
J_CHART=$(json_escape "$ALERT_CHART")
J_HOST=$(json_escape "$ALERT_HOST")

# Create payload
PAYLOAD=$(cat <<EOF
{
  "attachments": [
    {
      "color": "$COLOR",
      "title": "$EMOJI Netdata Alert: $J_NAME",
      "fields": [
        {"title": "Status", "value": "$J_STATUS", "short": true},
        {"title": "Host", "value": "$J_HOST", "short": true},
        {"title": "Chart", "value": "$J_CHART", "short": true},
        {"title": "Value", "value": "$J_VALUE", "short": true},
        {"title": "Info", "value": "$J_INFO", "short": false}
      ],
      "footer": "Netdata Monitoring",
      "ts": $(date +%s)
    }
  ]
}
EOF
)

# --fail turns HTTP errors into a non-zero exit status, --max-time keeps a
# hung webhook from blocking the caller, and --silent/--show-error drop the
# progress meter while still reporting failures on stderr.
curl --fail --silent --show-error --max-time 10 \
    -X POST -H 'Content-type: application/json' \
    --data "$PAYLOAD" "$WEBHOOK_URL"
#!/bin/bash
# /usr/libexec/netdata/plugins/alarm-notify.sh (email section)

# send_email STATUS NAME INFO VALUE UNIT FAMILY CONTEXT CHART HOSTNAME RECIPIENT
#
# Writes a plain-text summary of one alert and pipes it to mail(1), addressed
# to RECIPIENT with a subject of the form
# "[STATUS] Netdata Alert: NAME on HOSTNAME".
# The return status is that of the mail command.
send_email() {
    local status="$1" name="$2" info="$3" value="$4" unit="$5"
    local family="$6" context="$7" chart="$8" hostname="$9"
    # The tenth positional parameter needs braces; a bare $10 would be read
    # as $1 followed by a literal 0.
    local recipient="${10}"
    local subject="[$status] Netdata Alert: $name on $hostname"

    # One argument per output line; the final empty argument produces the
    # blank line that ends the message.
    printf '%s\n' \
        "Netdata Alert Notification" \
        "Status: $status" \
        "Alert: $name" \
        "Host: $hostname" \
        "Chart: $chart" \
        "Family: $family" \
        "Context: $context" \
        "Value: $value $unit" \
        "Info: $info" \
        "Time: $(date)" \
        "--" \
        "Netdata Monitoring System" \
        "" |
        mail -s "$subject" "$recipient"
}
# Check configuration syntax
# NOTE(review): confirm with `netdata -h` that `-t` performs a syntax check on
# the installed version; it may instead expect an update-interval argument.
sudo netdata -t
# Test with debug output (-W set overrides [global] debug for this run only)
sudo netdata -W set global "debug" "yes" -t
# Check health configuration
# NOTE(review): confirm this executable exists on the target host.
sudo /usr/libexec/netdata/plugins/health -t
# Verify go.d plugins
# NOTE(review): packaged installs commonly ship the orchestrators as
# plugins.d/go.d.plugin and plugins.d/python.d.plugin - confirm both paths.
sudo /usr/libexec/netdata/plugins/go.d -t -m all
# Verify python.d plugins
sudo /usr/libexec/netdata/plugins/python.d -t
# Restart Netdata (use this after changing netdata.conf)
sudo systemctl restart netdata
# Check service status
sudo systemctl status netdata
# View logs
sudo journalctl -u netdata -f
sudo tail -f /var/log/netdata/error.log
# Reload health (alert) configuration without a restart.
# SIGUSR2 asks the daemon to re-read its health configuration; SIGHUP only
# makes it reopen its log files and does not reload any settings.
sudo killall -USR2 netdata
# Enable a go.d module
# NOTE(review): confirm that a `.enabled` symlink has any effect; module
# selection is listed under `modules:` in go.d.conf elsewhere in this guide.
sudo ln -s /etc/netdata/go.d/nginx.conf /etc/netdata/go.d/nginx.conf.enabled
# Disable a go.d module (removes only the symlink, not nginx.conf itself)
sudo rm /etc/netdata/go.d/nginx.conf.enabled
# Enable a python.d module (same symlink convention as above)
sudo ln -s /etc/netdata/python.d/web_log.conf /etc/netdata/python.d/web_log.conf.enabled
# List available modules (lists the per-module configuration files)
ls /etc/netdata/go.d/
ls /etc/netdata/python.d/
# Check Netdata status
sudo systemctl status netdata
# Check if Netdata is listening (19999 is the port set under [web])
sudo netstat -tlnp | grep 19999
# Check running processes (the output also includes the grep command itself)
ps aux | grep netdata
# Access web interface (prints the dashboard HTML to the terminal)
curl http://localhost:19999
# Check API endpoint
curl http://localhost:19999/api/v1/info
# List all charts
curl http://localhost:19999/api/v1/charts
# Get metrics
curl http://localhost:19999/api/v1/allmetrics
# List running collectors
# NOTE(review): confirm the plugin path and that `-d` lists collectors on the
# installed version rather than starting the plugin in debug mode.
sudo /usr/libexec/netdata/plugins/go.d -d
# Check collector status via API
# The URL is quoted because `?` is a shell glob character: unquoted, the
# shell may try to expand it against file names before curl ever sees it.
curl 'http://localhost:19999/api/v1/functions?context=collectors'
# View collector logs
sudo tail -f /var/log/netdata/go.d.log
sudo tail -f /var/log/netdata/python.d.log
# Check active alarms
curl http://localhost:19999/api/v1/alarms
# Check alarm log (same file as `alarm log` in the health configuration)
sudo tail -f /var/log/netdata/alarm.log
# Test notification
# NOTE(review): verify the script path on the target host; packaged installs
# commonly place alarm-notify.sh under a `plugins.d` directory.
sudo /usr/libexec/netdata/plugins/alarm-notify.sh test
# Check streaming status (parent)
# -s suppresses curl's progress meter, which would otherwise be printed to
# the terminal in the middle of jq's output whenever curl is piped.
curl -s http://localhost:19999/api/v1/info | jq '.children'
# Check streaming status (child): follow the error log, stream lines only
sudo tail -f /var/log/netdata/error.log | grep stream
Squeezing every bit of performance from your Netdata installation? Our experts help with:
Optimize your setup: office@linux-server-admin.com | Contact Us