Scrutiny is a web-based hard drive health monitoring tool that uses SMART data to predict drive failures. It provides a beautiful interface for monitoring drive health across multiple systems.
| File/Directory | Path | Purpose |
|---|---|---|
| Main config | /opt/scrutiny/config/scrutiny.yaml | Main configuration |
| Database | /opt/scrutiny/data/scrutiny.db | SQLite database |
| Web files | /opt/scrutiny/web/ | Web interface files |
| Logs | /opt/scrutiny/logs/ | Log files |
| Collector config | /opt/scrutiny/config/collector.yaml | Collector settings |
| Systemd service | /etc/systemd/system/scrutiny.service | Service definition |
| Docker config | docker-compose.yml | Docker configuration |
# /opt/scrutiny/config/scrutiny.yaml
# ========================
# Web Interface Settings
# ========================
# Everything below is nested under `web`; child keys are indented two spaces
# so that `enabled`, `port`, etc. do not collide with other sections.
web:
  # Listening address
  host: 0.0.0.0
  # Port
  port: 8080
  # Base path (for reverse proxy)
  base_path: ""
  # Enable HTTPS
  ssl:
    enabled: false
    cert_file: /opt/scrutiny/config/ssl/cert.pem
    key_file: /opt/scrutiny/config/ssl/key.pem
  # Authentication
  auth:
    enabled: false
    username: admin
    # Example value only - replace with your own secret before enabling auth
    password: AdminPassword123!
  # Session settings
  # NOTE(review): `secure: true` is set while `ssl.enabled` is false; this
  # presumably assumes TLS is terminated by a reverse proxy - confirm.
  session:
    lifetime: 86400  # 24 hours
    secure: true
    httponly: true
# ========================
# Database Settings
# ========================
database:
  # Database type (sqlite, postgres, mysql)
  type: sqlite
  # SQLite configuration (used when type is sqlite)
  sqlite:
    path: /opt/scrutiny/data/scrutiny.db
  # PostgreSQL configuration (used when type is postgres)
  postgres:
    host: localhost
    port: 5432
    database: scrutiny
    username: scrutiny
    # Example value only - replace with your own secret
    password: SecureDbPassword123!
    ssl_mode: disable
  # Connection pool
  # NOTE(review): placed at the `database` level; confirm whether the
  # application expects it nested under a specific backend instead.
  pool:
    max_open_conns: 10
    max_idle_conns: 5
    conn_max_lifetime: 3600
# ========================
# Collector Settings
# ========================
collector:
  # Enable collector
  enabled: true
  # Collection interval (hours)
  interval: 24
  # SMARTCTL path
  smartctl_path: /usr/sbin/smartctl
  # Devices to monitor (a list; each entry is a mapping)
  devices:
    # Auto-detect all devices
    - type: auto
    # Or specify specific devices (uncomment; `type` stays aligned under
    # `device` so each pair forms a single list entry)
    # - device: /dev/sda
    #   type: scsi
    # - device: /dev/sdb
    #   type: nvme
    # - device: /dev/sdc
    #   type: sat
  # Device ignore list
  ignore:
    - /dev/sr0  # Ignore CD/DVD drives
    - /dev/sg*  # Ignore generic SCSI
  # SMART options
  smart_options:
    # Run long test
    run_long_test: false
    # Run short test
    run_short_test: true
    # Collect attributes
    collect_attributes: true
    # Collect error logs
    collect_error_logs: true
    # Collect self-test logs
    collect_self_test_logs: true
# ========================
# Alert Settings
# ========================
alerts:
  # Enable alerts
  enabled: true
  # Alert check interval (hours)
  check_interval: 1
  # Notification methods; each channel has its own `enabled` switch and all
  # are disabled in this example
  notifications:
    # Email notifications
    email:
      enabled: false
      smtp_host: smtp.example.com
      smtp_port: 587
      smtp_user: scrutiny@example.com
      # Example value only - replace with your own secret
      smtp_password: EmailPassword123!
      use_tls: true
      from_address: scrutiny@example.com
      from_name: Scrutiny
      recipients:
        - admin@example.com
    # Slack notifications
    slack:
      enabled: false
      webhook_url: https://hooks.slack.com/services/XXX/YYY/ZZZ
      # Quoted because a leading `#` would otherwise start a comment
      channel: "#alerts"
      username: Scrutiny
    # Discord notifications
    discord:
      enabled: false
      webhook_url: https://discord.com/api/webhooks/XXX/YYY
    # Telegram notifications
    telegram:
      enabled: false
      bot_token: YOUR_BOT_TOKEN
      chat_id: YOUR_CHAT_ID
    # Webhook notifications
    webhook:
      enabled: false
      url: https://api.example.com/alerts
      method: POST
      headers:
        Content-Type: application/json
        Authorization: Bearer YOUR_TOKEN
  # Alert thresholds
  thresholds:
    # Critical SMART attributes (listed by SMART attribute ID)
    critical_attributes:
      - 5  # Reallocated_Sector_Ct
      - 187  # Reported_Uncorrect
      - 188  # Command_Timeout
      - 196  # Reallocated_Event_Count
      - 197  # Current_Pending_Sector
      - 198  # Offline_Uncorrectable
    # Warning thresholds
    warning_thresholds:
      temperature: 50  # Celsius
      power_on_hours: 35000  # Hours
      spin_retry_count: 5
      reallocated_sector_count: 10
    # Critical thresholds
    critical_thresholds:
      temperature: 60  # Celsius
      power_on_hours: 50000  # Hours
      spin_retry_count: 20
      reallocated_sector_count: 100
# ========================
# Metrics Settings
# ========================
metrics:
  # Enable Prometheus metrics
  prometheus:
    enabled: false
    port: 9090
    path: /metrics
  # Metrics to collect
  collect:
    - smart_status
    - temperature
    - power_on_hours
    - spin_up_time
    - spin_retry_count
    - reallocated_sector_count
    - seek_error_rate
    - power_cycle_count
    - g_sense_error_rate
    - head_flying_hours
    - host_reads
    - host_writes
    - read_error_rate
    - write_error_rate
# ========================
# Logging Settings
# ========================
logging:
  # Log level (debug, info, warn, error)
  level: info
  # Log format (json, text)
  format: text
  # Log output (a list of sinks; this example enables both)
  output:
    - file
    - stdout
  # File logging
  file:
    path: /opt/scrutiny/logs/scrutiny.log
    max_size: 50  # MB
    max_backups: 5
    max_age: 30  # days
    compress: true
# ========================
# Performance Settings
# ========================
performance:
  # Worker threads
  workers: 4
  # Collection concurrency
  collection_concurrency: 2
  # Cache settings
  cache:
    enabled: true
    ttl: 3600  # 1 hour
# ========================
# API Settings
# ========================
api:
  # Enable API
  enabled: true
  # API version
  version: v1
  # API endpoint prefix
  prefix: /api
  # API authentication
  auth:
    enabled: false
    # Placeholder - replace with a real key before enabling API auth
    api_key: your-api-key-here
  # Rate limiting
  rate_limit:
    enabled: true
    requests_per_minute: 60
# ========================
# Multi-Host Support
# ========================
hosts:
  # Enable multi-host mode
  enabled: false
  # Host definitions (one list entry per remote host; the api_key values
  # below are placeholders)
  definitions:
    - name: server1
      address: 192.168.1.10
      port: 8080
      api_key: server1-api-key
      enabled: true
    - name: server2
      address: 192.168.1.20
      port: 8080
      api_key: server2-api-key
      enabled: true
# ========================
# Customization
# ========================
customization:
  # Custom title
  title: Scrutiny Drive Health
  # Custom logo
  logo:
    enabled: false
    url: /assets/logo.png
  # Custom CSS (empty string means none is supplied)
  custom_css: ""
  # Footer text
  footer: Scrutiny Monitoring
# docker-compose.yml
# Two services share one image: the web application and an optional
# collector that reports to it over the `monitoring` network.
version: '3.8'
services:
  scrutiny:
    image: ghcr.io/analogj/scrutiny:latest
    container_name: scrutiny
    restart: unless-stopped
    ports:
      # Quoted so the host:container pair is always read as a string
      - "8080:8080"
    volumes:
      - ./scrutiny/config:/opt/scrutiny/config
      - ./scrutiny/data:/opt/scrutiny/data
      - ./scrutiny/logs:/opt/scrutiny/logs
    environment:
      - SCRUTINY_WEB_HOST=0.0.0.0
      - SCRUTINY_WEB_PORT=8080
      - SCRUTINY_DB_PATH=/opt/scrutiny/data/scrutiny.db
      - SCRUTINY_LOG_LEVEL=info
    # Host block devices passed through to the container
    devices:
      - /dev/sda:/dev/sda
      - /dev/sdb:/dev/sdb
    cap_add:
      - SYS_RAWIO
      - SYS_ADMIN
    security_opt:
      - apparmor:unconfined
    networks:
      - monitoring
  # Collector (optional - run on each host)
  scrutiny-collector:
    image: ghcr.io/analogj/scrutiny:latest
    container_name: scrutiny-collector
    restart: unless-stopped
    command: collector run
    volumes:
      - ./scrutiny/config:/opt/scrutiny/config
    environment:
      # Points at the `scrutiny` service by its name on the shared network
      - SCRUTINY_COLLECTOR_API_ENDPOINT=http://scrutiny:8080
    devices:
      - /dev/sda:/dev/sda
    cap_add:
      - SYS_RAWIO
      - SYS_ADMIN
    depends_on:
      - scrutiny
    networks:
      - monitoring
networks:
  monitoring:
    # `external: true` means Compose will not create this network; it must
    # already exist before `docker compose up`
    external: true
# SMARTCTL options in collector.yaml
collector:
  smartctl:
    path: /usr/sbin/smartctl
    # Default options (each list item is one command-line argument, so a
    # flag and its value are separate entries)
    default_options:
      - "-i"  # Print drive info
      - "-H"  # Check drive health
      - "-A"  # Print drive attributes
      - "-l"  # Print drive logs
      - "error"  # Error log
      - "-l"  # Print drive logs
      - "selftest"  # Self-test log
    # Device-specific options (keyed by device type)
    device_options:
      nvme:
        - "-d"
        - "nvme"
      scsi:
        - "-d"
        - "scsi"
      sat:
        - "-d"
        - "sat"
    # Timeout settings
    # NOTE(review): nested under `smartctl` here; confirm the expected level.
    timeout: 60  # seconds
# Alert rules configuration
# Each rule is one list entry with a name, a condition expression, a
# severity, and the notification channels to use.
alerts:
  rules:
    - name: Drive Failing
      condition: "smart_status == 'FAILED'"
      severity: critical
      notifications: [email, slack, discord]
    - name: High Temperature
      condition: "temperature > 55"
      severity: warning
      notifications: [email]
    - name: Reallocated Sectors
      condition: "reallocated_sector_count > 50"
      severity: warning
      notifications: [email, slack]
    - name: Pending Sectors
      condition: "current_pending_sector > 10"
      severity: warning
      notifications: [email]
    - name: Uncorrectable Errors
      condition: "offline_uncorrectable > 0"
      severity: critical
      notifications: [email, slack, discord]
# Email notification configuration
notifications:
  email:
    enabled: true
    smtp_host: smtp.example.com
    smtp_port: 587
    smtp_user: scrutiny@example.com
    # Example value only - replace with your own secret
    smtp_password: EmailPassword123!
    use_tls: true
    from_address: scrutiny@example.com
    from_name: Scrutiny Drive Monitor
    recipients:
      - admin@example.com
      - storage-team@example.com
    # Quoted because the value contains `{{`, `[` and `: `
    subject_template: "[Scrutiny] {{severity}}: {{drive_name}} on {{host}}"
    # Literal block scalar: every line must be indented deeper than the key,
    # and line breaks are preserved in the rendered message
    body_template: |
      Scrutiny Drive Alert
      Host: {{host}}
      Drive: {{drive_name}}
      Model: {{drive_model}}
      Serial: {{drive_serial}}
      Status: {{smart_status}}
      Temperature: {{temperature}}°C
      Power On Hours: {{power_on_hours}}
      Critical Attributes:
      {{#each critical_attributes}}
      - {{name}}: {{value}} (Threshold: {{threshold}})
      {{/each}}
      Please investigate.
      --
      Scrutiny Drive Health Monitor
# Slack notification configuration
notifications:
  slack:
    enabled: true
    webhook_url: https://hooks.slack.com/services/XXX/YYY/ZZZ
    # Quoted because a leading `#` would otherwise start a comment
    channel: "#storage-alerts"
    username: Scrutiny
    icon_emoji: ":hard_drive:"
    # Message template: a single attachment whose `fields` list renders the
    # drive details; templated values are quoted so `{{` is not parsed as a
    # flow mapping
    template:
      attachments:
        - color: "{{severity_color}}"
          title: "Scrutiny Alert: {{severity}}"
          fields:
            - title: Host
              value: "{{host}}"
              short: true
            - title: Drive
              value: "{{drive_name}}"
              short: true
            - title: Model
              value: "{{drive_model}}"
              short: true
            - title: Status
              value: "{{smart_status}}"
              short: true
            - title: Temperature
              value: "{{temperature}}°C"
              short: true
            - title: Power On Hours
              value: "{{power_on_hours}}"
              short: true
          footer: Scrutiny Drive Health
          ts: "{{timestamp_unix}}"
# --- Configuration and access checks ---
# Validate configuration
scrutiny config validate
# Check collector configuration
scrutiny collector validate
# Test SMARTCTL access (-i prints drive info; sudo because raw device access
# normally needs root)
sudo smartctl -i /dev/sda
# Test database connection (a trivial query that only succeeds if the SQLite
# file can be opened)
sqlite3 /opt/scrutiny/data/scrutiny.db "SELECT 1;"
# --- Service management (systemd unit `scrutiny`) ---
# Restart Scrutiny
sudo systemctl restart scrutiny
# Check status
sudo systemctl status scrutiny
# View logs: the first command follows the unit's journal, the second follows
# the application log file. Both run until interrupted (Ctrl+C), so run them
# one at a time.
sudo journalctl -u scrutiny -f
sudo tail -f /opt/scrutiny/logs/scrutiny.log
# --- Manual collector runs ---
# Run collector manually
scrutiny collector run
# Run with verbose output
scrutiny collector run --verbose
# Collect specific device (here /dev/sda)
scrutiny collector run --device /dev/sda
# --- Post-install verification ---
# Check service status
sudo systemctl status scrutiny
# Check if listening (lists listening TCP sockets with their owning process
# and filters for port 8080)
sudo netstat -tlnp | grep 8080
# Access web interface
curl http://localhost:8080
# Check API
curl http://localhost:8080/api/health
# Get device list
curl http://localhost:8080/api/devices
# Run manual collection
scrutiny collector run
# Check collected data (dumps every row of the `devices` table)
sqlite3 /opt/scrutiny/data/scrutiny.db "SELECT * FROM devices;"
# View SMART data (-a prints all SMART information for the device)
sudo smartctl -a /dev/sda
Every deployment is unique. We provide consulting for:
Get personalized assistance: office@linux-server-admin.com | Contact Page