Icinga
Icinga is a modern, open-source monitoring framework that grew out of Nagios and keeps its check-plugin model, adding distributed zones, a REST API, and a rule-based configuration language.
Installation
Ubuntu/Debian
# Add repository
# apt-key is deprecated: keep the signing key in its own keyring and bind it
# to this repository with signed-by, so it is not trusted for other sources.
# Replace "ubuntu" / "icinga-focal" with your distribution and release codename.
curl -fsSL https://packages.icinga.com/icinga.key | \
  sudo gpg --dearmor -o /usr/share/keyrings/icinga-archive-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/icinga-archive-keyring.gpg] https://packages.icinga.com/ubuntu icinga-focal main" | \
  sudo tee /etc/apt/sources.list.d/icinga.list
# Refresh the package index, then install the Icinga 2 daemon
sudo apt update
sudo apt install icinga2

# Turn on the API, notification and Graphite writer features
for feature in api notification graphite; do
  sudo icinga2 feature enable "$feature"
done

# Start the daemon now and register it to start at boot
sudo systemctl start icinga2
sudo systemctl enable icinga2

# Confirm the installed version
sudo icinga2 --version
Distributed Setup (Master/Satellite)
# Master node configuration
# /etc/icinga2/icinga2.conf
# (the file name above is a label, so it must be a comment to keep the snippet valid)
include "zones.conf"
/* /etc/icinga2/zones.conf */

/* Top-level zone containing the master endpoint. */
object Zone "master" {
  endpoints = [ "master.example.com" ]
}

/* Connection details for the master node. */
object Endpoint "master.example.com" {
  host = "192.168.1.10"
  port = 5665
}

/* Satellite zone; its parent is the master zone. */
object Zone "satellite-1" {
  endpoints = [ "satellite-1.example.com" ]
  parent = "master"
}

/* Connection details for the satellite node. */
object Endpoint "satellite-1.example.com" {
  host = "192.168.1.20"
  port = 5665
}
Configuration
Host Definitions
/* /etc/icinga2/conf.d/hosts.conf */

/* Web server host; custom vars are available to apply rules as host.vars.*. */
object Host "web-server-01" {
  import "generic-host"

  address = "192.168.1.10"
  groups = [ "webservers" ]

  vars.os = "Linux"
  vars.location = "Datacenter A"
}
# Database host. Icinga 2 declares objects with the "object" keyword;
# "define" is Nagios/Icinga 1 syntax and is rejected by the config parser.
object Host "database-01" {
  import "generic-host"

  address = "192.168.1.20"

  # Custom variables available to apply rules as host.vars.*
  vars.db_type = "mysql"
  vars.backup_enabled = true
}
Service Definitions
/* Services */

/* ICMP reachability check bound to one host. */
object Service "ping4" {
  host_name = "web-server-01"
  groups = [ "basic" ]
  check_command = "ping4"
}
# HTTP check bound to a single host.
# An "object Service" is tied to exactly one host through host_name;
# "assign where" expressions are only valid inside "apply" rules, so the
# pattern match that used to be here has been removed. To cover every host
# matching "web*", use an apply rule instead of a per-host object.
object Service "http" {
  host_name = "web-server-01"
  check_command = "http"
}
/* System load check with custom 1-minute warning and critical thresholds. */
object Service "load" {
  check_command = "load"
  host_name = "web-server-01"

  vars.load_wload1 = 5
  vars.load_cload1 = 10
}
Apply Rules
/* Apply services based on conditions */

/* Root disk check for every Linux host that has not opted out. */
apply Service "disk /" {
  import "generic-service"

  check_command = "disk"

  assign where host.vars.os == "Linux"
  ignore where host.vars.no_disk_checks == true
}
/* MySQL check for every host whose db_type custom variable is "mysql". */
apply Service "mysql" {
  import "generic-service"

  check_command = "mysql"

  assign where host.vars.db_type == "mysql"
}
# Apply based on custom attributes
# The service name previously ended in a stray space ("ssl-cert "), which
# would become part of the object name and break lookups such as "host!ssl-cert".
# NOTE(review): the ITL contrib command is named "ssl_cert" (underscore);
# if you defined your own "ssl-cert" CheckCommand, switch this back.
apply Service "ssl-cert" {
  import "generic-service"

  check_command = "ssl_cert"

  assign where host.vars.check_ssl == true
}
Custom Variables
# Variables in host definitions
object Host "prod-app" {
  # A Host needs a check_command; importing the same template as the other
  # hosts in this guide supplies it (set "address" as well for real hosts).
  import "generic-host"

  # Nested dictionary: read as host.vars.monitoring.<key> in apply rules
  vars.monitoring = {
    alert_handler = "pagerduty"
    check_interval = 30s
    retry_interval = 5s
  }

  vars.environment = "production"
  vars.app_tier = "frontend"
}
# Use in services
apply Service "health-check" {
  check_command = "http"

  # The Service attribute is "check_interval"; "interval" belongs to
  # Notification objects and is not a valid Service attribute.
  check_interval = host.vars.monitoring.check_interval

  assign where host.vars.app_tier == "frontend"
}
Notifications
Contact Definitions
# Notification recipient.
object User "sysadmin" {
  display_name = "System Administrator"
  email = "sysadmin@example.com"
  pager = "+1-555-0123"

  # State filter: service states (Critical, Warning, Unknown) and host state (Down)
  states = [ Critical, Warning, Down, Unknown ]

  # Type filter takes notification types (Problem, Recovery, Acknowledgement,
  # Custom, DowntimeStart, ...). "Service" and "Host" are object types, not
  # notification types, and are not valid here.
  types = [ Problem, Recovery ]

  period = "24x7"
}
# On-call group.
# UserGroup has no "members" attribute in Icinga 2. Membership is declared
# either on the User (groups = [ "oncall" ]) or with a group assign rule,
# as done here.
object UserGroup "oncall" {
  display_name = "On-call"

  assign where user.name in [ "sysadmin", "engineer1" ]
}
Notification Commands
# Notification command that runs PluginDir/mail-notification.
# NOTE(review): the flag-to-value mapping must match what your script expects;
# confirm each option against the script's usage text.
object NotificationCommand "mail-service" {
  command = [ PluginDir + "/mail-notification" ]

  arguments = {
    # Was a duplicate of "-t"; the notification comment is the usual value here.
    "-c" = "$notification.comment$"
    "-e" = "$user.email$"
    "-f" = "$notification.author$"
    "-i" = "$service.state_id$"
    # Was "$service.$macro$", which is not a resolvable runtime macro.
    "-l" = "$host.name$"
    "-n" = "$service.display_name$"
    "-o" = "$service.output$"
    "-r" = "$icinga.long_date_time$"
    "-s" = "$host.display_name$"
    "-t" = "$notification.type$"
    "-u" = "$service.state$"
  }
}
# Mail notification for all services.
# "assign where" is only valid in apply rules, so this must be
# "apply Notification ... to Service" rather than "object Notification".
# Apply rules are evaluated when the configuration is loaded, not when a check
# result arrives, so the runtime state cannot be tested in "assign where";
# restrict states with the "states" filter instead.
apply Notification "service-mail" to Service {
  # NOTE(review): "mail-notification" must be a Notification template defined
  # in your configuration (the stock one is "mail-service-notification").
  import "mail-notification"

  command = "mail-service"
  users = [ "sysadmin" ]

  # Non-OK service states only
  states = [ Warning, Critical, Unknown ]

  assign where true
}
Advanced Notifications
# Time-based escalation
# Written as an apply rule because "assign where" is not allowed in
# "object Notification". The Critical restriction is expressed with the
# "states" filter, since apply rules are evaluated at config load time.
apply Notification "critical-escalation" to Service {
  # NOTE(review): "mail-notification" must be a Notification template defined
  # in your configuration (the stock one is "mail-service-notification").
  import "mail-notification"

  command = "mail-service"
  users = [ "sysadmin" ]

  states = [ Critical ]

  # Escalation window relative to the start of the problem.
  # The dictionary keys are "begin" and "end"; there is no "duration" key.
  times = {
    begin = 0   # Now
    end = 1h    # First hour
  }

  assign where true
}
/* Integration with external systems */

/* Runs a local Slack script, passing host and service details as flags. */
object NotificationCommand "slack-notification" {
  command = [ "/usr/local/bin/slack-notify.sh" ]

  arguments = {
    "-h" = "$host.name$"
    "-s" = "$service.name$"
    "-u" = "$service.state$"
    "-m" = "$service.output$"
  }
}
Icinga Web Interface
Installation
# Install Icinga Web 2
sudo apt install icingaweb2 icingaweb2-module-doc icingaweb2-module-monitoring

# Enable modules
sudo icingacli module list
sudo icingacli module enable monitoring
# The reporting module is not part of the packages installed above;
# install it first, otherwise this command fails.
sudo icingacli module enable reporting

# Setup wizard
sudo icingacli setup token create
sudo icingacli setup token show

# Visit: http://localhost/icingaweb2/setup
# There are no default credentials: paste the setup token shown above into the
# wizard, which then asks you to create the first administrative user.
Using Web Interface
# Manage services via CLI
# The monitoring module exposes a "list" command whose first argument is the
# object type ("hosts", "services", ...); there are no "list-hosts" or
# "list-services" commands.
sudo icingacli monitoring list hosts
sudo icingacli monitoring list services --host=web-server-01

# Generate reports
# NOTE(review): requires the reporting module to be installed and enabled;
# confirm the exact subcommand with "icingacli reporting --help".
sudo icingacli reporting list
REST API
Authentication
# Setup API user
# "api setup" enables the API feature, creates certificates and writes an
# ApiUser "root" with a randomly generated password to
# /etc/icinga2/conf.d/api-users.conf. Restart the daemon to activate it.
sudo icinga2 api setup
sudo systemctl restart icinga2

# Test API
# Replace the password after "root:" with the generated one from api-users.conf.
# -k skips TLS verification; for production pass the CA with --cacert instead.
curl -k --user "root:root" \
  "https://localhost:5665/v1/status"
API Queries
# Fetch a single host object and pretty-print the JSON
curl --insecure -u "root:root" \
  "https://localhost:5665/v1/objects/hosts/web-server-01" | jq '.'

# Print each service object returned for the host
curl --insecure -u "root:root" \
  "https://localhost:5665/v1/objects/services?host=web-server-01" \
  | jq '.results[]'
# Get service state
# -G makes curl append the --data-urlencode value to the URL as a query string
# and keep the request a GET. Without it curl sends a POST, which this endpoint
# does not accept for queries.
# In service filters the host is available directly as "host".
curl -k -G --user "root:root" \
  "https://localhost:5665/v1/objects/services" \
  --data-urlencode 'filter=host.name=="web-server-01"' \
  | jq '.results[] | {name: .attrs.name, state: .attrs.state}'
API Commands
# Acknowledge problem
# acknowledge-problem works on both hosts and services, so "type" must say
# which one is targeted; "service" then names the object as "<host>!<service>".
curl -k --user "root:root" \
  -X POST "https://localhost:5665/v1/actions/acknowledge-problem" \
  -H "Accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
    "type": "Service",
    "service": "web-server-01!http",
    "author": "admin",
    "comment": "Investigating issue"
  }'
# Schedule downtime
# Compute the window from the current time: fixed epoch values in the past
# would create a downtime that has already expired.
start_time=$(date +%s)
end_time=$((start_time + 3600))

# The JSON body is read from stdin (-d @-) so the shell variables expand;
# \" is passed through unchanged by the here-document.
curl -k --user "root:root" \
  -X POST "https://localhost:5665/v1/actions/schedule-downtime" \
  -H "Accept: application/json" \
  -H "Content-Type: application/json" \
  -d @- <<EOF
{
  "type": "Host",
  "filter": "host.name==\"web-server-01\"",
  "start_time": ${start_time},
  "end_time": ${end_time},
  "duration": 3600,
  "author": "admin",
  "comment": "Maintenance window"
}
EOF
Check Commands
Built-in Checks
# HTTP check
# A Service object must name its host; without host_name validation fails.
# Service names must be unique per host, so adjust the name if this host
# already has an "http" service.
object Service "http" {
  host_name = "web-server-01"
  check_command = "http"

  vars.http_uri = "/"
  vars.http_ssl = true
}
# MySQL check
# A Service object must name its host; without host_name validation fails.
# Service names must be unique per host, so adjust the name if this host
# already has a "mysql" service (for example from an apply rule).
object Service "mysql" {
  host_name = "database-01"
  check_command = "mysql"

  vars.mysql_user = "icinga"
  # Placeholder only: use a dedicated low-privilege account and keep the real
  # password out of version control.
  vars.mysql_password = "password"
}
# PostgreSQL check
# A Service object must name its host; without host_name validation fails.
object Service "postgres" {
  host_name = "database-01"
  check_command = "postgres"

  vars.postgres_query = "SELECT version();"
}
# NRPE check (remote)
# A Service object must name its host; without host_name validation fails.
object Service "disk" {
  host_name = "web-server-01"
  check_command = "nrpe"

  # Name of the command as defined in the remote NRPE daemon's configuration
  vars.nrpe_command = "check_disk"
}
Performance Data & Graphing
Enable Performance Data
/* /etc/icinga2/features-enabled/graphite.conf */

/* Sends performance data to a local Carbon listener; thresholds on, metadata off. */
object GraphiteWriter "graphite" {
  host = "localhost"
  port = 2003

  enable_send_metadata = false
  enable_send_thresholds = true
}
# Performance data format
# PerfdataWriter has no "path" attribute; host and service performance data
# are written to separate files configured individually.
object PerfdataWriter "perfdata" {
  host_perfdata_path = "/var/spool/icinga2/perfdata/host-perfdata"
  service_perfdata_path = "/var/spool/icinga2/perfdata/service-perfdata"
}
Graphite Integration
# Install Graphite
# Package installation and editing files under /etc need root, as in the
# other commands in this guide.
sudo apt install graphite-web graphite-carbon

# Configure Carbon aggregator
sudo vim /etc/carbon/carbon.conf

# View metrics at: http://localhost/graphite
Best Practices
- Use distributed zones for multi-datacenter monitoring
- Define custom variables for environment-specific configs
- Create reusable templates for similar hosts/services
- Set appropriate check intervals (critical: 5 min, regular: 30 min)
- Use apply rules instead of individual service definitions
- Configure meaningful notification escalations
- Monitor Icinga itself (meta-monitoring)
- Regular backups of configuration and data
- Use API for programmatic management
- Implement service dependencies to reduce alert noise