Skip to content

Icinga

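Icinga 2 is an open-source monitoring system that originated as a fork of Nagios. It remains compatible with Nagios-style check plugins and adds distributed monitoring zones, a REST API, and a rule-based configuration language (templates, apply rules, custom variables).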

Installation

Ubuntu/Debian

# Add repository
# apt-key is deprecated: keep the signing key in its own keyring and bind it
# to this repository only via signed-by.
curl -fsSL https://packages.icinga.com/icinga.key | \
  sudo gpg --dearmor -o /usr/share/keyrings/icinga-archive-keyring.gpg
# "icinga-focal" targets Ubuntu 20.04; substitute your release codename
# (on Debian use https://packages.icinga.com/debian icinga-<codename>).
echo "deb [signed-by=/usr/share/keyrings/icinga-archive-keyring.gpg] https://packages.icinga.com/ubuntu icinga-focal main" | \
  sudo tee /etc/apt/sources.list.d/icinga.list

# Install Icinga2 plus the standard check plugins (ping, http, disk, load, ...)
sudo apt update
sudo apt install icinga2 monitoring-plugins

# Enable features
sudo icinga2 feature enable api
sudo icinga2 feature enable notification
sudo icinga2 feature enable graphite

# Validate the configuration before (re)starting the daemon
sudo icinga2 daemon -C

# Start service
sudo systemctl start icinga2
sudo systemctl enable icinga2

# Verify
icinga2 --version

Distributed Setup (Master/Satellite)

# Master node configuration
# /etc/icinga2/icinga2.conf -- make sure the zone definitions are loaded
include "zones.conf"

# /etc/icinga2/zones.conf
# Top-level zone; it has no parent.
object Zone "master" {
  endpoints = ["master.example.com"]
}

# Endpoint names must match the names listed in the zone's "endpoints" array.
object Endpoint "master.example.com" {
  host = "192.168.1.10"
  port = 5665
}

# Satellite zone attached below the master zone.
object Zone "satellite-1" {
  endpoints = ["satellite-1.example.com"]
  parent = "master"
}

object Endpoint "satellite-1.example.com" {
  host = "192.168.1.20"
  port = 5665
}

Configuration

Host Definitions

# /etc/icinga2/conf.d/hosts.conf
# Host "web-server-01": inherits from the "generic-host" template and sets
# the address plus two custom variables (os, location).
object Host "web-server-01" {
  import "generic-host"
  address = "192.168.1.10"
  # Custom variables; vars.os is what the "disk /" apply rule matches on.
  vars.os = "Linux"
  vars.location = "Datacenter A"

  # Member of the "webservers" host group (the group object is not shown here).
  groups = ["webservers"]
}

# Host "database-01": a MySQL database server.
# Icinga 2 declares objects with the "object" keyword; "define" is Nagios /
# Icinga 1 syntax and is rejected by the Icinga 2 config parser.
object Host "database-01" {
  import "generic-host"
  address = "192.168.1.20"
  # vars.db_type is matched by the "mysql" apply rule.
  vars.db_type = "mysql"
  vars.backup_enabled = true
}

Service Definitions

# Services
# Service "ping4" bound directly to host "web-server-01" using the "ping4"
# check command; it is placed in the "basic" service group.
object Service "ping4" {
  host_name = "web-server-01"
  check_command = "ping4"
  groups = ["basic"]
}

# Service "http" bound to exactly one host through host_name.
# "assign where" is only valid inside apply rules, so it must not appear in
# an "object" definition. To cover every host whose name matches "web*",
# write an apply rule instead:
#   apply Service "http" {
#     check_command = "http"
#     assign where match("web*", host.name)
#   }
object Service "http" {
  host_name = "web-server-01"
  check_command = "http"
}

# Service "load" on host "web-server-01" using the "load" check command.
object Service "load" {
  host_name = "web-server-01"
  check_command = "load"
  # Presumably the warning / critical thresholds for the 1-minute load
  # average (names follow the load_w*/load_c* pattern) -- confirm against
  # the check command definition.
  vars.load_wload1 = 5
  vars.load_cload1 = 10
}

Apply Rules

# Apply services based on conditions
# Creates a "disk /" service on every host whose vars.os is "Linux",
# except hosts that set vars.no_disk_checks to true.
apply Service "disk /" {
  import "generic-service"
  check_command = "disk"
  assign where host.vars.os == "Linux"
  ignore where host.vars.no_disk_checks == true
}

# Creates a "mysql" service on every host whose vars.db_type is "mysql".
apply Service "mysql" {
  import "generic-service"
  check_command = "mysql"
  assign where host.vars.db_type == "mysql"
}

# Apply based on custom attributes
# Creates an "ssl-cert" service on every host that sets vars.check_ssl = true.
# The rule name becomes the service name, so it must not carry stray
# whitespace (a trailing space would end up in "host!ssl-cert " lookups).
# NOTE(review): "ssl-cert" must exist as a CheckCommand; the template library
# ships "ssl" and "ssl_cert" -- confirm which one is intended.
apply Service "ssl-cert" {
  import "generic-service"
  check_command = "ssl-cert"
  assign where host.vars.check_ssl == true
}

Custom Variables

# Variables in host definitions
# Host "prod-app" carrying a nested custom-variable dictionary.
object Host "prod-app" {
  # A Host needs a check_command to pass validation; the template supplies
  # it, as in the other host examples.
  import "generic-host"

  # Nested dictionary: read as host.vars.monitoring.<key> from apply rules.
  vars.monitoring = {
    alert_handler = "pagerduty"
    check_interval = 30s
    retry_interval = 5s
  }
  vars.environment = "production"
  vars.app_tier = "frontend"
}

# Use in services
# Creates a "health-check" service on every frontend host and takes the
# check interval from the host's vars.monitoring dictionary.
apply Service "health-check" {
  check_command = "http"
  # Services are scheduled via check_interval (and retry_interval);
  # "interval" is a Notification attribute and is rejected on a Service.
  check_interval = host.vars.monitoring.check_interval
  assign where host.vars.app_tier == "frontend"
}

Notifications

Contact Definitions

# Notification recipient "sysadmin".
object User "sysadmin" {
  display_name = "System Administrator"
  email = "sysadmin@example.com"
  pager = "+1-555-0123"

  # State filter: service states (Warning/Critical/Unknown) and host state Down.
  states = [Critical, Warning, Down, Unknown]
  # Type filter: takes notification types (Problem, Recovery, Acknowledgement,
  # Custom, DowntimeStart, ...), not object types such as Service or Host.
  types = [Problem, Recovery]
  # Refers to a TimePeriod object named "24x7", which must be defined.
  period = "24x7"
}

# User group "oncall".
# UserGroup has no "members" attribute (that is Nagios contactgroup syntax).
# Membership is set either with groups = ["oncall"] on each User object or,
# as here, with a group assign rule evaluated against every User.
object UserGroup "oncall" {
  display_name = "On-call"
  assign where user.name in ["sysadmin", "engineer1"]
}

Notification Commands

# Notification command that runs PluginDir/mail-notification and passes the
# notification context as command-line flags through runtime macros.
# NOTE(review): the flag letters are defined by the script itself -- confirm
# the mapping against the script's usage text.
object NotificationCommand "mail-service" {
  command = [ PluginDir + "/mail-notification" ]

  arguments = {
    # "-c" previously duplicated "-t" (notification type); it now carries the
    # notification comment so that value is not passed twice.
    "-c" = "$notification.comment$"
    "-e" = "$user.email$"
    "-f" = "$notification.author$"
    "-i" = "$service.state_id$"
    # "$service.$macro$" is not a valid macro (nested '$'); the host name is
    # the value otherwise missing from this argument list.
    "-l" = "$host.name$"
    "-n" = "$service.display_name$"
    "-o" = "$service.output$"
    "-r" = "$icinga.long_date_time$"
    "-s" = "$host.display_name$"
    "-t" = "$notification.type$"
    "-u" = "$service.state$"
  }
}

# Mail notification for all services.
# "assign where" requires an apply rule, and a Notification apply rule must
# name its target type ("to Service"). Assign expressions are evaluated when
# the configuration is loaded, so the runtime state cannot be tested there;
# the "states" filter restricts delivery to non-OK states instead.
# NOTE(review): the "mail-notification" template must be defined elsewhere.
apply Notification "service-mail" to Service {
  import "mail-notification"
  command = "mail-service"
  users = ["sysadmin"]
  states = [Warning, Critical, Unknown]
  # Applies to every service; narrow this expression as needed.
  assign where true
}

Advanced Notifications

# Time-based escalation
# Notifies "sysadmin" about Critical services during the first hour of a
# problem. Written as an apply rule because "assign where" is not valid in an
# "object" definition; the state restriction uses the "states" filter because
# assign expressions are evaluated at config load, not at runtime.
apply Notification "critical-escalation" to Service {
  import "mail-notification"

  command = "mail-service"
  users = ["sysadmin"]
  states = [Critical]
  # The notification window is bounded by times.begin / times.end, measured
  # from the start of the problem; there is no "times.duration" key.
  times.begin = 0  # Now
  times.end = 1h   # First hour

  # Applies to every service; narrow this expression as needed.
  assign where true
}

# Integration with external systems
# Notification command that runs /usr/local/bin/slack-notify.sh and passes
# the service state, host name, service name and check output as flags.
# The flag meanings are defined by the script, which is not shown here.
object NotificationCommand "slack-notification" {
  command = ["/usr/local/bin/slack-notify.sh"]
  arguments = {
    "-u" = "$service.state$"
    "-h" = "$host.name$"
    "-s" = "$service.name$"
    "-m" = "$service.output$"
  }
}

Icinga Web Interface

Installation

# Install Icinga Web 2
sudo apt install icingaweb2 icingaweb2-module-doc icingaweb2-module-monitoring

# Enable modules
# NOTE(review): the apt line above installs only the doc and monitoring
# modules; "reporting" presumably has to be installed separately before it
# can be enabled -- confirm.
sudo icingacli module list
sudo icingacli module enable monitoring
sudo icingacli module enable reporting

# Setup wizard
# Create a setup token, print it, then enter it in the web wizard.
sudo icingacli setup token create
sudo icingacli setup token show
# Visit: http://localhost/icingaweb2/setup

# NOTE(review): a package install does not appear to ship default
# credentials (admin / icingaweb2); the admin account is chosen in the
# setup wizard -- confirm.

Using Web Interface

# Manage services via CLI
# The monitoring module's CLI takes "list" as the command and the object
# type as the action: "list hosts" / "list services" (no hyphen).
sudo icingacli monitoring list hosts
sudo icingacli monitoring list services --host=web-server-01

# Generate reports
# NOTE(review): requires the reporting module to be installed and enabled;
# run "icingacli reporting" to see the commands your module version provides.
sudo icingacli reporting list-reports

REST API

Authentication

# Setup API user
# Enables the api feature, generates the TLS certificates and creates an
# ApiUser "root" with a random password in /etc/icinga2/conf.d/api-users.conf.
sudo icinga2 api setup
# The API listener is only started after a restart.
sudo systemctl restart icinga2

# Test API
# "root:root" is a placeholder: use the password from api-users.conf.
# -k disables certificate verification; acceptable for a local test only.
curl -k --user "root:root" \
  "https://localhost:5665/v1/status"

API Queries

# Get host status
curl -k --user "root:root" \
  "https://localhost:5665/v1/objects/hosts/web-server-01" | jq '.'

# List services of one host
# -G makes curl append the --data-urlencode value to the URL and send a GET;
# without it curl switches to POST, which is not a query on this endpoint.
curl -k -G --user "root:root" \
  "https://localhost:5665/v1/objects/services" \
  --data-urlencode 'filter=host.name=="web-server-01"' \
  | jq '.results[]'

# Get service state
# In a service query the owning host is available as "host" in the filter.
curl -k -G --user "root:root" \
  "https://localhost:5665/v1/objects/services" \
  --data-urlencode 'filter=host.name=="web-server-01"' \
  | jq '.results[] | {name: .attrs.name, state: .attrs.state}'

API Commands

# Acknowledge problem
# POSTs a JSON body to the acknowledge-problem action; the target service is
# addressed as "<host>!<service>". "root:root" is a placeholder credential.
curl -k --user "root:root" \
  -X POST "https://localhost:5665/v1/actions/acknowledge-problem" \
  -H "Accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
    "service": "web-server-01!http",
    "author": "admin",
    "comment": "Investigating issue"
  }'

# Schedule downtime
# start_time / end_time are Unix timestamps; the values below are
# placeholders lying in the past -- generate real ones with e.g.
# $(date +%s) and $(date -d '+1 hour' +%s).
# The body is JSON, so Content-Type is declared as in the acknowledge call.
curl -k --user "root:root" \
  -X POST "https://localhost:5665/v1/actions/schedule-downtime" \
  -H "Accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
    "type": "Host",
    "filter": "host.name==\"web-server-01\"",
    "start_time": 1234567890,
    "end_time": 1234571490,
    "duration": 3600,
    "author": "admin",
    "comment": "Maintenance window"
  }'

Check Commands

Built-in Checks

# Every "object Service" must name its host via host_name; without it the
# object fails validation. Adjust the host names below to your environment,
# or turn the objects into apply rules with an "assign where" expression.

# HTTP check
object Service "http" {
  host_name = "web-server-01"
  check_command = "http"
  vars.http_uri = "/"
  vars.http_ssl = true
}

# MySQL check
object Service "mysql" {
  host_name = "database-01"
  check_command = "mysql"
  # The template library's mysql command reads mysql_username (not
  # mysql_user); an unknown variable is silently ignored by the check.
  vars.mysql_username = "icinga"
  # Placeholder -- do not commit real credentials to the configuration.
  vars.mysql_password = "password"
}

# PostgreSQL check
object Service "postgres" {
  host_name = "database-01"
  check_command = "postgres"
  vars.postgres_query = "SELECT version();"
}

# NRPE check (remote)
# Runs the command that the remote NRPE daemon defines as "check_disk".
object Service "disk" {
  host_name = "web-server-01"
  check_command = "nrpe"
  vars.nrpe_command = "check_disk"
}

Performance Data & Graphing

Enable Performance Data

# /etc/icinga2/features-enabled/graphite.conf
# Sends performance data to a Graphite/Carbon listener on localhost:2003.
object GraphiteWriter "graphite" {
  host = "localhost"
  port = 2003
  # Threshold values are sent along with the metrics; metadata is not.
  enable_send_thresholds = true
  enable_send_metadata = false
}

# Performance data files
# PerfdataWriter has no "path" attribute; host and service performance data
# are written to separate files configured by the two attributes below.
object PerfdataWriter "perfdata" {
  host_perfdata_path = "/var/spool/icinga2/perfdata/host-perfdata"
  service_perfdata_path = "/var/spool/icinga2/perfdata/service-perfdata"
}

Graphite Integration

# Install Graphite
# Package installation and files under /etc need root, as in the other
# install snippets on this page.
sudo apt install graphite-web graphite-carbon

# Configure Carbon aggregator
sudo vim /etc/carbon/carbon.conf

# View metrics at: http://localhost/graphite

Best Practices

  • Use distributed zones for multi-datacenter monitoring
  • Define custom variables for environment-specific configs
  • Create reusable templates for similar hosts/services
  • Set appropriate check intervals (critical: 5 min, regular: 30 min)
  • Use apply rules instead of individual service definitions
  • Configure meaningful notification escalations
  • Monitor Icinga itself (meta-monitoring)
  • Regular backups of configuration and data
  • Use API for programmatic management
  • Implement service dependencies to reduce alert noise