Manage Nagios with Scripts

Working at many different organisations over the past 10 years, I have been involved in the implementation and maintenance of many different monitoring systems. These include commercial and open source solutions, such as:

– Nagios
– IP Monitor
– Uptime
– OpenNMS
– Zabbix

Although Nagios may not be the most scalable or dynamic solution, for some organisations that perhaps have 1-100 servers, Nagios may be the best solution.

Additionally, given the ability to write custom plugins, as well as the inherent SSL / TLS encryption of the NRPE checks, it may be the most viable option. There are pros and cons for each solution out there, and the right choice is completely dependent on the skill level, nature of the environment and available time for management / maintenance.

During the course of utilising Nagios, we noticed that one of the most time consuming tasks was maintaining the flat file configuration for adding, removing and modifying hosts within Nagios.

As a result, it was decided to write a quick Perl-based script to manage the day-to-day tasks of adding and removing hosts within Nagios. When all is said and done, it really does save a lot of time. This script can be integrated with existing control based management situations or other automation scripts / solutions where command line options and external scripting / plugins are possible. This way, you can encompass a more rounded, standardised and reliable way of managing your systems in Nagios.

In order for the script to work, you need to have 3 types of servers :

– Windows
– Unix/Linux
– VPS (Virtual Private Server)

Obviously you can modify the script to encompass any number of categories. Basically, the script relies on three pre-existing "model" hosts (one per category) in the Nagios hosts.cfg, hostgroups.cfg and services.cfg files; when you add a new server, it is modelled on the host that matches the category you specify.

Please take a look at the script, hopefully it will help make your life a little easier! 😉

#!/usr/bin/perl

# Don't break me, I'm used by automated scripts.

###############################################################################
# Star Dot Hosting : www.stardothosting.com
# Nagios Config Manager
# Description: This program will add/remove entries from nagios.
# The files will be backed up in an archive before any changes are made.
###############################################################################
# Perl Libraries
use File::Copy;
use Switch;
###############################################################################
# Variables

###############################################################################
# Nagios file handlers
my $host_file     = "/usr/local/nagios/etc/objects/hosts.cfg";
my $group_file    = "/usr/local/nagios/etc/objects/hostgroups.cfg";
my $services_file = "/usr/local/nagios/etc/objects/services.cfg";

# Names of the pre-existing "model" hosts. When a server is added, its name is
# inserted next to the matching model host in hostgroups.cfg / services.cfg.
my $unixmatch    = "sdh-unix";
my $windowsmatch = "sdh-windows";
my $vpsmatch     = "vps-server";

# Timestamp used to name the backup directory. Backticks return the command
# output including its trailing newline, so remove it here; otherwise every
# path built from $date would contain an embedded newline.
my $date = `date "+%d%m%y-%H%M%S"`;
chomp $date;

###############################################################################
# Verify Arguments
# Both the host name ($ARGV[0]) and the command letter ($ARGV[1]) are required.
if ((!$ARGV[0]) || (!$ARGV[1])) {
        usage();
}

# The command must be a single letter. length() yields a number, so it is
# compared numerically (">") rather than with the string operator "gt".
if (length($ARGV[1]) > 1) {
        print "Command options too long!\n";
        usage();
}

# Verify Nagios is working before we start: run the config check and abort if
# it already reports problems, so that a later failure can be attributed to
# the change made by this script.
my $nagios = `nagios -v /usr/local/nagios/etc/nagios.cfg`;
if ($nagios =~ /One or more problems was encountered while processing the config files/) {
        print "CRITICAL ERROR!\n\nNagios is already broken and we cannot continue!\nPlease fix it!\n";

        # Show every output line that carries an error. Splitting on newlines
        # (rather than on periods) keeps messages that contain file names
        # such as "hosts.cfg" intact.
        for my $error (split /\n/, $nagios) {
                print "$error\n" if $error =~ /Error:/;
        }

        die "\n\nProgram Aborting before even starting due to nagios config error!\n";
}


# Remove temp files that an earlier run may have left behind.
# The result of unlink is not checked.
unlink($_) for (
        "/tmp/hosts.cfg.tmp",
        "/tmp/hostgroups.cfg.tmp",
        "/tmp/services.cfg.tmp",
);

###############################################################################
# The Main Program control statement.
###############################################################################
# Dispatch on the command letter, case-insensitively. The argument checks
# above guarantee $ARGV[1] is a single character, so a plain comparison is
# enough and the deprecated Switch source filter is not needed here.
if (lc($ARGV[1]) eq 'd') {
        # "delete" is also a Perl builtin, so the sub must be called with "&".
        &delete();
}
elsif (lc($ARGV[1]) eq 'x') {
        addEntry("x");
}
elsif (lc($ARGV[1]) eq 'w') {
        addEntry("w");
}
elsif (lc($ARGV[1]) eq 'v') {
        addEntry("v");
}
else {
        print "Option: $ARGV[1] not found \n";
        usage();
}
###############################################################################

###############################################################################
# Subroutines
###############################################################################

###############################################################################
## sub backup - Backs up the nagios config files that are to be modified
###############################################################################
sub backup {
        # Copy the three Nagios config files into a new timestamped directory
        # under /var/backup/nagios before anything is modified.
        # Takes no arguments. Dies if the directory cannot be created or a
        # file cannot be copied: the rollback in checkNagios restores from
        # these copies, so carrying on without them would be unsafe.

        # $date comes from a backtick call and may still carry a newline.
        # Strip it in place so later users of $date see the same clean value.
        $date =~ s/\n//g;

        my $backup_dir = "/var/backup/nagios/$date";
        mkdir($backup_dir, 0755) || die "Cannot create directory $backup_dir: $!\n";

        my @to_save = (
                [ $host_file,     "$backup_dir/hosts.cfg.bck"      ],
                [ $group_file,    "$backup_dir/hostgroups.cfg.bck" ],
                [ $services_file, "$backup_dir/services.cfg.bck"   ],
        );
        for my $pair (@to_save) {
                my ($from, $to) = @$pair;
                copy($from, $to) || die "Cannot copy $from to $to: $!\n";
        }
}

###############################################################################
## sub openFile($filename) - returns the file to a buffer for parsing
###############################################################################
sub openFile {
        # Read the file named by the first argument and return its whole
        # content as a single string (an empty string for an empty file).
        # Dies if the file cannot be opened.
        my $file = shift;

        # Three-argument open with a lexical handle: the file name can never
        # be interpreted as an open mode, and the handle is private to this call.
        open(my $fh, '<', $file) or die "Can't open $file : $!";

        # Slurp mode: with the input record separator undefined, a single
        # read returns everything up to end of file.
        local $/;
        my $blob = <$fh>;
        close($fh);

        return defined $blob ? $blob : '';
}

###############################################################################
###############################################################################


###############################################################################
## sub delete - Deletes the servername from the config files.
###############################################################################
sub delete {
        # Run the delete workflow in a fixed order: back up the config files,
        # remove the host from each of the three files, then let checkNagios
        # apply and verify the result. A step that dies stops the remaining ones.
        for my $step (\&backup, \&delete_host, \&delete_hostgroup, \&delete_services, \&checkNagios) {
                $step->();
        }
}

###############################################################################
## sub delete_host - deletes the host entry from hosts.cfg
###############################################################################
sub delete_host {
        # Remove the "define host{ ... }" block whose host_name is exactly
        # $ARGV[0] from the content of hosts.cfg and write the result to
        # /tmp/hosts.cfg.tmp. Dies if no such block exists, or if the temp
        # file cannot be written.
        my $host_str = &openFile($host_file);
        my $pattern  = $ARGV[0];

        # One complete host definition for this host name:
        #  - [^{}]* never crosses a brace, so the match stays inside one block;
        #  - \Q..\E makes the host name literal ("." in an FQDN is not a wildcard);
        #  - the lookahead rejects longer names, so "web1" does not match "web10".
        my $host_def = qr/(?i:define)\s+host\s*\{[^{}]*?\bhost_name[ \t]+\Q$pattern\E(?![\w.-])[^{}]*\}[ \t]*\n?/;

        if ($host_str =~ s/$host_def//g) {
                print "command: $ARGV[1] : Deleting $ARGV[0]\n";
                print "Deleted $ARGV[0] from hosts.cfg\n";

                # Write the successful deletion to a tmp file.
                open(my $out, '>', "/tmp/hosts.cfg.tmp") || die "Cannot open /tmp/hosts.cfg.tmp";
                print {$out} $host_str;
                close($out) || die "Cannot write /tmp/hosts.cfg.tmp: $!\n";
        } else {
                die "Could not find and entry for $ARGV[0] in hosts.cfg\n";
        }
}

###############################################################################
## sub delete_hostgroup - deletes the hostgroup entry
###############################################################################
sub delete_hostgroup {
        # Remove every whole-word occurrence of the host name $ARGV[0] from the
        # content of hostgroups.cfg and write the result to
        # /tmp/hostgroups.cfg.tmp. Dies if the name does not occur at all, or
        # if the temp file cannot be written.
        my $hostgrp_str = &openFile($group_file);
        my $pattern     = $ARGV[0];

        # The name as a literal, not preceded or followed by another host-name
        # character, so deleting "web1" leaves "web10" and "myweb1" untouched.
        my $name = qr/(?<![\w.-])\Q$pattern\E(?![\w.-])/;

        if ($hostgrp_str !~ /$name/) {
                die "Could not find and entry for $ARGV[0] in hostgroups.cfg\n";
        }

        # Remove the name together with exactly one neighbouring comma so the
        # remaining list stays well formed wherever the name was positioned.
        $hostgrp_str =~ s/,[ \t]*$name//g;          # "a,NAME,b" / "a,NAME" -> "a,b" / "a"
        $hostgrp_str =~ s/$name[ \t]*,[ \t]*//g;    # "NAME,b"              -> "b"
        $hostgrp_str =~ s/$name//g;                 # NAME was the only entry
        print "Deleted $ARGV[0] from hostgroups.cfg\n";

        open(my $out, '>', "/tmp/hostgroups.cfg.tmp") || die "Cannot open /tmp/hostgroups.cfg.tmp";
        print {$out} $hostgrp_str;
        close($out) || die "Cannot write /tmp/hostgroups.cfg.tmp: $!\n";
}

###############################################################################
## sub delete_services - delete the serivices.cfg entry
###############################################################################
sub delete_services {
        # Remove every whole-word occurrence of the host name $ARGV[0] from the
        # content of services.cfg and write the result to
        # /tmp/services.cfg.tmp.
        #
        # A missing entry is not fatal: addEntry does not add VPS hosts to
        # services.cfg, so dying here would make such hosts impossible to
        # delete. In that case the content is staged unchanged.
        my $services_str = &openFile($services_file);
        my $pattern      = $ARGV[0];

        # The name as a literal, not preceded or followed by another host-name
        # character, so deleting "web1" leaves "web10" and "myweb1" untouched.
        my $name = qr/(?<![\w.-])\Q$pattern\E(?![\w.-])/;

        if ($services_str =~ /$name/) {
                # Remove the name together with exactly one neighbouring comma
                # so the remaining list stays well formed.
                $services_str =~ s/,[ \t]*$name//g;         # "a,NAME,b" / "a,NAME" -> "a,b" / "a"
                $services_str =~ s/$name[ \t]*,[ \t]*//g;   # "NAME,b"              -> "b"
                $services_str =~ s/$name//g;                # NAME was the only entry
                print "Deleted $ARGV[0] from services.cfg\n";
        } else {
                print "No entry for $ARGV[0] in services.cfg, leaving it unchanged\n";
        }

        open(my $out, '>', "/tmp/services.cfg.tmp") || die "Cannot open /tmp/services.cfg.tmp";
        print {$out} $services_str;
        close($out) || die "Cannot write /tmp/services.cfg.tmp: $!\n";
}

###############################################################################
## sub checkNagios - checks nagios for errors and rolesback if so.
###############################################################################
sub checkNagios {
        # Apply the staged temp files to the live Nagios config, validate the
        # result with "nagios -v", and either reload Nagios (success) or
        # restore the files from the backup directory for this run (failure).
        # Takes no arguments. Dies if a backup file cannot be restored.

        my @staged = (
                [ "/tmp/hosts.cfg.tmp",      $host_file     ],
                [ "/tmp/hostgroups.cfg.tmp", $group_file    ],
                [ "/tmp/services.cfg.tmp",   $services_file ],
        );
        for my $pair (@staged) {
                my ($tmp, $live) = @$pair;
                # A file that was not staged (for example services.cfg when a
                # VPS host is added) simply has nothing to apply.
                next unless -e $tmp;
                copy($tmp, $live) || print "Cannot copy $tmp to $live\n";
        }

        # Validate the same main config file that the pre-flight check uses;
        # the object files edited above live under /usr/local/nagios/etc.
        my $success = `nagios -v /usr/local/nagios/etc/nagios.cfg`;

        if ($success =~ /One or more problems was encountered while processing the config files/) {
                print "CRITICAL FAILURE - See Errors!\n";
                # Show every output line that carries an error.
                for my $error (split /\n/, $success) {
                        print "$error\n" if $error =~ /Error:/;
                }
                print "\nRestoring from backup\nCheck /tmp/hosts.cfg.tmp /tmp/hostgroups.cfg.tmp /tmp/services.cfg.tmp\n";

                my @restore = (
                        [ "/var/backup/nagios/$date/hosts.cfg.bck",      $host_file     ],
                        [ "/var/backup/nagios/$date/hostgroups.cfg.bck", $group_file    ],
                        [ "/var/backup/nagios/$date/services.cfg.bck",   $services_file ],
                );
                for my $pair (@restore) {
                        my ($bck, $live) = @$pair;
                        copy($bck, $live) || die "Cannot copy $bck to $live\n";
                }
        } else {
                print "Nagios config reports success, restarting nagios\n";
                my $restart = `/etc/init.d/nagios reload`;
                print $restart;
        }
}

###############################################################################
## sub addEntry - adds the unix or windows host entry.
###############################################################################
sub addEntry {
        # Add the host $ARGV[0] (alias $ARGV[2], IP address $ARGV[3]) to the
        # Nagios configuration.
        #
        # Parameter: $type - "x" for a unix host, "w" for a windows host; any
        #            other value is treated as a VPS.
        #
        # A host definition is appended to hosts.cfg and the host name is
        # inserted after the matching model host in hostgroups.cfg (and in
        # services.cfg for unix/windows hosts). The results are staged in /tmp
        # and then applied and verified by checkNagios.
        # Dies if the host already exists; prints a message and exits on
        # invalid arguments.
        my $type = shift;
        my $pattern = $ARGV[0];
        my $host_str = &openFile($host_file);
        my $hostgrp_str = &openFile($group_file);
        my $services_str = &openFile($services_file);

        # Match the host name literally and only as a whole name, so that an
        # existing "web10" does not block adding "web1".
        my $name_re = qr/(?<![\w.-])\Q$pattern\E(?![\w.-])/;

        if ($host_str =~ /$name_re/) { die "$ARGV[0] already in hosts.cfg, aborting!\n"; }
        if ($hostgrp_str =~ /$name_re/) { die "$ARGV[0] already in hostgroups.cfg, aborting!\n"; }
        if (($type eq 'w') || ($type eq 'x')) {
                if ($services_str =~ /$name_re/) { die "$ARGV[0] already in services.cfg, aborting!\n"; }
        }

        # Some sanity checks to help prevent data entry errors
        if (!$ARGV[2]) { print "\nNo Server Alias, aborting!\n\n"; usage(); }
        #if ($ARGV[2]=~/[0-9]{5,8}$/i) {} else { print "No Member ID!\n"; exit 0}
        if (!$ARGV[3]) { print "\nNo IP Address specified, aborting!\n\n"; usage(); }

        # The address must be exactly four dot-separated decimal octets, each
        # no larger than 255. Anchoring the pattern rejects letters and any
        # leading or trailing junk in one step.
        my @ip = $ARGV[3] =~ /\A(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\z/;
        if (@ip != 4 || grep { $_ > 255 } @ip) {
                print "\nIP Address $ARGV[3] is invalid, please double check\n\n"; exit 0;
        }

        # Passes sanity checks, back up the config files.
        backup();

        # Pick the model host for this server type.
        my $model;
        if ($type eq 'x') {
                print "Unix!\n";
                $model = $unixmatch;
        }
        elsif ($type eq 'w') {
                print "Windows\n";
                $model = $windowsmatch;
        }
        else {
                print "VPS\n";
                $model = $vpsmatch;
        }

        # Insert the new host right after every whole-name occurrence of the
        # model host. Only unix/windows hosts are added to services.cfg.
        my $model_re = qr/(?<![\w.-])\Q$model\E(?![\w.-])/;
        $hostgrp_str =~ s/$model_re/$model,$pattern/g;
        if (($type eq 'w') || ($type eq 'x')) {
                $services_str =~ s/$model_re/$model,$pattern/g;
        }

        # Make sure the new definition starts on a line of its own.
        if (defined $host_str && length($host_str) && $host_str !~ /\n\z/) {
                $host_str .= "\n";
        }

        # Add it to the host_str buffer.
        $host_str .= "define host{
        use                     sdh-dedicated
        host_name               $ARGV[0]
        alias                   $ARGV[2]
        address                 $ARGV[3]
        }\n\n";

        # Stage all three files. services.cfg is staged even when it is
        # unchanged (VPS) so that checkNagios always finds a complete set.
        my @staged = (
                [ "/tmp/hosts.cfg.tmp",      $host_str     ],
                [ "/tmp/hostgroups.cfg.tmp", $hostgrp_str  ],
                [ "/tmp/services.cfg.tmp",   $services_str ],
        );
        for my $pair (@staged) {
                my ($tmp, $content) = @$pair;
                open(my $out, '>', $tmp) || die "Cannot open $tmp";
                print {$out} $content;
                close($out) || die "Cannot write $tmp: $!\n";
        }

        checkNagios();
}
###############################################################################
## sub usage - prints the usage when things don't add up from args
###############################################################################
sub usage{
        # Print the command-line help to standard output and exit with
        # status 0. Never returns to the caller.
        #
        # NOTE(review): the argument placeholders on the "Usage:" line were
        # reconstructed from the examples below and from how the script reads
        # @ARGV (host name, command letter, alias, IP address) - confirm the
        # wording.
        print "Usage: /usr/local/bin/nagios-add.pl <hostname> <d|w|x|v> [\"<alias>\" <ip address>]\n\n";
        print "Optional Flags:\n";
        print "\td delete a server\n";
        print "\tw add a windows server\n";
        print "\tx add a unix server\n";
        print "\tv add a VPS server\n\n";
        print "eg delete:\t./usr/local/bin/nagios-add.pl sdh-server12 d\n";
        print "eg add:\t\t./usr/local/bin/nagios-add.pl sdh-server12 x \"sdh-server12 sdh-server12.stardothosting.com MemID:155\" 192.168.111.10\n";
        exit 0;
}
Menu