#!/bin/perl

### CONFIGURE THIS FOR YOUR INSTALLATION ###
$nagiosConfig = "/usr/local/nagios/etc/nagios.cfg";
### SHOULDN'T NEED TO MODIFY ANYTHING BELOW HERE ###

#
# batch_by_ssh v1.0 - koreth-nagios@midwinter.com 2003/01/17
#
# Frontend to check_by_ssh to check a number of services on a remote host
# using a single ssh connection to cut down on overhead.
#
# Usage:
#	batch_by_ssh hostname [check_by_ssh-option ...]
#
# This script reads the Nagios configuration file to configure itself.  It
# depends on a number of new configuration options on hosts and services.
# Since the main Nagios executable doesn't like to run into options it
# doesn't understand, we have to use a special syntax to specify the new
# options such that Nagios thinks they're just comments.  Put "#<>" at the
# start of all the extra config options required by this script.  For
# example:
#
# define service {
#	host_name		foobar
#	active_checks_enabled	0
# #<>	batch_type		ssh
# #<>	batch_command		check_temperature!75!85
# }
#
# For hosts, this script uses the following options (new ones are indicated
# by "#<>"):
#
#	host_name (required)
#	address (required)
#		Standard config options.
#	#<>$xyz$ (optional)
#		Overrides macro "xyz" for this host.  Typically you'll set
#		$USER1$ to the path to the remote host's plugin (libexec)
#		directory.
#
# For services, the following options are significant:
#	service_description (required)
#		Standard config option.
#	host_name (semi-optional)
#	hostgroup_name (semi-optional)
#		Standard config options.  At least one of these two options
#		must be present, but either one is fine.
#	#<>batch_command (required)
#		Command to run on remote host.  Same syntax as check_command.
# 	#<>batch_type (optional)
#		If present, must be "ssh" or this script will ignore the
#		service (this is to allow for other batch mechanisms.)
#
# Here's an example configuration to check a couple services on a remote host.
# The host has its plugin binaries installed in /usr2/nagios.
#
# define host {
#	host_name		myserver
#	address			1.2.3.4
# #<>	$USER1$			/usr2/nagios
# }
#
# Here we define the command that calls batch_by_ssh.  This is just a normal
# command like any other.
#
# define command {
#	command_name		batch_by_ssh
#	command_line		$USER1$/batch_by_ssh $HOSTNAME$
# }
#
# These commands will be run on the remote host.  You probably already have
# them in your configuration -- they're specified the same way you'd do it
# on the local host.
#
# define command {
#	command_name		check_local_disk
#	command_line		$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
# }
#
# define command {
#	command_name		check_local_users
#	command_line		$USER1$/check_users -w $ARG1$ -c $ARG2$
# }
#
# Now we have an active service to check the remote host using batch_by_ssh.
# (This can replace a call to check_ssh, since it'll fail if ssh is down.)
#
# define service {
#	use			generic_service
#	service_description	ssh
#	host_name		myserver
#	active_checks_enabled	1
#	check_command		batch_by_ssh
#	normal_check_interval	5
#	retry_check_interval	1
# }
#
# Here we specify the two services we're checking on the remote host.  Note
# that you can specify a list of hosts or hostgroups just like regular
# services.  See the Nagios docs on freshness checks for information on
# what the check_command listed here should do.  We specify a freshness
# threshold that allows the active service's check interval, plus a couple
# of retries, to pass before we consider the service stale.
#
# define service {
#	use			generic_service
#	service_description	User Count
#	host_name		myserver,otherserver
#	active_checks_enabled	0
#	check_freshness		1
#	freshness_threshold	430	; just over 7 minutes = 5-minute check interval + 2 one-minute retries
#	check_command		no_report
# #<>	batch_type		ssh
# #<>	batch_command		check_local_users!20!25
# }
#
# define service {
#	use			generic_service
#	service_description	/home disk space
#	host_name		myserver,otherserver
#	hostgroup_name		group1,group2
#	active_checks_enabled	0
#	check_freshness		1
#	freshness_threshold	430
#	check_command		no_report
# #<>	batch_type		ssh
# #<>	batch_command		check_local_disk!10%!5%!/home
# }
#
# And that's it!  Now Nagios will execute the "ssh" service once every 5
# minutes, and batch_by_ssh will probe myserver for disk space and user
# counts, which will be reported as passive service results.
#


use English;

#
# Exit with a status code and a message.  Prefix the message with a status
# name if it isn't already there.
#
# $_[0] = status name
# $_[1] = exit code
# $_[2] = message
#
sub doExit {
	# Finish the plugin run: remove the temporary results file (if one
	# has been chosen), print a single status line and exit.
	#
	# $_[0] = status name, $_[1] = exit code, $_[2] = message
	my ($statusName, $exitStatus, $text) = @_;

	# $resultsFile is a script-wide global; it is still unset when we
	# are called before the main program has assigned it.
	if ($resultsFile ne "") {
		unlink $resultsFile;
	}

	# Decide on the prefix before touching the message text: it is only
	# added when the message doesn't already start with the status name.
	my $prefix = ($text =~ /^$statusName/) ? '' : $statusName . ' - ';

	# Removes the first newline found in the message (for one-line
	# messages that is the trailing one); we add our own terminator.
	$text =~ s/\n//;

	print $prefix . $text . "\n";
	exit $exitStatus;
}


#
# Expands a command specification to a real command line, including
# macro substitutions.
#
# $_[0] = command line to split
# $_[1] = hostname whose macros we should use
#
sub expandCommand {
	# Turn a "name!arg1!arg2..." command specification into the command
	# line registered for "name" in %checkCommands, with macros
	# substituted.
	#
	# $_[0] = command specification to split
	# $_[1] = hostname whose macro overrides (%hostMacros) should be used
	#
	# Returns the expanded command line, or undef when the command name
	# is not present in %checkCommands.  Macros with no known value are
	# left in the command line untouched.
	my ($spec, $hostname) = @_;

	my ($name, @args) = split(/!/, $spec);
	my $commandLine = $checkCommands{$name};

	# Work on a private copy of the global macro table.  Storing the
	# $ARGn$ values in %macros itself would let arguments from one
	# command leak into a later command that supplies fewer arguments.
	my %callMacros = %macros;
	foreach my $i (1 .. scalar(@args)) {
		$callMacros{'$ARG' . $i . '$'} = $args[$i - 1];
	}

	# Host-specific overrides are substituted first so that they take
	# precedence over the global value of the same macro.
	my $overrides = $hostMacros{$hostname} || {};
	foreach my $macro (keys %{$overrides}) {
		$commandLine =~ s/\Q$macro\E/$$overrides{$macro}/g;
	}

	foreach my $macro (keys %callMacros) {
		$commandLine =~ s/\Q$macro\E/$callMacros{$macro}/g;
	}

	return $commandLine;
}

#
# Given a hash of name/value pairs for a batch service, populate our internal
# data structures.
#
sub slurpBatchService {
	# Record a service's batch command for every host it applies to.
	#
	# $_[0] = reference to a hash of the service's config options
	#
	# Sets $commandsByHost{host}{service_description} to the service's
	# batch_command.  Host groups are expanded through %hostGroups at the
	# time of the call, so a host group has to have been read before any
	# service that refers to it.
	my ($values) = @_;

	my $batchType = $$values{"batch_type"};
	my $command = $$values{"batch_command"};
	my $service = $$values{"service_description"};

	# Don't touch non-batch entries.
	return if !defined($command) or $command eq "";

	# Don't touch other batch mechanisms' entries.
	return if defined($batchType) and $batchType ne "" and $batchType ne "ssh";

	# Combine the host and hostgroup lists into one list of hostnames.
	my @hostnames;
	if (defined $$values{"host_name"}) {
		push @hostnames, split(/,/, $$values{"host_name"});
	}
	if (defined $$values{"hostgroup_name"}) {
		foreach my $rawGroup (split(/,/, $$values{"hostgroup_name"})) {
			(my $hostGroup = $rawGroup) =~ s/^\s+|\s+$//g;
			next unless defined $hostGroups{$hostGroup};
			push @hostnames, split(/,/, $hostGroups{$hostGroup});
		}
	}

	# For each host, add the command and description to its hash.  List
	# entries are trimmed so that "a, b" yields "b" rather than " b",
	# which would never match the hostname given on the command line.
	foreach my $rawHost (@hostnames) {
		(my $host = $rawHost) =~ s/^\s+|\s+$//g;
		next if $host eq "";
		$commandsByHost{$host}{$service} = $command;
	}
}

#
# Given a hash of host values, populate our internal data structures if
# appropriate.  We extract the host address and any user variables.
#
sub slurpHost {
	# Remember what we need to know about one host definition.
	#
	# $_[0] = reference to a hash of the host's config options
	#
	# Stores the address in %hostAddresses (only when both host_name and
	# address are non-empty) and every option whose name looks like
	# $...$ in %hostMacros, keyed by host name.
	my $hostInfo = shift;
	my $name = $hostInfo->{"host_name"};
	my $addr = $hostInfo->{"address"};

	$hostAddresses{$name} = $addr
		unless $name eq "" or $addr eq "";

	# Option names of the form $xyz$ are per-host macro overrides.
	for my $option (grep { m/^(\$.*\$)$/ } keys %{$hostInfo}) {
		$hostMacros{$name}{$option} = $hostInfo->{$option};
	}
}

#
# Given an object type name and a hash of config items, populate our internal
# data structures if appropriate.
#
sub slurpObject {
	# Process one parsed object definition.
	#
	# $_[0] = object type name ("command", "host", "hostgroup", ...)
	# $_[1] = reference to a hash of the object's config options; it is
	#         modified in place when template values are inherited
	my ($objectType, $values) = @_;

	# If this object uses templates, copy the templates' values.  "use"
	# may name several templates separated by commas; they are applied
	# in order and a value is only copied when the object doesn't have
	# one yet, so earlier templates win over later ones.  Templates must
	# have been read before the objects that use them.
	if (defined($$values{"use"}) and $$values{"use"} ne "") {
		foreach my $templateName (split(/\s*,\s*/, $$values{"use"})) {
			my $template = $templates{$objectType}{$templateName};
			next unless defined $template;

			foreach my $key (keys %{$template}) {
				# These describe the template itself and are
				# never inherited.
				next if $key =~ m/^(register|use|name|)$/;
				if (! defined($$values{$key})) {
					$$values{$key} = $$template{$key};
				}
			}
		}

		undef $$values{"use"};
	}

	# Stash template objects away
	if (defined($$values{"name"}) and $$values{"name"} ne "") {
		$templates{$objectType}{$$values{"name"}} = $values;
	}

	# Ignore non-registered objects
	return if defined($$values{"register"}) and $$values{"register"} eq "0";

	if ($objectType eq "command") {
		$checkCommands{$$values{"command_name"}} =
			$$values{"command_line"};
	}
	elsif ($objectType eq "host") {
		&slurpHost($values);
	}
	elsif ($objectType eq "hostgroup") {
		$hostGroups{$$values{"hostgroup_name"}} = $$values{"members"};
	}
	elsif ($objectType eq "service") {
		&slurpBatchService($values);
	}
}


#
# Read a Nagios object config file and slurp up any relevant entries:
# command definitions, hosts (addresses and macro overrides), host group
# membership, and services that carry a batch_command.
#
# $_[0] = path of object config file.
#
sub slurpObjectConfig {
	# Parse one object config file and hand every completed
	# "define <type> { ... }" block to slurpObject.
	#
	# $_[0] = path of object config file
	#
	# Exits through doExit (CRITICAL) when the file can't be opened.
	#
	# The argument is copied right away (callers pass $1) and the file
	# is read into a lexical, so neither the caller's $_ nor its regex
	# captures are disturbed.
	my ($path) = @_;
	my $objectType = "";
	my $argValues = undef;

	# The three-argument open takes the path literally, so strip
	# surrounding whitespace ourselves the way "<$path" used to.
	$path =~ s/^\s+//;
	$path =~ s/\s+$//;

	open(my $config, '<', $path) or &doExit("CRITICAL", 2, "$path: $!");
	while (my $line = <$config>) {
		chomp $line;

		# Trim off our magic pseudo-comment token (needed because
		# Nagios complains if it sees unknown keywords)
		$line =~ s/^\s*#\<\>//;

		# Trim whitespace and comments
		$line =~ s/^\s+//;
		$line =~ s/;.*//;
		$line =~ s/\s+$//;
		next if $line =~ /^[#;]/;

		if ($objectType ne "") {
			if ($line =~ m/^\}/) {
				&slurpObject($objectType, $argValues);

				# Start the next object with a fresh hash;
				# slurpObject may have kept a reference to
				# this one as a template.
				$objectType = "";
				$argValues = undef;
				next;
			}

			# Remember the key and value.
			if ($line =~ m/^(\S+)\s+(.*)/) {
				$$argValues{$1} = $2;
			}
		}

		if ($line =~ m/^define\s+([^\s{]+)/) {
			$objectType = $1;
		}
	}

	close $config;
}

#
# Reads a resource file and populates macro values.
#
# $_[0] = path of resource config file.
#
sub slurpResourceConfig {
	# Read "$NAME$=value" lines from a resource file into %macros.
	#
	# $_[0] = path of resource config file
	#
	# Blank lines and lines starting with "#" are skipped.  Exits through
	# doExit (CRITICAL) when the file can't be opened.
	#
	# The argument is copied right away (callers pass $1) and the file
	# is read into a lexical, so the caller's $_ is left alone.
	my ($path) = @_;

	# The three-argument open takes the path literally, so strip
	# surrounding whitespace ourselves the way "<$path" used to.
	$path =~ s/^\s+//;
	$path =~ s/\s+$//;

	open(my $config, '<', $path) or &doExit("CRITICAL", 2, "$path: $!");
	while (my $line = <$config>) {
		chomp $line;
		$line =~ s/^\s*//;
		next if $line =~ /^#/ or $line =~ /^$/;

		# The value is matched lazily: a greedy (.*) would swallow
		# trailing blanks (or a CR) before \s* could strip them, and
		# they would end up inside every expanded command line.
		if ($line =~ m/(\$[^=]*\$)=(.*?)\s*$/) {
			$macros{$1} = $2;
		}
	}

	close $config;
}

# The hostname argument is mandatory.
unless (scalar(@ARGV) >= 1) {
	print "CRITICAL: Usage: $0 hostname [check-by-ssh-opts]\n";
	exit(2);
}

# Everything after the hostname is passed through to check_by_ssh as-is.
$extraArgs = join(' ', @ARGV[1 .. $#ARGV]) if scalar(@ARGV) > 1;

#
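# Read the main Nagios config file.  Each object config file it lists
# (cfg_file) is scanned for commands, hosts, host groups and batch services,
# each resource file (resource_file) is read for macro values, and the path
# of the Nagios command file (command_file) is remembered.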
#
# The three-argument open takes the path literally, so strip surrounding
# whitespace ourselves the way "<$nagiosConfig" used to.
my $mainConfigPath = $nagiosConfig;
$mainConfigPath =~ s/^\s+//;
$mainConfigPath =~ s/\s+$//;

open(my $nagios, '<', $mainConfigPath) or &doExit("CRITICAL", 2, "$nagiosConfig: $!");
while (my $line = <$nagios>) {
	# Each directive is "name=value".  The value is matched lazily so
	# that the trailing \s* really strips blanks and the line ending,
	# and it is copied out of $1 before calling a subroutine that runs
	# regexes of its own.  Reading into $line rather than $_ keeps this
	# loop independent of whatever the subroutines do to $_.
	if ($line =~ m/^\s*cfg_file=\s*(.*?)\s*$/) {
		my $objectConfig = $1;
		&slurpObjectConfig($objectConfig);
	}
	elsif ($line =~ m/^\s*command_file=\s*(.*?)\s*$/) {
		$nagiosCommandFile = $1;
	}
	elsif ($line =~ m/^\s*resource_file=\s*(.*?)\s*$/) {
		my $resourceConfig = $1;
		&slurpResourceConfig($resourceConfig);
	}
}
close $nagios;

#
# Now $commandsByHost{$ARGV[0]} is a hash of {service name => batch_command
# specification} for the host we want to probe.  Expand each specification
# and construct the check_by_ssh command line.
#

$resultsFile = "/tmp/.batch_by_ssh.$PID";

$host = $ARGV[0];
$macros{'$HOSTNAME$'} = $host;
$macros{'$HOSTADDRESS$'} = $hostAddresses{$host};

# Without an address the -H option below would swallow the next option as
# its value, so report the configuration problem directly instead.
if (!defined($hostAddresses{$host}) or $hostAddresses{$host} eq "") {
	&doExit("UNKNOWN", 3, "No address configured for host '$host'");
}

# Wrap a string in single quotes for the shell; an embedded single quote is
# written as '\'' so it can't terminate the quoting early.
my $shellQuote = sub {
	my ($text) = @_;
	$text = "" unless defined $text;
	$text =~ s/'/'\\''/g;
	return "'" . $text . "'";
};

$commandLine = "$macros{'$USER1$'}/check_by_ssh -H $hostAddresses{$host} -n $host -O $resultsFile $extraArgs";

# One -C option per service.  The names are sorted only to make the command
# line reproducible; what matters is that the -C options and the -s list
# below are in the same order.
my @serviceNames = sort keys %{$commandsByHost{$host}};
foreach my $service (@serviceNames) {
	$commandLine .= " -C " . $shellQuote->(&expandCommand($commandsByHost{$host}{$service}, $host));
}

# check_by_ssh doesn't submit passive-mode commands if only one service is
# specified, so add a dummy.  The dummy command is quoted so that "foo"
# stays part of it instead of becoming a stray argument.
if (scalar(@serviceNames) < 2) {
	$commandLine .= " -C 'echo foo'";
	push @serviceNames, "dummy";
}

$serviceList = join(':', @serviceNames);
$commandLine .= " -s " . $shellQuote->($serviceList);

# Run check_by_ssh through the shell and collect whatever it prints.  If the
# pipe can't even be started there is no exit status to interpret, so bail
# out instead of reading a stale $?.
open(my $check, '-|', $commandLine)
	or &doExit("CRITICAL", 2, "Unable to run check_by_ssh: $!");
@output = <$check>;		# should be empty
close $check;			# waits for the child and sets $?

# The high byte of $? is the exit code; a non-zero low 7 bits means the
# child was killed by a signal, which we treat as critical.
$exitCode = $? >> 8;
if ($? & 127) {
	$exitCode = 2;
}

#
# If the check command succeeded, dump the results into the Nagios command file.
#
if ($exitCode == 0) {
	open(my $results, '<', $resultsFile) or &doExit("WARNING", 1, "$resultsFile: $!");

	# The three-argument open takes the path literally, so strip
	# surrounding whitespace ourselves the way ">$nagiosCommandFile"
	# used to, and give a clear message when the main config file never
	# named a command file.
	my $commandFilePath = defined($nagiosCommandFile) ? $nagiosCommandFile : "";
	$commandFilePath =~ s/^\s+//;
	$commandFilePath =~ s/\s+$//;
	if ($commandFilePath eq "") {
		&doExit("CRITICAL", 2, "No command_file setting found in $nagiosConfig");
	}

	open(my $commandFile, '>', $commandFilePath) or &doExit("CRITICAL", 2, "$nagiosCommandFile: $!");
	while (my $line = <$results>) {
		# Skip the result of the dummy command.
		next if ($line =~ m/;dummy;/);
		print {$commandFile} $line;
	}
	close $results;

	# Buffered write errors only show up when the handle is closed.
	close($commandFile) or &doExit("CRITICAL", 2, "$nagiosCommandFile: $!");

	&doExit("OK", 0, "Remote probe succeeded") if (!defined($output[0]) or $output[0] eq "");
	&doExit("OK", 0, $output[0]);
}

#
# If the check command didn't succeed, dump out an error message and don't
# report any results we got (which might be bogus.)
#

# Make sure the status line says something even if check_by_ssh was silent.
my $failureText = $output[0];
if (!defined($failureText) or $failureText eq "") {
	$failureText = "check_by_ssh exited with status $exitCode and printed nothing";
}

if ($exitCode == 1) {
	&doExit("WARNING", 1, $failureText);
}
elsif ($exitCode == 2) {
	&doExit("CRITICAL", 2, $failureText);
}

&doExit("UNKNOWN", 3, $failureText);
