Here's a Perl script I wrote that runs each evening after the collection window. It emails a report of the devices that failed collection; no email is sent when the only devices listed are in maintenance mode or are proxy models. Please feel free to adapt it as needed. Note that it was written on Solaris 10 and uses the Solaris 'mailx' command, so you may need to change the mail commands on other systems.
Al
#!/usr/bin/perl -w
# Generate and mail a list of devices which failed collection via NCM and the reason for the failure
# Written by Al Sorrell 6/24/2014
# Tested on Spectrum 9.2.2 on Solaris
# This uses the Solaris 'mailx' for sending email. Replace as needed 2 places
#
use Env;
Env::import();
use strict;
#
# =======================
# Site-specific settings: edit these before use.
our $MAILTO = 'your_email@demo.com';    # address handed to mailx for the report and for error notices
my $DEBUG = 1;                          # true value: print trace output while collecting
my $REPORT_FILE = '/export/appl/spectrum/custom/scripts/Net/report_NCM_failures.txt';    # report is written here, then mailed
my @LANDSCAPES = ('0x400000', '0x500000');    # landscape handles, queried in this order
# ======================
# Working storage shared by the statements that follow.
my $err;
my $mh;
my %dev_ip;
my %dev_err;
my %dev_mh;
my %dev_name;
my $x;
my @x;
#
# Loop through all landscapes
#
# Collect NCM failure data from every configured landscape.
#
# For each landscape: connect the vnmsh CLI, locate the Global Sync Task model,
# read its failed-device list (attr 0x820045) and failed-error list
# (attr 0x820072), then fill the file-level hashes read by the report section:
#   %dev_mh  : device name  -> model handle as printed in the failed-device list
#   %dev_ip  : model handle -> address (attr 0x1027f), '' when none was returned
#   %dev_err : "0x" . handle from the error list -> error text
# NOTE(review): the report looks up %dev_err by the failed-device-list handle, so
# this presumes that list prints handles with a 0x prefix - confirm on your system.
for my $land (@LANDSCAPES) {
    `$SPECROOT/vnmsh/disconnect`;          # Response goes to SYSERR - disconnect any current connection
    `$SPECROOT/vnmsh/connect lh=$land`;    # connect to CLI on specified landscape
    #
    # Get Global Sync task's model_handle
    #
    my @resp = `$SPECROOT/vnmsh/show models mth=0x82000c`;
    print "Get Global Sync MH query returns\n@resp" if ($DEBUG);
    # A missing second line means the query returned no model row at all.
    if (!defined($resp[1]) || $resp[1] !~ /Global Sync Task/) {
        send_error('Could not retrieve sync MH');
    }
    my ($mh_gst) = split(/\s+/, $resp[1]);
    print "\n========\nlandscape $land, mh_gst=$mh_gst\n" if ($DEBUG);
    #
    # Get the model_handle of the failed devices; then the error status
    #
    my @failed = `$SPECROOT/vnmsh/show attributes attr=0x820045 mh=$mh_gst`;
    my @errs   = `$SPECROOT/vnmsh/show attributes attr=0x820072 mh=$mh_gst`;
    #
    # Loop through the failed devices and get the name and address (IP)
    #
    my $ll = -1;
    foreach my $line (@failed) {
        $ll++;
        chomp $line;
        print "\$failed[$ll]=$line\n" if ($DEBUG);
        next if ($line !~ /NCM_Failed_Device_List/);
        my (undef, undef, undef, $failed_mh) = split(/\s+/, $line);
        next if (!defined($failed_mh));
        print " \$failed_mh=$failed_mh\n" if ($DEBUG);
        my $name = _ncm_attr_field('0x1006e', $failed_mh, 'name');
        my $ip   = _ncm_attr_field('0x1027f', $failed_mh, 'ip');
        # Keep the device in the report even when vnmsh returned no name/address:
        # fall back to the handle as the name and to an empty address.
        $name = $failed_mh if (!defined($name) || $name eq '');
        $ip   = ''         if (!defined($ip));
        $dev_mh{$name}      = $failed_mh;
        $dev_ip{$failed_mh} = $ip;
        print "\$dev_mh{$name}=$dev_mh{$name}, \$dev_ip{$failed_mh}=$dev_ip{$failed_mh}\n" if ($DEBUG);
    }
    #
    `$SPECROOT/vnmsh/disconnect`;    # Disconnect from CLI
    #
    # Break out the errors
    #
    #Id Name Iid Value
    #0x820072 NCM_Failed_Error_List 0 460959,0x17
    #0x820072 NCM_Failed_Error_List 0 417ddc,0x821023,Capture Running Script Returned Error [252]: Login error:
    # Host 172.22.7.22:
    # Login has failed: SSHAuthenticationError Login timed out. The input stream currently has the contents bellow: at /opt/SPECTRUM/lib/perl5/lib/site_perl/5.14.2/Expect.pm line 828
    #
    print "\n Begin processing returned \@errs array\n" if ($DEBUG);
    my $cur_mh;       # handle (no 0x prefix) of the entry being accumulated; undef = none yet
    my $info = '';    # error text accumulated for $cur_mh
    $ll = -1;
    foreach my $line (@errs) {
        $ll++;
        chomp $line;
        print "\$errs[$ll]=$line\n" if ($DEBUG);
        next if ($line =~ /^Id/);    #skip headers
        if ($line =~ /NCM_Failed_Error_List/) {    #Start new mh
            #
            # A new entry begins: store whatever was accumulated for the previous
            # one. Keyed on "an entry is open", not on the text being non-empty,
            # so an entry with empty text is not lost.
            #
            if (defined($cur_mh)) {
                $dev_err{"0x$cur_mh"} = $info;
                print "\nflushing info \$dev_err{0x$cur_mh}=$info\n" if ($DEBUG);
            }
            $cur_mh = undef;
            $info   = '';
            # Value column is "handle,code" optionally followed by ",text".
            # [^,\s]+ stops each field at its own comma, so commas inside the
            # text cannot be absorbed into the handle.
            my ($new_mh, $code, $text) =
                ($line =~ m/0x820072\s+NCM_Failed_Error_List\s+0\s+([^,\s]+),([^,\s]+)(?:,(.*))?/);
            if (!defined($new_mh)) {
                print "Unrecognized NCM_Failed_Error_List line skipped: $line\n" if ($DEBUG);
                next;
            }
            $cur_mh = $new_mh;
            if ($code eq '0x17') {    # 0x17 indicates "InMaintenance"
                $info = 'Device is in maintenance';
                print "$info, mh=$cur_mh\n" if ($DEBUG);
            } else {
                if (defined($text)) {
                    $info = $text;
                } elsif ($code eq '0x82100e') {
                    $info = 'NCM Is disabled on this device because it is a proxy model';
                } else {
                    $info = 'Unspecified Error';
                }
                print "Starting new \$mh=$cur_mh, \$err=$code, \$info=$info\n" if ($DEBUG);
            }
        } else {    # this is a continuation of earlier mh/info
            next if (!defined($cur_mh));    # nothing to continue yet
            my $piece = $line;
            if ($line =~ /The input stream/) {    # cleanup
                ($piece) = ($line =~ m/(.*)\. The input stream/);
            } elsif ($line =~ / at \//) {    # cleanup
                ($piece) = ($line =~ m/(.*) at \//);
            }
            $info .= $piece if (defined($piece));
        }
    }
    #
    # Handle the last device
    #
    if (defined($cur_mh)) {
        $dev_err{"0x$cur_mh"} = $info;
        print "\$dev_err{0x$cur_mh}=$info\n" if ($DEBUG);
    }
}

# Run "show attributes attr=$attr mh=$model" and return the third
# whitespace-separated field of the second output line, or undef when vnmsh
# printed no second line. $label is only used to name the value in debug output.
sub _ncm_attr_field {
    my ($attr, $model, $label) = @_;
    my @out = `$SPECROOT/vnmsh/show attributes attr=$attr mh=$model`;
    my $value;
    (undef, undef, $value) = split(/\s+/, $out[1]) if (defined($out[1]));
    if ($DEBUG) {
        my ($head, $row, $shown) = map { defined($_) ? $_ : '' } ($out[0], $out[1], $value);
        print " show attributes $attr:\n \$x[0]=$head \$x[1]=$row \$$label=$shown\n";
    }
    return $value;
}
#
# If we have failed devices OTHER than those InMaintenance, send an email report
#
# Write one row per failed device to $REPORT_FILE (and echo it to STDOUT),
# count the rows by category, and mail the report when at least one device
# failed for a reason other than maintenance mode or being a proxy model.
my $fails   = 0;
my $maints  = 0;
my $proxies = 0;
# Three-argument open with a lexical handle: the path is never parsed for a mode.
open(my $out, '>', $REPORT_FILE) or send_error("Unable to open $REPORT_FILE, $!");
foreach my $name (sort(keys(%dev_mh))) {
    my $handle = $dev_mh{$name};
    # A device may have no address or no error entry recorded; substitute
    # printable values so the row is still written and counted as a failure.
    my $ip     = defined($dev_ip{$handle})  ? $dev_ip{$handle}  : '';
    my $reason = defined($dev_err{$handle}) ? $dev_err{$handle} : 'No error text returned';
    print "$handle $name $ip $reason\n\n";
    printf {$out} "%s\t%s\t%s\n\n", $name, $ip, $reason;
    if ($reason =~ /proxy/) {
        $proxies++;
    } elsif ($reason =~ /maintenance/) {
        $maints++;
    } else {
        $fails++;
    }
}
# Buffered write errors only surface at close, so check it.
close($out) or send_error("Unable to write $REPORT_FILE, $!");
#
# Send mail if there were failures (not counting InMaintenance)
#
`/usr/bin/mailx -s "Spectrum NCM Backup had $fails failures, $maints in Maintenance Mode,$proxies proxy models" $MAILTO <$REPORT_FILE` if($fails);
exit 0;
# Mail an error notice about this script, then terminate with exit status 1.
#
# Parameters:
#   $err - error text; appended to the mail subject.
# Does not return.
sub send_error {
    my $err = shift;
    $err = '' if (!defined($err));
    $err =~ s/\s*[\r\n]+\s*/ /g;    # keep the subject on a single line
    my $subject = "Spectrum report_NCM_failures.pl had error:$err";
    # List-form pipe open starts mailx directly, without a shell, so quotes,
    # backticks or '$' in the error text cannot change the command that runs.
    # $MAILTO is split on whitespace, as the shell did when it was interpolated.
    if (open(my $mail, '|-', '/usr/bin/mailx', '-s', $subject, split(' ', $MAILTO))) {
        close($mail);    # nothing written: mailx reads an empty body, as with < /dev/null
    } else {
        warn "Unable to run /usr/bin/mailx: $!\n";
    }
    exit 1;
}