Dmesg Scan Macro
The dmesg_scan_alarms_macros.cfg file defines a script macro, dmesg_scan(), that scans dmesg output for troublesome or noteworthy entries.
// dmesg_scan(BYPASS): script macro that captures the current dmesg output,
// feeds the script only the lines that are new since the previous run, and
// logs/mails those that survive the filters below.  BYPASS is a regular
// expression; input lines matching it are skipped by the "bypasses" rule.
dmesg_scan(BYPASS)
init
status =piktstatus
level =piktlevel
task "Scan the dmesg output for troublesome or noteworthy entries"
// Input: if a .bak copy from a previous run exists, diff it against the
// current capture and keep only the lines containing '>' with their first
// two bytes cut off; otherwise read the whole current capture.
input proc "if [ -e =hstdir/log/dmesg=piktalert=_.bak ];
then =diff =hstdir/log/dmesg=piktalert=_.bak
=hstdir/log/dmesg=piktalert |
=egrep '>' | =cut -b 3- 2>/dev/null;
else =cat =hstdir/log/dmesg=piktalert 2>/dev/null;
fi"
begin
// =checkpoint is applied only when the level is not emerg/alert.
// NOTE(review): =checkpoint and =lalim are defined elsewhere; their exact
// effect is not visible here.
if $level() !~~ "emerg|alert"
=checkpoint(=lalim)
fi
// Write the current dmesg output (piped through =uniq) to the per-alert
// capture file that the input proc reads.
// NOTE(review): presumably the begin section runs before the input proc is
// started, so the diff sees this fresh capture - confirm.
doexec wait "=dmesg | =uniq > =hstdir/log/dmesg=piktalert"
// Severity filter: a line continues to the following rules only if it matches
// the flag pattern appropriate to the current level/alarm; every other line
// hits "next".  =redflags and =yellowflags are macros defined elsewhere.
// NOTE(review): the third branch tests for an alarm named "ScanDmesg", while
// the sample alarm stanza and sample output in this document use the name
// "DmesgScan" - confirm which name is intended.
rule
if $level() =~~ "emerg|alert" // alert == urgent
&& $inlin =~~ "=redflags"
leave
elsif $level() =~~ "critical"
&& $inlin =~~ "=yellowflags"
leave
elsif $alarm() eq "ScanDmesg"
&& $inlin =~~ "=redflags|=yellowflags"
leave
else
next
fi
// Always skipped: lines that begin and end with a digit (ignoring surrounding
// whitespace), lines consisting only of digits, and lines starting with
// "not found!".
rule // permanent bypasses
if $inlin =~~ "^[[:space:]]*[[:digit:]].*[[:digit:]][[:space:]]*$"
|| $inlin =~~ "^[[:space:]]*[[:digit:]]+[[:space:]]*$"
|| $inlin =~~ "^[[:space:]]*not found!"
next
fi
rule // log anything not bypassed
=output_alarm_log($inlin)
// This rule comes after the logging rule, so presumably lines seen during a
// reboot period are still logged but go no further.
// NOTE(review): =reboot_period is defined elsewhere - confirm its semantics.
rule // ignore scheduled reboots
if =reboot_period(#daynumber(), #hour())
next
fi
// Caller-supplied bypass pattern (the macro's BYPASS argument).
rule // bypasses
if $inlin =~~ "(BYPASS)"
next
fi
// Included only when codersys is defined: lines matching "segfault" are sent
// through =output_other_mail to =piktadmin and =coders when the alert name
// matches "coders"; in either case the line then hits "next" and does not
// reach the final mail rule.
#if codersys
rule
if $inlin =~~ "segfault"
if $alert() =~~ "coders"
=output_other_mail(DMESGSCAN,
'PIKT Dmesg Errors on =pikthostname',
=piktadmin =coders, $inlin)
fi
next
fi
#endif
// Anything that reaches this point is mailed.
rule
output mail $inlin
end
// Move the current capture to the .bak name that the input proc diffs
// against on the next run.
doexec wait "=mv =hstdir/log/dmesg=piktalert
=hstdir/log/dmesg=piktalert=_.bak"
quit
You might invoke the =dmesg_scan() macro in an alarms configuration file (here, logs_system_alarms.cfg) like this:
///////////////////////////////////////////////////////////////////////////////
//
// logs_system_alarms.cfg
//
///////////////////////////////////////////////////////////////////////////////
// DmesgScan alarm: on munich, three USB-related patterns are prepended to the
// shared =dmesgbypasses list; on all other systems only =dmesgbypasses is
// passed as the bypass pattern.
DmesgScan
#if munich
=dmesg_scan(hub 2-1|hub_port_status failed|reset low speed USB device|
=dmesgbypasses)
#else
=dmesg_scan(=dmesgbypasses)
#endif
///////////////////////////////////////////////////////////////////////////////
where '=dmesgbypasses' is a macro (defined in macros.cfg) containing a '|'-separated list of regular-expression patterns that match uninteresting dmesg output:
// dmesgbypasses: '|'-separated regular-expression alternatives matching dmesg
// lines considered uninteresting; passed as the bypass argument of
// =dmesg_scan().
dmesgbypasses process.+nslookup.+is using obsolete setsockopt|
pcie_portdrv_probe->.+has invalid irq|exception support|
exception polling|obsolete setsockopt|reset.+speed usb|
man 2 wait|failed to allocate mem resource|sata link down|
scsi0: aen: warning|set_dentry_child_flags|write protect|
fixed bufsize|handling phase mismatch|
too many iterations.+nv_nic_irq|analog subsections not ready
Output from this script might look like the following:
URGENT:
DmesgScan
Scan the dmesg output for troublesome or noteworthy entries
sd 2:0:0:0: SCSI error: return code = 0x8000002
sda: Current: sense key: Aborted Command
Additional sense: Scsi parity error
end_request: I/O error, dev sda, sector 118837504
Buffer I/O error on device sda, logical block 14854688
ata3: status=0x51 { DriveReady SeekComplete Error }
ata3: error=0x84 { DriveStatusError BadCRC }
Note how, on coder systems, we route e-mail about segfault entries to just the coders (and the piktadmin) by means of the =output_other_mail() macro. Note also how we add special bypasses for the munich system, which is troubled by recurring USB port failures.
For more examples, see Samples.