librenms/misc/alert_rules.json
Timothy Willey a000b4a6de ArubaOS - fix client count polling, add ap count polling. (#10231)
* fix client count polling, add ap count polling.

* Set low warning/critical thresholds for AP count sensors

* Add Aruba AP Count Warning and Critical Alerts to collection, lint alert_rules.json

* add newline at end of alert_rules.json

* add/update tests/data/arubaos*.json test data
2019-05-23 10:17:09 -05:00

416 lines
16 KiB
JSON

[
{
"rule": "macros.device_down = \"1\"",
"name": "Devices up/down",
"default": true
},
{
"rule": "devices.uptime < \"300\" && macros.device = \"1\"",
"name": "Device rebooted",
"extra": "{\"count\": 1}",
"default": true
},
{
"rule": "bgpPeers.bgpPeerState != \"established\" && macros.device_up = \"1\" && bgpPeers.bgpPeerAdminStatus != \"stop\"",
"name": "BGP Session down",
"extra": "{\"count\": 1}",
"default": true
},
{
"rule": "bgpPeers.bgpPeerFsmEstablishedTime < \"300\" && bgpPeers.bgpPeerState = \"established\" && macros.device_up = \"1\"",
"name": "BGP Session established",
"extra": "{\"count\": 1}",
"default": true
},
{
"rule": "macros.port_down = \"1\"",
"name": "Port status up/down",
"extra": "{\"count\": 1}",
"default": true
},
{
"rule": "macros.port_usage_perc >= \"80\" && macros.port_up = \"1\"",
"name": "Port utilisation over threshold",
"default": true
},
{
"rule": "sensors.sensor_current > `sensors.sensor_limit` && sensors.sensor_alert = \"1\" && macros.device_up = \"1\"",
"name": "Sensor over limit - Check Device Health Settings",
"default": true
},
{
"rule": "sensors.sensor_current < `sensors.sensor_limit_low` && sensors.sensor_alert = \"1\" && macros.device_up = \"1\"",
"name": "Sensor under limit - Check Device Health Settings",
"default": true
},
{
"rule": "services.service_status != \"0\" && macros.device_up = \"1\"",
"name": "Service up/down",
"default": true
},
{
"rule": "wireless_sensors.sensor_current >= `wireless_sensors.sensor_limit` && wireless_sensors.sensor_alert = \"1\" && macros.device_up = \"1\"",
"name": "Wireless Sensor over limit",
"default": true
},
{
"rule": "wireless_sensors.sensor_current <= `wireless_sensors.sensor_limit_low` && wireless_sensors.sensor_alert = \"1\" && macros.device_up = \"1\"",
"name": "Wireless Sensor under limit",
"default": true
},
{
"rule": "wireless_sensors.sensor_type == \"arubaos\" && wireless_sensors.sensor_class == \"ap-count\" && wireless_sensors.sensor_alert = \"1\" && macros.device_up = \"1\" && wireless_sensors.sensor_current <= `wireless_sensors.sensor_limit_low_warn` && wireless_sensors.sensor_current > `wireless_sensors.sensor_limit_low`",
"name": "Aruba Wireless AP Count Low Warning",
"severity": "warning",
"default": false
},
{
"rule": "wireless_sensors.sensor_type == \"arubaos\" && wireless_sensors.sensor_class == \"ap-count\" && wireless_sensors.sensor_alert = \"1\" && macros.device_up = \"1\" && wireless_sensors.sensor_current <= `wireless_sensors.sensor_limit_low`",
"name": "Aruba Wireless AP Count Low Critical",
"severity": "critical",
"default": false
},
{
"rule": "macros.state_sensor_critical && sensors.sensor_alert = 1",
"name": "State Sensor Critical",
"default": true
},
{
"rule": "macros.state_sensor_warning && sensors.sensor_alert = 1",
"name": "State Sensor Warning",
"default": false
},
{
"rule": "macros.bill_quota_over_quota >= \"75\"",
"name": "Quota bills over 75% used"
},
{
"rule": "macros.bill_cdr_over_quota >= \"75\"",
"name": "CDR bills over 75% used"
},
{
"rule": "ipsec_tunnels.tunnel_status != \"active\" && macros.device_up = \"1\"",
"name": "IPSec tunnels down"
},
{
"rule": "devices.last_polled_timetaken >= 290",
"name": "Device took too long to poll"
},
{
"rule": "macros.device_up = \"1\" && devices.os = \"asa\" && ciscoASA.data > \"5000\"",
"name": "Cisco ASA connections over 5000"
},
{
"rule": "processors.processor_usage > \"85\" && macros.device_up = \"1\"",
"name": "Processor usage over 85%"
},
{
"rule": "sensors.sensor_descr ~ \"Primary Unit.*\" && sensors.sensor_current = \"10\" && sensors.sensor_prev = \"9\"",
"name": "Cisco ASA Primary unit changed to standby"
},
{
"rule": "ports.ifOperStatus = \"down\" && ports.ifOperStatus_prev = \"up\" && macros.device_up = \"1\"",
"name": "Port status change from up to down"
},
{
"rule": "ports.ifOutErrors_rate >= \"100\" || ports.ifInErrors_rate >= \"100\"",
"name": "Interface Errors Rate greater than 100"
},
{
"rule": "eventlog.type = \"discovery\" && eventlog.message ~ \"@autodiscovered@\" && eventlog.datetime >= macros.past_60m",
"name": "Device discovered within the last 60 minutes"
},
{
"rule": "wireless_sensors.sensor_class = \"clients\" && wireless_sensors.sensor_current >= `wireless_sensors.sensor_limit` && wireless_sensors.sensor_alert = \"1\" && macros.device_up = \"1\"",
"name": "Too many wireless clients"
},
{
"rule": "syslog.timestamp >= macros.past_5m && syslog.msg ~ \"@authentication failure@\"",
"name": "Syslog, Authentication failure on Device"
},
{
"rule": "services.service_status = \"1\"",
"name": "Service warning"
},
{
"rule": "services.service_status = \"2\"",
"name": "Service critical"
},
{
"rule": "syslog.timestamp >= macros.past_5m && syslog.priority ~ \"alert\"",
"name": "Syslog, received Alert Priority Message"
},
{
"rule": "syslog.timestamp >= macros.past_5m && syslog.priority ~ \"emergency\"",
"name": "Syslog, received Emergency Priority Message"
},
{
"rule": "syslog.timestamp >= macros.past_5m && syslog.msg ~ \"@arp table is full@\"",
"name": "Syslog, ARP table is full check on device"
},
{
"rule": "sensors.sensor_type = \"upsAdvBatteryReplaceIndicator\" && sensors.sensor_current = \"2\"",
"name": "APC UPS Battery Needs Replacement"
},
{
"rule": "sensors.sensor_current = \"3\" && sensors.sensor_type = \"upsBasicOutputStatus\"",
"name": "APC UPS Switched to Battery Power"
},
{
"rule": "sensors.sensor_current = \"10\" && sensors.sensor_type = \"upsBasicOutputStatus\"",
"name": "APC UPS in Hardware Failure Bypass Mode"
},
{
"rule": "sensors.sensor_current = \"16\" && sensors.sensor_type = \"upsBasicOutputStatus\"",
"name": "APC UPS in Emergency Static Bypass Mode"
},
{
"rule": "sensors.sensor_current = \"12\" && sensors.sensor_type = \"upsBasicOutputStatus\"",
"name": "APC UPS in Smart Trim Mode"
},
{
"rule": "sensors.sensor_oid ~ \".1.3.6.1.4.1.11.2.14.11.1.2.6.1.4.[2-5]\" && sensors.sensor_current = \"2\"",
"name": "HP Procurve Bad Power Supply"
},
{
"rule": "sensors.sensor_oid = \".1.3.6.1.4.1.11.2.14.11.1.2.6.1.4.1\" && sensors.sensor_current = \"2\"",
"name": "HP Procurve Fan Fault"
},
{
"rule": "sensors.sensor_current > `sensors.sensor_limit` && sensors.sensor_alert = \"1\" && macros.device_up = \"1\" && macros.sensor_port_link = \"1\"",
"name": "Sensor over limit with linked port"
},
{
"rule": "sensors.sensor_current < `sensors.sensor_limit_low` && sensors.sensor_alert = \"1\" && macros.device_up = \"1\" && macros.sensor_port_link = \"1\"",
"name": "Sensor under limit with linked port"
},
{
"rule": "sensors.sensor_current = \"4\" && sensors.sensor_type = \"upsOutputSourceState\"",
"name": "UPS is running on the bypass"
},
{
"rule": "sensors.sensor_current = \"5\" && sensors.sensor_type = \"upsOutputSourceState\"",
"name": "UPS is running on the battery"
},
{
"rule": "sensors.sensor_current = \"3\" && sensors.sensor_type = \"upsBatteryStatusState\"",
"name": "UPS has a low battery"
},
{
"rule": "sensors.sensor_current = \"4\" && sensors.sensor_type = \"upsBatteryStatusState\"",
"name": "UPS has a depleted battery"
},
{
"rule": "sensors.sensor_descr ~ \"Percentage load\" && sensors.sensor_current >= \"90\" && sensors.sensor_type = \"rfc1628\"",
"name": "UPS has a heavy load"
},
{
"rule": "sensors.sensor_current ~ \"[3-4]\" && sensors.sensor_oid = \".1.3.6.1.4.1.232.3.2.5.1.1.37.\"",
"name": "HPE iLo Server drive degraded/failure"
},
{
"rule": "sensors.sensor_current ~ \"[3-4]\" && sensors.sensor_oid = \".1.3.6.1.4.1.232.6.2.9.3.1.4.\"",
"name": "HPE iLo Server Power Supply degraded/failure"
},
{
"rule": "sensors.sensor_current ~ \"[3-4]\" && sensors.sensor_oid = \".1.3.6.1.4.1.232.6.2.6.7.1.9.\"",
"name": "HPE iLo Server Fan degraded/failure"
},
{
"rule": "sensors.sensor_current ~ \"[3-4]\" && sensors.sensor_oid = \".1.3.6.1.4.1.232.1.2.2.1.1.6.\"",
"name": "HPE iLo Server CPU degraded/failure"
},
{
"rule": "sensors.sensor_current ~ \"[3-4]\" && sensors.sensor_oid = \".1.3.6.1.4.1.232.6.2.14.13.1.20.\"",
"name": "HPE iLo Server Memory degraded/failure"
},
{
"rule": "applications.app_type = \"os-updates\" && applications.app_status >= \"10\"",
"name": "Applications OS-Updates, New Updates Available"
},
{
"rule": "devices.os = \"hpblmos\" && sensors.sensor_type = \"hpblmos_psustate\" && sensors.sensor_current ~ \"[3-4]\"",
"name": "HPE BladeSystem has a bad power supply"
},
{
"rule": "devices.os = \"hpblmos\" && sensors.sensor_type = \"hpblmos_fanstate\" && sensors.sensor_current ~ \"[3-4]\"",
"name": "HPE BladeSystem has a bad fan"
},
{
"rule": "devices.os = \"axiscam\" && sensors.sensor_type = \"tempSensorStatusState\" && sensors.sensor_current = \"2\"",
"name": "AXIS camera has a failed temperature sensor"
},
{
"rule": "devices.os = \"rittal-lcp\" && sensors.sensor_type = \"cmcIIIUnitStatus\" && sensors.sensor_current = \"2\"",
"name": "RITTAL LCP has a failed status"
},
{
"rule": "devices.os = \"rittal-lcp\" && sensors.sensor_type = \"cmcIIIUnitStatus\" && sensors.sensor_current = \"3\"",
"name": "RITTAL LCP has an overloaded status"
},
{
"rule": "devices.os = \"dsm\" && sensors.sensor_type = \"systemStatusState\" && sensors.sensor_current = \"2\"",
"name": "Synology NAS has a failed status"
},
{
"rule": "devices.os = \"dsm\" && sensors.sensor_type = \"powerStatusState\" && sensors.sensor_current = \"2\"",
"name": "Synology NAS has a failed power status"
},
{
"rule": "devices.os = \"dsm\" && sensors.sensor_type = \"systemFanStatusState\" && sensors.sensor_current = \"2\"",
"name": "Synology NAS has a failed fan status"
},
{
"rule": "devices.os = \"dsm\" && sensors.sensor_type = \"cpuFanStatusState\" && sensors.sensor_current = \"2\"",
"name": "Synology NAS has a failed CPU fan status"
},
{
"rule": "devices.os = \"dsm\" && sensors.sensor_type = \"upgradeAvailableState\" && sensors.sensor_current = \"1\"",
"name": "Synology NAS has a new upgrade available"
},
{
"rule": "devices.os = \"dsm\" && sensors.sensor_type = \"raidStatusState\" && sensors.sensor_current ~ \"^(11|12)$\"",
"name": "Synology NAS has a bad RAID status"
},
{
"rule": "devices.os = \"dsm\" && sensors.sensor_type = \"diskStatusState\" && sensors.sensor_current ~ \"[4-5]\"",
"name": "Synology NAS has a bad disk status"
},
{
"rule": "devices.os = \"f5\" && sensors.sensor_type = \"sysChassisPowerSupplyStatus\" && sensors.sensor_current = \"0\"",
"name": "F5 appliance has a bad power supply"
},
{
"rule": "devices.os = \"f5\" && sensors.sensor_type = \"sysChassisFanStatus\" && sensors.sensor_current = \"0\"",
"name": "F5 appliance has a bad fan"
},
{
"rule": "devices.os = \"nxos\" && sensors.sensor_type = \"cefcFanTrayOperStatus\" && sensors.sensor_current ~ \"[3-4]\"",
"name": "Cisco NX-OS device has a bad fan"
},
{
"rule": "devices.os = \"panos\" && sensors.sensor_type = \"panSysHAState\" && sensors.sensor_current = \"1\" && sensors.sensor_prev = \"2\"",
"name": "Palo Alto Networks passive firewall changed to active"
},
{
"rule": "sensors.sensor_current ~ \"[2|6]\" && sensors.sensor_oid = \".1.3.6.1.4.1.674.10893.1.20.130.15.1.4.1\"",
"name": "Dell Server Raid Battery Failed/Degraded"
},
{
"rule": "sensors.sensor_current = \"2\" && sensors.sensor_oid = \".1.3.6.1.4.1.674.10892.1.1100.32.1.5\"",
"name": "Dell Server CPU Status Critical"
},
{
"rule": "sensors.sensor_current ~ \"[2|6]\" && sensors.sensor_oid = \".1.3.6.1.4.1.674.10893.1.20.130.1.1.5\"",
"name": "Dell Server Disk Controller State Failed/Degraded"
},
{
"rule": "sensors.sensor_current ~ \"[2|5]\" && sensors.sensor_oid = \".1.3.6.1.4.1.674.10893.1.20.130.4.1.4\"",
"name": "Dell Server Disk Array State Failed/Degraded"
},
{
"rule": "sensors.sensor_current ~ \"[5|6]\" && sensors.sensor_oid = \".1.3.6.1.4.1.674.10892.1.600.12.1.5\"",
"name": "Dell Server PSU State Critical/NonRecoverable"
},
{
"rule": "sensors.sensor_current ~ \"[2|6]\" && sensors.sensor_oid = \".1.3.6.1.4.1.674.10893.1.20.140.1.1.4\"",
"name": "Dell Server Virtual Disk Failed/Degraded"
},
{
"rule": "sensors.sensor_current = \"2\" && sensors.sensor_oid = \".1.3.6.1.4.1.25506.8.35.9.1.1.1.2\"",
"name": "Comware Fan Status failed"
},
{
"rule": "sensors.sensor_current = \"2\" && sensors.sensor_oid = \".1.3.6.1.4.1.25506.8.35.9.1.2.1.2\"",
"name": "Comware PSU status failed"
},
{
"rule": "sensors.sensor_current = \"9\" && sensors.sensor_oid = \".1.3.6.1.4.1.9.9.13.1.4.1.3\"",
"name": "Cisco Fan Status failed"
},
{
"rule": "sensors.sensor_current = \"8\" && sensors.sensor_oid = \".1.3.6.1.4.1.9.9.13.1.5.1.3\"",
"name": "Cisco PSU status failed"
},
{
"rule": "sensors.sensor_current = \"3\" && sensors.sensor_oid = \".1.3.6.1.4.1.4413.1.1.43.1.15.1.2.1\"",
"name": "UBNT EdgeSwitch Chassis state failed"
},
{
"rule": "sensors.sensor_current ~ \"[3|4]\" && sensors.sensor_oid = \".1.3.6.1.4.1.674.10892.5.5.1.20.140.1.1.4\"",
"name": "Dell iDRAC Virtual Disk Failed/Degraded"
},
{
"rule": "sensors.sensor_current ~ \"5\" && sensors.sensor_oid = \".1.3.6.1.4.1.674.10892.5.4.1100.30.1.5\"",
"name": "Dell iDRAC Processor Status Critical"
},
{
"rule": "sensors.sensor_current ~ \"5\" && sensors.sensor_oid = \".1.3.6.1.4.1.674.10892.5.4.1100.50.1.5\"",
"name": "Dell iDRAC Memory Status Critical"
},
{
"rule": "sensors.sensor_current ~ \"10\" && sensors.sensor_oid = \".1.3.6.1.4.1.674.10892.5.4.600.20.1.5\"",
"name": "Dell iDRAC Voltage Probe Status Failed"
},
{
"rule": "sensors.sensor_current ~ \"10\" && sensors.sensor_oid = \".1.3.6.1.4.1.674.10892.5.4.600.30.1.5\"",
"name": "Dell iDRAC Amperage Probe Status Failed"
},
{
"rule": "sensors.sensor_current ~ \"10\" && sensors.sensor_oid = \".1.3.6.1.4.1.674.10892.5.4.600.50.1.5\"",
"name": "Dell iDRAC Battery Status Failed"
},
{
"rule": "sensors.sensor_current ~ \"[5|6]\" && sensors.sensor_oid = \".1.3.6.1.4.1.674.10892.2.2.1\"",
"name": "Dell iDRAC Global System Status Critical/NonRecoverable"
},
{
"rule": "devices.os = \"Netscaler\" && sensors.sensor_type = \"sysHighAvailabilityMode\" && sensors.sensor_current != `sensors.sensor_prev` && sensors.lastupdate < \"DATE_SUB(NOW(),INTERVAL 5 MINUTE)\" && macros.device_up = \"1\"",
"name": "Netscaler HA node mode change"
},
{
"rule": "devices.os = \"Netscaler\" && sensors.sensor_type = \"haCurState\" && sensors.sensor_current ~ \"^(1|8|9)$\" && macros.device_up = \"1\"",
"name": "Netscaler HA node state Warning"
},
{
"rule": "devices.os = \"Netscaler\" && sensors.sensor_type = \"haCurState\" && sensors.sensor_current ~ \"^(2|4|5|7|10|11)$\" && macros.device_up = \"1\"",
"name": "Netscaler HA node state Critical"
},
{
"rule": "%applications.app_type='portactivity' && %applications_metrics.ssh_total_to>'5'",
"name": "SSH Connections To"
},
{
"rule": "%applications.app_type='portactivity' && %applications_metrics.http_total_to>'100'",
"name": "HTTP Connections To"
},
{
"rule": "%applications.app_type='portactivity' && %applications_metrics.https_total_to>'100'",
"name": "HTTPS Connections To"
},
{
"rule": "%applications.app_type='portactivity' && %applications_metrics.smtp_total_from>'10'",
"name": "SMTP Connections From"
},
{
"rule": "%applications.app_type='portactivity' && %applications_metrics.smtp_total_to>'30'",
"name": "SMTP Connections To"
},
{
"rule": "%applications.app_type='portactivity' && %applications_metrics.ftp_total_to>'5'",
"name": "FTP Connections To"
},
{
"rule": "%applications.app_type='portactivity' && %applications_metrics.imap_total_to>'20'",
"name": "IMAP Connections To"
},
{
"rule": "%applications.app_type='portactivity' && %applications_metrics.imaps_total_to>'20'",
"name": "IMAPS Connections To"
},
{
"rule": "%applications.app_type='portactivity' && %applications_metrics.ircd_total_from>'0'",
"name": "IRCD Connections From"
}
]