需求:
pod中的容器重启一次则报警通知
pod非Running状态则报警
pod中的容器非true状态则报警
三个需求其实是有点重叠的
pod重启期间肯定会出现非Running状态:只要触发了重启报警,pod非Running的报警也会触发;而pod非Running时容器状态肯定非true,因此容器非true的报警同样会触发
所以报警设置为:
pod重启一次就报警
pod非Running and 容器非true (#3) and pod非删除 = 报警
zabbix server中建一个模板
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Zabbix 3.2 template export: "OC Pods" (host group "OpenShift").
  Contains one low-level discovery rule (key oc.pod.discover) that creates,
  per discovered {#POD_NAME}, three item prototypes and two trigger prototypes:
    * restart warning   : fires when the restarts item text contains "Warning"
    * pod not running   : fires when the pod is neither running nor deleted
                          and the container has stayed non-true for 3 samples
  The oc.pod.* keys are not defined in this file; they are presumably
  UserParameters on the agent side (TODO confirm).
-->
<zabbix_export>
  <version>3.2</version>
  <date>2017-11-23T07:48:53Z</date>
  <groups>
    <group>
      <name>OpenShift</name>
    </group>
  </groups>
  <templates>
    <template>
      <template>OC Pods</template>
      <name>OC Pods</name>
      <description/>
      <groups>
        <group>
          <name>OpenShift</name>
        </group>
      </groups>
      <!-- Applications used to group the discovered items. -->
      <applications>
        <application>
          <name>restartCount</name>
        </application>
        <application>
          <name>RunningStatus</name>
        </application>
      </applications>
      <items/>
      <discovery_rules>
        <discovery_rule>
          <name>OC Pods Discover</name>
          <type>0</type>
          <snmp_community/>
          <snmp_oid/>
          <key>oc.pod.discover</key>
          <delay>300</delay>
          <!-- NOTE(review): status 1 means "disabled" in Zabbix exports, so the
               rule likely has to be enabled after import. Confirm this is intended. -->
          <status>1</status>
          <allowed_hosts/>
          <snmpv3_contextname/>
          <snmpv3_securityname/>
          <snmpv3_securitylevel>0</snmpv3_securitylevel>
          <snmpv3_authprotocol>0</snmpv3_authprotocol>
          <snmpv3_authpassphrase/>
          <snmpv3_privprotocol>0</snmpv3_privprotocol>
          <snmpv3_privpassphrase/>
          <delay_flex/>
          <params/>
          <ipmi_sensor/>
          <authtype>0</authtype>
          <username/>
          <password/>
          <publickey/>
          <privatekey/>
          <port/>
          <filter>
            <evaltype>0</evaltype>
            <formula/>
            <conditions/>
          </filter>
          <lifetime>7</lifetime>
          <description/>
          <item_prototypes>
            <!-- Restart status of the pod, stored as text (value_type 4);
                 the restart trigger searches this text for "Warning". -->
            <item_prototype>
              <name>Pod {#POD_NAME} Restarts</name>
              <type>0</type>
              <snmp_community/>
              <multiplier>0</multiplier>
              <snmp_oid/>
              <key>oc.pod.status[{#POD_NAME},restarts]</key>
              <delay>30</delay>
              <history>30</history>
              <trends>0</trends>
              <status>0</status>
              <value_type>4</value_type>
              <allowed_hosts/>
              <units/>
              <delta>0</delta>
              <snmpv3_contextname/>
              <snmpv3_securityname/>
              <snmpv3_securitylevel>0</snmpv3_securitylevel>
              <snmpv3_authprotocol>0</snmpv3_authprotocol>
              <snmpv3_authpassphrase/>
              <snmpv3_privprotocol>0</snmpv3_privprotocol>
              <snmpv3_privpassphrase/>
              <formula>1</formula>
              <delay_flex/>
              <params/>
              <ipmi_sensor/>
              <data_type>0</data_type>
              <authtype>0</authtype>
              <username/>
              <password/>
              <publickey/>
              <privatekey/>
              <port/>
              <description/>
              <inventory_link>0</inventory_link>
              <applications>
                <application>
                  <name>restartCount</name>
                </application>
              </applications>
              <valuemap/>
              <logtimefmt/>
              <application_prototypes/>
            </item_prototype>
            <!-- Pod phase, stored as text (value_type 4); the "No Running"
                 trigger searches it for "Running_true" and "Pod deleted". -->
            <item_prototype>
              <name>Pod {#POD_NAME} Running</name>
              <type>0</type>
              <snmp_community/>
              <multiplier>0</multiplier>
              <snmp_oid/>
              <key>oc.pod.status[{#POD_NAME},running]</key>
              <delay>30</delay>
              <history>30</history>
              <trends>0</trends>
              <status>0</status>
              <value_type>4</value_type>
              <allowed_hosts/>
              <units/>
              <delta>0</delta>
              <snmpv3_contextname/>
              <snmpv3_securityname/>
              <snmpv3_securitylevel>0</snmpv3_securitylevel>
              <snmpv3_authprotocol>0</snmpv3_authprotocol>
              <snmpv3_authpassphrase/>
              <snmpv3_privprotocol>0</snmpv3_privprotocol>
              <snmpv3_privpassphrase/>
              <formula>1</formula>
              <delay_flex/>
              <params/>
              <ipmi_sensor/>
              <data_type>0</data_type>
              <authtype>0</authtype>
              <username/>
              <password/>
              <publickey/>
              <privatekey/>
              <port/>
              <description/>
              <inventory_link>0</inventory_link>
              <applications>
                <application>
                  <name>RunningStatus</name>
                </application>
              </applications>
              <valuemap/>
              <logtimefmt/>
              <application_prototypes/>
            </item_prototype>
            <!-- Container ready flag, stored as an unsigned number (value_type 3).
                 Presumably 1 = true and 0 = not true (TODO confirm against the
                 agent script). The custom multiplier of 1 leaves the value as is. -->
            <item_prototype>
              <name>Pod {#POD_NAME} Running True</name>
              <type>0</type>
              <snmp_community/>
              <multiplier>1</multiplier>
              <snmp_oid/>
              <key>oc.pod.status[{#POD_NAME},running_true]</key>
              <delay>30</delay>
              <history>30</history>
              <trends>365</trends>
              <status>0</status>
              <value_type>3</value_type>
              <allowed_hosts/>
              <units/>
              <delta>0</delta>
              <snmpv3_contextname/>
              <snmpv3_securityname/>
              <snmpv3_securitylevel>0</snmpv3_securitylevel>
              <snmpv3_authprotocol>0</snmpv3_authprotocol>
              <snmpv3_authpassphrase/>
              <snmpv3_privprotocol>0</snmpv3_privprotocol>
              <snmpv3_privpassphrase/>
              <formula>1</formula>
              <delay_flex/>
              <params/>
              <ipmi_sensor/>
              <data_type>0</data_type>
              <authtype>0</authtype>
              <username/>
              <password/>
              <publickey/>
              <privatekey/>
              <port/>
              <description/>
              <inventory_link>0</inventory_link>
              <applications>
                <application>
                  <name>RunningStatus</name>
                </application>
              </applications>
              <valuemap/>
              <logtimefmt/>
              <application_prototypes/>
            </item_prototype>
          </item_prototypes>
          <trigger_prototypes>
            <!-- Pod not running: the latest status text contains neither
                 "Running_true" nor "Pod deleted", and the running_true item was 0
                 for each of the last 3 samples. max(#3)=0 is used because
                 last(#N) only inspects the single Nth most recent value, which
                 does not express "non-true for N consecutive checks". -->
            <trigger_prototype>
              <expression>{OC Pods:oc.pod.status[{#POD_NAME},running].str(Running_true)}=0 and {OC Pods:oc.pod.status[{#POD_NAME},running].str(Pod deleted)}=0 and {OC Pods:oc.pod.status[{#POD_NAME},running_true].max(#3)}=0</expression>
              <recovery_mode>0</recovery_mode>
              <recovery_expression/>
              <name>Pod {#POD_NAME} No Running</name>
              <correlation_mode>0</correlation_mode>
              <correlation_tag/>
              <url/>
              <status>0</status>
              <priority>1</priority>
              <description/>
              <type>0</type>
              <manual_close>1</manual_close>
              <dependencies/>
              <tags/>
            </trigger_prototype>
            <!-- Pod restarted: fires as soon as the latest restarts value contains
                 "Warning"; recovers (recovery_mode 1 = recovery expression) once
                 none of the last 3 values contains it. -->
            <trigger_prototype>
              <expression>{OC Pods:oc.pod.status[{#POD_NAME},restarts].str(Warning)}=1</expression>
              <recovery_mode>1</recovery_mode>
              <recovery_expression>{OC Pods:oc.pod.status[{#POD_NAME},restarts].str(Warning,#3)}=0</recovery_expression>
              <name>Pod {#POD_NAME} restarted Warning</name>
              <correlation_mode>0</correlation_mode>
              <correlation_tag/>
              <url/>
              <status>0</status>
              <priority>1</priority>
              <description/>
              <type>0</type>
              <manual_close>1</manual_close>
              <dependencies/>
              <tags/>
            </trigger_prototype>
          </trigger_prototypes>
          <graph_prototypes/>
          <host_prototypes/>
        </discovery_rule>
      </discovery_rules>
      <httptests/>
      <macros/>
      <templates/>
      <screens/>
    </template>
  </templates>
</zabbix_export>