需求:

pod中的容器重启一次则报警通知

pod非Runing 状态则报警

pod中的容器非true状态则报警

三个需求其实是有点重叠的

pod重启期间pod肯定会有非Running状态,只要有重启报警那么pod非Runing也会报警,pod非Runing容器状态肯定非true也会报警

所有报警设置为:

pod重启一次就报警

pod非Runing  and 容器非true (#3)  and pod非删除 =报警

 

zabbix server中建一个模板

<?xml version="1.0" encoding="UTF-8"?>
<zabbix_export>
    <version>3.2</version>
    <date>2017-11-23T07:48:53Z</date>
    <groups>
        <group>
            <name>OpenShift</name>
        </group>
    </groups>
    <templates>
        <template>
            <template>OC Pods</template>
            <name>OC Pods</name>
            <description/>
            <groups>
                <group>
                    <name>OpenShift</name>
                </group>
            </groups>
            <applications>
                <application>
                    <name>restartCount</name>
                </application>
                <application>
                    <name>RunningStatus</name>
                </application>
            </applications>
            <items/>
            <discovery_rules>
                <discovery_rule>
                    <name>OC Pods Discover</name>
                    <type>0</type>
                    <snmp_community/>
                    <snmp_oid/>
                    <key>oc.pod.discover</key>
                    <delay>300</delay>
                    <status>1</status>
                    <allowed_hosts/>
                    <snmpv3_contextname/>
                    <snmpv3_securityname/>
                    <snmpv3_securitylevel>0</snmpv3_securitylevel>
                    <snmpv3_authprotocol>0</snmpv3_authprotocol>
                    <snmpv3_authpassphrase/>
                    <snmpv3_privprotocol>0</snmpv3_privprotocol>
                    <snmpv3_privpassphrase/>
                    <delay_flex/>
                    <params/>
                    <ipmi_sensor/>
                    <authtype>0</authtype>
                    <username/>
                    <password/>
                    <publickey/>
                    <privatekey/>
                    <port/>
                    <filter>
                        <evaltype>0</evaltype>
                        <formula/>
                        <conditions/>
                    </filter>
                    <lifetime>7</lifetime>
                    <description/>
                    <item_prototypes>
                        <item_prototype>
                            <name>Pod {#POD_NAME} Restarts</name>
                            <type>0</type>
                            <snmp_community/>
                            <multiplier>0</multiplier>
                            <snmp_oid/>
                            <key>oc.pod.status[{#POD_NAME},restarts]</key>
                            <delay>30</delay>
                            <history>30</history>
                            <trends>0</trends>
                            <status>0</status>
                            <value_type>4</value_type>
                            <allowed_hosts/>
                            <units/>
                            <delta>0</delta>
                            <snmpv3_contextname/>
                            <snmpv3_securityname/>
                            <snmpv3_securitylevel>0</snmpv3_securitylevel>
                            <snmpv3_authprotocol>0</snmpv3_authprotocol>
                            <snmpv3_authpassphrase/>
                            <snmpv3_privprotocol>0</snmpv3_privprotocol>
                            <snmpv3_privpassphrase/>
                            <formula>1</formula>
                            <delay_flex/>
                            <params/>
                            <ipmi_sensor/>
                            <data_type>0</data_type>
                            <authtype>0</authtype>
                            <username/>
                            <password/>
                            <publickey/>
                            <privatekey/>
                            <port/>
                            <description/>
                            <inventory_link>0</inventory_link>
                            <applications>
                                <application>
                                    <name>restartCount</name>
                                </application>
                            </applications>
                            <valuemap/>
                            <logtimefmt/>
                            <application_prototypes/>
                        </item_prototype>
                        <item_prototype>
                            <name>Pod {#POD_NAME} Running</name>
                            <type>0</type>
                            <snmp_community/>
                            <multiplier>0</multiplier>
                            <snmp_oid/>
                            <key>oc.pod.status[{#POD_NAME},running]</key>
                            <delay>30</delay>
                            <history>30</history>
                            <trends>0</trends>
                            <status>0</status>
                            <value_type>4</value_type>
                            <allowed_hosts/>
                            <units/>
                            <delta>0</delta>
                            <snmpv3_contextname/>
                            <snmpv3_securityname/>
                            <snmpv3_securitylevel>0</snmpv3_securitylevel>
                            <snmpv3_authprotocol>0</snmpv3_authprotocol>
                            <snmpv3_authpassphrase/>
                            <snmpv3_privprotocol>0</snmpv3_privprotocol>
                            <snmpv3_privpassphrase/>
                            <formula>1</formula>
                            <delay_flex/>
                            <params/>
                            <ipmi_sensor/>
                            <data_type>0</data_type>
                            <authtype>0</authtype>
                            <username/>
                            <password/>
                            <publickey/>
                            <privatekey/>
                            <port/>
                            <description/>
                            <inventory_link>0</inventory_link>
                            <applications>
                                <application>
                                    <name>RunningStatus</name>
                                </application>
                            </applications>
                            <valuemap/>
                            <logtimefmt/>
                            <application_prototypes/>
                        </item_prototype>
                        <item_prototype>
                            <name>Pod {#POD_NAME} Running True</name>
                            <type>0</type>
                            <snmp_community/>
                            <multiplier>1</multiplier>
                            <snmp_oid/>
                            <key>oc.pod.status[{#POD_NAME},running_true]</key>
                            <delay>30</delay>
                            <history>30</history>
                            <trends>365</trends>
                            <status>0</status>
                            <value_type>3</value_type>
                            <allowed_hosts/>
                            <units/>
                            <delta>0</delta>
                            <snmpv3_contextname/>
                            <snmpv3_securityname/>
                            <snmpv3_securitylevel>0</snmpv3_securitylevel>
                            <snmpv3_authprotocol>0</snmpv3_authprotocol>
                            <snmpv3_authpassphrase/>
                            <snmpv3_privprotocol>0</snmpv3_privprotocol>
                            <snmpv3_privpassphrase/>
                            <formula>1</formula>
                            <delay_flex/>
                            <params/>
                            <ipmi_sensor/>
                            <data_type>0</data_type>
                            <authtype>0</authtype>
                            <username/>
                            <password/>
                            <publickey/>
                            <privatekey/>
                            <port/>
                            <description/>
                            <inventory_link>0</inventory_link>
                            <applications>
                                <application>
                                    <name>RunningStatus</name>
                                </application>
                            </applications>
                            <valuemap/>
                            <logtimefmt/>
                            <application_prototypes/>
                        </item_prototype>
                    </item_prototypes>
                    <trigger_prototypes>
                        <trigger_prototype>
                            <expression>{OC Pods:oc.pod.status[{#POD_NAME},running].str(Running_true)}=0&#13;
and&#13;
{OC Pods:oc.pod.status[{#POD_NAME},running].str(Pod deleted)}=0&#13;
and&#13;
{OC Pods:oc.pod.status[{#POD_NAME},running_true].last(#5)}=0</expression>
                            <recovery_mode>0</recovery_mode>
                            <recovery_expression/>
                            <name>Pod {#POD_NAME} No Running</name>
                            <correlation_mode>0</correlation_mode>
                            <correlation_tag/>
                            <url/>
                            <status>0</status>
                            <priority>1</priority>
                            <description/>
                            <type>0</type>
                            <manual_close>1</manual_close>
                            <dependencies/>
                            <tags/>
                        </trigger_prototype>
                        <trigger_prototype>
                            <expression>{OC Pods:oc.pod.status[{#POD_NAME},restarts].str(Warning)}=1</expression>
                            <recovery_mode>1</recovery_mode>
                            <recovery_expression>{OC Pods:oc.pod.status[{#POD_NAME},restarts].str(Warning,#3)}=0</recovery_expression>
                            <name>Pod {#POD_NAME} restarted Warning</name>
                            <correlation_mode>0</correlation_mode>
                            <correlation_tag/>
                            <url/>
                            <status>0</status>
                            <priority>1</priority>
                            <description/>
                            <type>0</type>
                            <manual_close>1</manual_close>
                            <dependencies/>
                            <tags/>
                        </trigger_prototype>
                    </trigger_prototypes>
                    <graph_prototypes/>
                    <host_prototypes/>
                </discovery_rule>
            </discovery_rules>
            <httptests/>
            <macros/>
            <templates/>
            <screens/>
        </template>
    </templates>
</zabbix_export>
模板文件

相关文章: