先描述一下需求:我要对公网IP进行监控,检查响应情况,实现起来大概就是循环执行 curl IP/xxx.html,然后查看HTTP状态码和响应时间。现在我想把它做成exporter,接到prometheus上,能够在响应超时或者返回502的时候发出告警,顺便用grafana出个图
prometheus_client文档地址(完全看不懂orz):https://github.com/prometheus/client_python

import prometheus_client
from prometheus_client import Gauge,start_http_server,Counter
import pycurl
import time
import threading
from io import BytesIO
# Counter metrics (the Counter type provided by client_python) exposed by
# this exporter. Each one is labelled with the monitored url.
# NOTE: the exposition output carries a `_total` suffix on every counter
# sample and adds a matching `_created` series.

# Number of probes per HTTP status class ('1xx' .. '5xx').
url_http_code = Counter(
    name="url_http_code",
    documentation="request http_code of the host",
    labelnames=['code', 'url'],
)

# Number of probes per response-time bucket ('1', '2', '3', '+Inf' seconds).
url_http_request_time = Counter(
    name="url_http_request_time",
    documentation="request http_request_time of the host",
    labelnames=['le', 'url'],
)

# Total number of probes issued.
http_request_total = Counter(
    name="http_request_total",
    documentation="request request total of the host",
    labelnames=['url'],
)
# Fetch a URL with curl and report its status code and total elapsed time.
def test_website(url):
    """Perform one HTTP request against *url* and measure it.

    The response body is written to an in-memory buffer and discarded.
    Both the connect timeout and the overall timeout are 3 seconds.

    Args:
        url: The address to request, passed to curl unchanged.

    Returns:
        A ``(http_code, http_total_time)`` tuple. When the transfer fails
        with ``pycurl.error`` the sentinel pair ``(500, 999)`` is returned
        instead, so a failed probe is counted as a server error with a
        very long response time.
    """
    buffer_curl = BytesIO()
    c = pycurl.Curl()
    try:
        c.setopt(c.URL, url)
        c.setopt(c.WRITEDATA, buffer_curl)
        c.setopt(c.CONNECTTIMEOUT, 3)
        c.setopt(c.TIMEOUT, 3)
        try:
            c.perform()
        except pycurl.error:
            http_code = 500
            http_total_time = 999
        else:
            # Read the transfer info before the handle is closed below.
            http_code = c.getinfo(pycurl.HTTP_CODE)
            http_total_time = c.getinfo(pycurl.TOTAL_TIME)
        return http_code, http_total_time
    finally:
        # A new handle is created for every probe, so release it (and the
        # body buffer) explicitly instead of waiting for garbage collection.
        c.close()
        buffer_curl.close()
# Probe a URL once and record the outcome in the exporter's counters.
def count_metric(url):
    """Run one probe of *url* and increment the matching counters.

    Three counters are updated, in this order: the status-class counter,
    the response-time bucket counter, and the total request counter.

    Args:
        url: The address to probe; also used as the ``url`` label value.
    """
    status, elapsed = test_website(url)

    # Codes in [100, 500) map to their hundreds class; anything else
    # (500 and above, or below 100) is counted as '5xx'.
    if 100 <= status < 500:
        code_class = '%dxx' % (status // 100)
    else:
        code_class = '5xx'
    url_http_code.labels(code_class, url).inc()

    # Pick the first bucket whose upper limit (in seconds) is strictly
    # greater than the elapsed time; otherwise fall back to '+Inf'.
    bucket = '+Inf'
    for limit in (1, 2, 3):
        if elapsed < limit:
            bucket = str(limit)
            break
    url_http_request_time.labels(bucket, url).inc()

    http_request_total.labels(url).inc()
# Scheduler loop: start one probe of the url every `interval` seconds.
def count_threads(url, interval=5):
    """Launch a probe thread for *url* at a fixed interval, forever.

    Each probe runs in its own daemon thread, so a slow request does not
    delay the next one. This function never returns.

    Args:
        url: The address handed to ``count_metric`` on every iteration.
        interval: Seconds to sleep between launching probes. Defaults to 5.
    """
    while True:
        # Pass daemon=True to the constructor; Thread.setDaemon() is
        # deprecated in current Python versions.
        t = threading.Thread(target=count_metric, args=(url,), daemon=True)
        t.start()
        time.sleep(interval)
# Start the metrics endpoint and one scheduler thread per monitored host.
if __name__ == '__main__':
    # Serve the metrics over HTTP on port 9091.
    start_http_server(9091)
    server_list = [
            'www.baidu.com',
            'www.qq.com',
            'blog.csdn.net',
            'github.com',
            'google.com'
            ]
    # Daemon threads, so the process can exit without waiting for them.
    threads = [
        threading.Thread(target=count_threads, args=(url,), daemon=True)
        for url in server_list
    ]
    for thread in threads:
        thread.start()
    # Join every scheduler thread, not just the last one, to keep the main
    # thread alive while any of them is running. With an empty server_list
    # there is nothing to join and the script simply ends.
    for thread in threads:
        thread.join()

这里我用独立线程控制pycurl,每5秒执行一次
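访问9091端口,显示出来下面数据。一开始不知道为啥全自动加上了_total:从输出可以看出,Counter类型的指标名都会统一带上_total后缀(声明时名字里已经带_total的,比如http_request_total,不会重复添加),同时每个Counter还会多导出一个对应的_created指标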

# HELP url_http_code_total request http_code of the host
# TYPE url_http_code_total counter
url_http_code_total{code="3xx",url="blog.csdn.net"} 563.0
url_http_code_total{code="2xx",url="www.baidu.com"} 562.0
url_http_code_total{code="3xx",url="www.qq.com"} 563.0
url_http_code_total{code="3xx",url="github.com"} 555.0
url_http_code_total{code="5xx",url="google.com"} 562.0
url_http_code_total{code="5xx",url="github.com"} 8.0
url_http_code_total{code="5xx",url="www.baidu.com"} 1.0
# TYPE url_http_code_created gauge
url_http_code_created{code="3xx",url="blog.csdn.net"} 1.5511035889801528e+09
url_http_code_created{code="2xx",url="www.baidu.com"} 1.5511035889983172e+09
url_http_code_created{code="3xx",url="www.qq.com"} 1.551103589051125e+09
url_http_code_created{code="3xx",url="github.com"} 1.5511035896066923e+09
url_http_code_created{code="5xx",url="google.com"} 1.5511035919719362e+09
url_http_code_created{code="5xx",url="github.com"} 1.5511040173621314e+09
url_http_code_created{code="5xx",url="www.baidu.com"} 1.5511053585579858e+09
# HELP url_http_request_time_total request http_request_time of the host
# TYPE url_http_request_time_total counter
url_http_request_time_total{le="1",url="blog.csdn.net"} 563.0
url_http_request_time_total{le="1",url="www.baidu.com"} 547.0
url_http_request_time_total{le="1",url="www.qq.com"} 563.0
url_http_request_time_total{le="1",url="github.com"} 416.0
url_http_request_time_total{le="+Inf",url="google.com"} 562.0
url_http_request_time_total{le="2",url="github.com"} 113.0
url_http_request_time_total{le="3",url="github.com"} 26.0
url_http_request_time_total{le="+Inf",url="github.com"} 8.0
url_http_request_time_total{le="2",url="www.baidu.com"} 15.0
url_http_request_time_total{le="+Inf",url="www.baidu.com"} 1.0
# TYPE url_http_request_time_created gauge
url_http_request_time_created{le="1",url="blog.csdn.net"} 1.5511035889801846e+09
url_http_request_time_created{le="1",url="www.baidu.com"} 1.5511035889983401e+09
url_http_request_time_created{le="1",url="www.qq.com"} 1.5511035890511582e+09
url_http_request_time_created{le="1",url="github.com"} 1.5511035896067169e+09
url_http_request_time_created{le="+Inf",url="google.com"} 1.5511035919719641e+09
url_http_request_time_created{le="2",url="github.com"} 1.551103605623951e+09
url_http_request_time_created{le="3",url="github.com"} 1.5511039065420728e+09
url_http_request_time_created{le="+Inf",url="github.com"} 1.551104017362157e+09
url_http_request_time_created{le="2",url="www.baidu.com"} 1.551104911398178e+09
url_http_request_time_created{le="+Inf",url="www.baidu.com"} 1.5511053585580106e+09
# HELP http_request_total request request total of the host
# TYPE http_request_total counter
http_request_total{url="blog.csdn.net"} 563.0
http_request_total{url="www.baidu.com"} 563.0
http_request_total{url="www.qq.com"} 563.0
http_request_total{url="github.com"} 563.0
http_request_total{url="google.com"} 562.0
# TYPE http_request_created gauge
http_request_created{url="blog.csdn.net"} 1.551103588980202e+09
http_request_created{url="www.baidu.com"} 1.5511035889983532e+09
http_request_created{url="www.qq.com"} 1.5511035890511742e+09
http_request_created{url="github.com"} 1.5511035896067343e+09
http_request_created{url="google.com"} 1.5511035919719923e+09

接入到prometheus后,grafana画图
这个是http code的每分钟增长率,如果出现5xx,就说明有问题了
python开发prometheus exporter

这个是显示响应时间落在期望范围内的请求比例,比如只统计耗时小于1秒的请求,占总请求次数的比例

delta(url_http_request_time_total{le='1'}[1m]) / on(url) group_left delta(http_request_total[1m])

python开发prometheus exporter

相关文章: