docker配置搭建Prometheus

一、说明
Prometheus 负责收集数据，Grafana 负责展示数据。本文用到的 Exporter 及相关组件包括：
1）Node Exporter，负责收集 host 硬件和操作系统数据。它将以容器方式运行在所有 host 上。
2）cAdvisor，负责收集容器数据。它将以容器方式运行在所有 host 上。
3）Alertmanager，负责告警。它不是 Exporter，而是接收 Prometheus 发出的告警并发送通知的独立组件，同样以容器方式运行。
完整Exporter列表请参考：https://prometheus.io/docs/instrumenting/exporters/

二、添加配置文件

1、alertmanager.yaml

# Alertmanager configuration: e-mail every alert to a single receiver.
# Comments must be separated from values by ASCII spaces; full-width
# (U+3000) spaces are not YAML whitespace and break parsing.
global:
  # Outgoing SMTP relay (Sina mail), as host:port.
  smtp_smarthost: 'smtp.sina.com:25'
  # Sender address used for alert e-mails.
  smtp_from: 'dogotsn@sina.com'
  # Login name for SMTP authentication.
  # NOTE(review): some providers expect the full e-mail address here - confirm.
  smtp_auth_username: 'dogotsn'
  # Password for the sending mailbox (masked here; keep real secrets out of
  # version control).
  smtp_auth_password: '35ea02c*****'
  # Do not require TLS for the SMTP connection.
  smtp_require_tls: false

route:
  # Batch alerts that share the same alertname into one notification.
  group_by: ['alertname']
  # Delay before the first notification for a newly created group.
  group_wait: 10s
  # Delay before notifying about new alerts added to an existing group.
  group_interval: 10s
  # Delay before re-sending a notification for alerts that are still firing.
  repeat_interval: 10m
  # Default receiver; must match a `name` under `receivers`.
  receiver: live-monitoring

receivers:
  - name: 'live-monitoring'
    email_configs:
      # Recipient of the alert e-mails.
      - to: 'zhangc***o15@163.com'

2、node_down.yml

# Alerting rules; referenced from prometheus.yml through `rule_files`.
groups:
  - name: node_down
    rules:
      # Fires once a target's `up` metric has been 0 for one full minute.
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        labels:
          user: test
        annotations:
          # {{ $labels.* }} placeholders are expanded by Prometheus per alert.
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."

3、prometheus.yml

# Global defaults, applied to every job that does not override them.
global:
  # Scrape targets every 15 seconds (Prometheus' default is 1 minute).
  scrape_interval: 15s
  # Evaluate rules every 15 seconds (Prometheus' default is 1 minute).
  evaluation_interval: 15s
  # scrape_timeout is left at the global default (10s).

# Alertmanager instance(s) that receive firing alerts.
# NOTE(review): 193.168.x.x is not a private address range - confirm this is
# not meant to be 192.168.1.39 (same address is used by every target below).
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - '193.168.1.39:9093'
            # Alternative when resolving by service name: alertmanager:9093

# Rule files are loaded once and then evaluated periodically according to
# the global `evaluation_interval`.
rule_files:
  - 'node_down.yml'
  # - 'first_rules.yml'
  # - 'second_rules.yml'

# Scrape jobs. The job name is attached as the label `job=<job_name>` to
# every time series scraped from that job.
scrape_configs:
  # Prometheus itself.
  - job_name: 'prometheus'
    static_configs:
      - targets:
          - '193.168.1.39:9090'

  # cAdvisor (container metrics).
  - job_name: 'cadvisor'
    static_configs:
      - targets:
          - '193.168.1.39:8080'

  # Node Exporter; overrides the global interval and scrapes every 8 seconds.
  - job_name: 'node'
    scrape_interval: 8s
    static_configs:
      - targets:
          - '193.168.1.39:9100'

三、编写docker-compose

# Compose stack: Prometheus, Alertmanager, Grafana, Node Exporter, cAdvisor.
version: '2'

networks:
  # Bridge network shared by every service below.
  mynet:
    driver: bridge

services:
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    hostname: prometheus
    restart: always
    volumes:
      # Main config plus the rule file it references; both are only read.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./node_down.yml:/etc/prometheus/node_down.yml:ro
    ports:
      - "9090:9090"
    networks:
      - mynet

  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    hostname: alertmanager
    restart: always
    volumes:
      # The image starts with --config.file=/etc/alertmanager/alertmanager.yml,
      # so the host file must be mounted at exactly that path (".yml", not
      # ".yaml"); otherwise the built-in default config is used instead.
      - ./alertmanager.yaml:/etc/alertmanager/alertmanager.yml:ro
    ports:
      - "9093:9093"
    networks:
      - mynet

  grafana:
    image: grafana/grafana
    container_name: grafana
    hostname: grafana
    restart: always
    # NOTE(review): no volume is mounted, so dashboards and settings live only
    # inside the container - confirm whether persistence is wanted.
    ports:
      - "3000:3000"
    networks:
      - mynet

  node-exporter:
    image: prom/node-exporter
    # Alternative image: quay.io/prometheus/node-exporter
    container_name: node-exporter
    hostname: node-exporter
    restart: always
    # Expose the host's /proc, /sys and root filesystem read-only and point
    # the exporter at them, so metrics describe the host rather than the
    # container. Network metrics still reflect the container's namespace
    # because the service stays on the bridge network.
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'
    ports:
      - "9100:9100"
    networks:
      - mynet

  cadvisor:
    # NOTE(review): upstream now publishes cAdvisor as gcr.io/cadvisor/cadvisor;
    # consider migrating and pinning a version.
    image: google/cadvisor:latest
    container_name: cadvisor
    hostname: cadvisor
    restart: always
    volumes:
      # Host paths cAdvisor inspects to discover containers and their usage.
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      - "8080:8080"
    networks:
      - mynet

View Code